// fairseq2.h — C-ABI surface of the fairseq2 ggml port.
#pragma once

#include <cstdint>
#include <map>
#include <string>
#include <vector>

#include "ggml.h"
/// A loaded fairseq2 model: its named weight tensors plus opaque
/// architecture/hyperparameter data, and an (externally owned) inference context.
struct fairseq2_model {
    // Context containing all tensors memory; owned by this object
    // (released by fairseq2_model_free).
    ggml_context* tensors_ctx;

    // Named tensors, all tensors should belong to tensors_ctx
    std::map<std::string, struct ggml_tensor *> tensors;

    // Opaque architecture descriptor (concrete type not visible in this header).
    void* arch;

    // Opaque hyperparameter struct (concrete type not visible in this header).
    void* hparams;

    // an inference context, not managed by this object
    // TODO: is this the best place to store this or should we also pass this to all forward methods ?
    ggml_context* ctx;
};
/// allocate the fairseq2 model and hyperparameters
extern "C" fairseq2_model* fairseq2_model_alloc();

// free the models and all its owned tensors
extern "C" void fairseq2_model_free(fairseq2_model* model);

// Attach an inference context to the model. The context stays owned by the
// caller (see fairseq2_model::ctx: "not managed by this object").
extern "C" void fairseq2_model_set_inference_ctx(fairseq2_model* model, ggml_context* ctx);

// Heap-allocate a std::string copy of `c_str`, for callers binding through
// the C ABI. Release with std_string_free.
extern "C" std::string* std_string_alloc(char* c_str);

// Free a std::string previously obtained from std_string_alloc.
extern "C" void std_string_free(std::string* str);
/// Slice tensor `a` along `axis`, keeping the elements in [start, end).
/// NOTE(review): half-open Python-style range assumed — confirm in the implementation.
extern "C" ggml_tensor* ggml_slice(
    struct ggml_context* ctx,
    struct ggml_tensor* a,
    int axis,
    int64_t start,
    int64_t end
);

/// Merge the given dimension and the previous one in the tensor.
/// (..., num_heads, N, ...) -> (..., num_heads * N, ...)
/// dim is the position of the resulting merged dimension
/// ggml_flatten_1d(x, d) <==> torch.flatten(x, -1-d-1, -1-d)
/// (original comment was garbled; torch equivalence to confirm against the .cpp)
extern "C" ggml_tensor* ggml_flatten_1d(ggml_context* ctx, ggml_tensor* x, int dim);

/// Split the given dimension.
/// (..., K * N, ...) -> (..., K, N, ...)
/// dim is the position of the output dimension with the given number of elements (N).
extern "C" ggml_tensor* ggml_unflatten_1d(ggml_context* ctx, ggml_tensor* x, int dim, int num_el);
/// Forward pass of a Linear layer; weights are resolved from `model.tensors`
/// under `prefix` (exact key naming convention: see the implementation).
extern "C" ggml_tensor* Linear_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* input
);

/// Forward pass of a LayerNorm layer, parameters resolved under `prefix`.
extern "C" ggml_tensor* LayerNorm_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* input
);

/// Forward pass of a standard transformer feed-forward network.
extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

/// Forward pass of multi-head attention.
extern "C" ggml_tensor* MultiheadAttention_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* queries, // (slen, d_in)
    ggml_tensor* keys,    // (klen, d_in)
    ggml_tensor* values,  // (klen, d_out)
    ggml_tensor* _        // (klen, slen) TODO: do we need to pass mask here ?
);

/// Apply positional embedding to `embeds` (presumably additive — confirm in the .cpp).
extern "C" ggml_tensor* PositionalEmbedding_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* embeds
);

/// Embedding frontend of a transformer: maps token ids in `seqs` to embeddings
/// (scaling/positional details depend on the implementation — confirm in the .cpp).
extern "C" ggml_tensor* TransformerEmbeddingFrontend_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

/// Forward pass of one standard transformer encoder layer
/// (self-attention + feed-forward, per the fairseq2 layer of the same name).
extern "C" ggml_tensor* StandardTransformerEncoderLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);
// Specifies the Layer Normalization order.
enum TransformerNormOrder {
    TRANSFORMER_NORM_ORDER_POST = 0,
    TRANSFORMER_NORM_ORDER_PRE = 1,
    TRANSFORMER_NORM_ORDER_PRE_WITH_NORMFORMER = 2
};
/// Holds the options to pass to a sequence generator.
struct SequenceGeneratorOptions {
    /// The beam size.
    int beam_size = 5;

    /// The minimum length of generated sequences (including prefix sequence).
    int min_seq_len = 1;

    /// The terms ``a`` and ``b`` of ``ax + b`` where ``x`` is the source
    /// sequence length. The generated sequences (including prefix sequence) will
    /// have the maximum length of ``min(hard_max_seq_len, ax + b)``. See also
    /// ``hard_max_seq_len``.
    float soft_max_seq_len_a = 1;
    int soft_max_seq_len_b = 200;

    /// The hard limit on maximum length of generated sequences.
    int hard_max_seq_len = 1024;

    /// The length penalty, where values less than 1.0 favor shorter, values
    /// greater than 1.0 favor longer sequences.
    float len_penalty = 1.0;

    /// The unknown symbol penalty, where values less than 0 produce more UNKs,
    /// values greater than 0 produce fewer UNKs.
    float unk_penalty = 0.0;

    /// If ``True``, normalizes scores by the length of generated sequences.
    bool normalize_scores = true;
};
/// A single generation request: decoding options plus the special token
/// indices of the target vocabulary.
struct SequenceGeneratorJob {
    SequenceGeneratorOptions opts;

    // Tokens the generated sequence starts from (presumably a forced
    // BOS/language-token prefix — confirm against the generator).
    ggml_tensor* prefix_seq;

    // Special vocabulary indices. NOTE: std::int32_t requires <cstdint>,
    // which this header does not include directly.
    std::int32_t pad_idx;
    std::int32_t unk_idx;
    std::int32_t bos_idx;
    std::int32_t eos_idx;
};
  116. extern "C" float generate_sequence(
  117. fairseq2_model& model,
  118. const SequenceGeneratorJob& opts,
  119. ggml_tensor* encoder_output,
  120. ggml_tensor* encoder_padding_mask,
  121. ggml_tensor* output_seq
  122. );