// fairseq2.h

#pragma once

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

#include "ggml.h"
#include "kaldi-native-fbank/csrc/feature-fbank.h"

struct fairseq2_model {
    // Context holding the memory of all model tensors.
    ggml_context* tensors_ctx;

    // Named tensors; every tensor should belong to tensors_ctx.
    std::unordered_map<std::string, struct ggml_tensor *> tensors;

    // Hashmap containing the model hyper-parameters.
    std::unordered_map<std::string, std::int64_t> hparams;

    // Hashmap containing per-layer hyper-parameters.
    // Normally these could be inferred from hparams, but storing them here avoids doing that logic in GGML.
    std::unordered_map<std::string, std::int64_t> layer_config;

    // An inference context, not managed by this object.
    // TODO: is this the best place to store this, or should we also pass it to all forward methods?
    ggml_context* ctx;
};

double fairseq2_model_layer_config_double(const fairseq2_model& model, std::string name);

/// Allocates the fairseq2 model and its hyper-parameter maps.
extern "C" fairseq2_model* fairseq2_model_alloc();

/// Frees the model and all the tensors it owns.
extern "C" void fairseq2_model_free(fairseq2_model* model);

extern "C" void fairseq2_model_set_inference_ctx(fairseq2_model* model, ggml_context* ctx);

extern "C" std::string* std_string_alloc(char* c_str);
extern "C" void std_string_free(std::string* str);
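
/// Example (sketch, not part of the API): a typical model lifecycle, assuming the
/// tensors, hparams and layer_config are populated elsewhere (e.g. by a loader
/// declared in another header). The scratch size below is purely illustrative.
/// std_string_alloc / std_string_free appear intended for non-C++ callers that
/// need to build the std::string arguments used below; that reading is inferred
/// from their signatures.
///
///     fairseq2_model* model = fairseq2_model_alloc();
///     // ... fill model->tensors, model->hparams, model->layer_config via a loader ...
///
///     // Scratch context for one inference pass; it is not owned by the model.
///     struct ggml_init_params params = { /*mem_size*/ 256 * 1024 * 1024, /*mem_buffer*/ nullptr, /*no_alloc*/ false };
///     ggml_context* ctx = ggml_init(params);
///     fairseq2_model_set_inference_ctx(model, ctx);
///
///     // ... call the *_forward functions declared below ...
///
///     ggml_free(ctx);
///     fairseq2_model_free(model);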

extern "C" ggml_tensor* WaveformToFbank_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* waveform
);

extern "C" ggml_tensor* ggml_slice(
    struct ggml_context* ctx,
    struct ggml_tensor* a,
    int axis,
    int64_t start,
    int64_t end
);
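
/// Example (sketch): taking the sub-range [start, end) along one axis. Whether
/// `axis` follows ggml's reversed dimension order or the PyTorch convention is
/// an assumption; check the implementation before relying on it.
///
///     // elements [2, 5) along axis 0 of a 10-element tensor
///     ggml_tensor* t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10);
///     ggml_tensor* s = ggml_slice(ctx, t, /*axis*/ 0, /*start*/ 2, /*end*/ 5);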

/// Merge the given dimension and the previous one in the tensor.
/// (..., num_heads, N, ...) -> (..., num_heads * N, ...)
/// dim is the position of the resulting merged dimension.
/// ggml_flatten_1d(x, d) <==> torch.flatten(x, -1-d-1, -1-d)
extern "C" ggml_tensor* ggml_flatten_1d(ggml_context* ctx, ggml_tensor* x, int dim);

/// Split the given dimension.
/// (..., K * N, ...) -> (..., K, N, ...)
/// dim is the position of the output dimension with the given number of elements (N).
extern "C" ggml_tensor* ggml_unflatten_1d(ggml_context* ctx, ggml_tensor* x, int dim, int num_el);
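
/// Example (sketch): splitting a packed attention dimension into heads and
/// merging it back. The dim/num_el convention below follows the torch.flatten
/// analogy in the comments above and is an assumption; `x`, `num_heads` and
/// `head_dim` are placeholders.
///
///     // (..., num_heads * head_dim) -> (..., num_heads, head_dim)
///     ggml_tensor* split  = ggml_unflatten_1d(ctx, x, /*dim*/ 0, /*num_el*/ head_dim);
///     // ... per-head processing ...
///     ggml_tensor* merged = ggml_flatten_1d(ctx, split, /*dim*/ 0);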

extern "C" ggml_tensor* Linear_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* input
);

extern "C" ggml_tensor* LayerNorm_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* input
);

extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

extern "C" ggml_tensor* SiluFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

extern "C" ggml_tensor* MultiheadAttention_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* queries, // (slen, d_in)
    ggml_tensor* keys,    // (klen, d_in)
    ggml_tensor* values,  // (klen, d_out)
    ggml_tensor* _        // (klen, slen) TODO: do we need to pass the mask here?
);
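
/// Example (sketch): self-attention, i.e. queries == keys == values. The prefix
/// is hypothetical and must match the tensor names stored in `model.tensors`;
/// passing nullptr for the unused mask argument is an assumption about the
/// implementation.
///
///     ggml_tensor* attn_out = MultiheadAttention_forward(
///         model, "text_encoder.layers.0.self_attn",
///         seqs, seqs, seqs, /*mask*/ nullptr);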

extern "C" ggml_tensor* PositionalEmbedding_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* embeds
);

extern "C" ggml_tensor* TransformerEmbeddingFrontend_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

extern "C" ggml_tensor* StandardTransformerEncoderLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

extern "C" ggml_tensor* RelativePositionMHA_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

extern "C" ggml_tensor* ConvModule_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

extern "C" ggml_tensor* StandardConformerEncoderLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

extern "C" ggml_tensor* StandardConformerEncoder_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);
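
/// Example (sketch): the speech front half, from a raw waveform tensor to
/// encoder output. Both prefixes are hypothetical (they depend on how the
/// checkpoint was converted), and passing nullptr as padding_mask assumes a
/// batch without padding.
///
///     ggml_tensor* fbank   = WaveformToFbank_forward(model, "", waveform);
///     ggml_tensor* enc_out = StandardConformerEncoder_forward(
///         model, "speech_encoder", fbank, /*padding_mask*/ nullptr);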

extern "C" ggml_tensor* StandardConformerEncoderAdaptorLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

extern "C" ggml_tensor* StandardConformerEncoderAdaptor_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

// Specifies the Layer Normalization order.
// See fairseq2/nn/transformer/norm_order.py
enum TransformerNormOrder {
    TRANSFORMER_NORM_ORDER_POST = 0,
    TRANSFORMER_NORM_ORDER_PRE = 1,
    TRANSFORMER_NORM_ORDER_PRE_WITH_NORMFORMER = 2
};

/// Holds the options to pass to a sequence generator.
struct SequenceGeneratorOptions {
    /// The beam size.
    int beam_size = 5;

    /// The minimum length of generated sequences (including the prefix sequence).
    int min_seq_len = 1;

    /// The terms ``a`` and ``b`` of ``ax + b`` where ``x`` is the source
    /// sequence length. The generated sequences (including the prefix sequence)
    /// will have a maximum length of ``min(hard_max_seq_len, ax + b)``. See also
    /// ``hard_max_seq_len``.
    float soft_max_seq_len_a = 1;
    int soft_max_seq_len_b = 200;

    /// The hard limit on the maximum length of generated sequences.
    int hard_max_seq_len = 1024;

    /// The length penalty: values less than 1.0 favor shorter sequences,
    /// values greater than 1.0 favor longer sequences.
    float len_penalty = 1.0;

    /// The unknown symbol penalty: values less than 0 produce more UNKs,
    /// values greater than 0 produce fewer UNKs.
    float unk_penalty = 0.0;

    /// If ``true``, normalizes scores by the length of generated sequences.
    bool normalize_scores = true;
};

struct SequenceGeneratorJob {
    SequenceGeneratorOptions opts;
    ggml_tensor* prefix_seq;
    std::int32_t pad_idx;
    std::int32_t unk_idx;
    std::int32_t bos_idx;
    std::int32_t eos_idx;
};

/// Represents a hypothesis produced by a sequence generator.
struct Hypothesis {
    /// The generated sequence.
    ggml_tensor* seq;

    /// The score of the hypothesis.
    float score;

    /// The score of each individual sequence step.
    ggml_tensor* step_scores;
};

extern "C" Hypothesis* generate_sequence(
    fairseq2_model& model,
    const SequenceGeneratorJob& job,
    ggml_tensor* encoder_output,
    ggml_tensor* encoder_padding_mask,
    ggml_context* result_ctx
);
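
/// Example (sketch): decoding from an encoder output with beam search. The
/// special-token indices and the prefix sequence are hypothetical and must match
/// the model's vocabulary; `result_ctx` is a caller-owned ggml_context that
/// receives the hypothesis tensors. That the returned array holds `beam_size`
/// entries sorted by score is an assumption.
///
///     SequenceGeneratorJob job;
///     job.opts = SequenceGeneratorOptions{};  // defaults: beam_size = 5, len_penalty = 1.0, ...
///     job.prefix_seq = prefix_seq;            // e.g. [eos_idx, tgt_lang_idx]
///     job.pad_idx = 0;
///     job.unk_idx = 1;
///     job.bos_idx = 2;
///     job.eos_idx = 3;
///
///     Hypothesis* hyps = generate_sequence(
///         model, job, encoder_output, /*encoder_padding_mask*/ nullptr, result_ctx);
///     ggml_tensor* best_seq = hyps[0].seq;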