// fairseq2.h — C/C++ interface for ggml-based fairseq2 model inference.
  1. #pragma once
  2. #include <map>
  3. #include <string>
  4. #include <vector>
  5. #include "ggml.h"
  6. #include "kaldi-native-fbank/csrc/feature-fbank.h"
/// Container for a loaded fairseq2 model: its weights, per-layer config,
/// hyperparameters, and the context used for inference.
struct fairseq2_model {
    // Context owning the memory of all weight tensors.
    ggml_context* tensors_ctx;
    // Named tensors; all tensors should belong to tensors_ctx.
    std::map<std::string, struct ggml_tensor *> tensors;
    // Per-layer configuration values keyed by name.
    // NOTE(review): stored as int64 yet read back as double by
    // fairseq2_model_layer_config_double — presumably a bit reinterpretation;
    // confirm in the implementation.
    std::map<std::string, std::int64_t> layer_config;
    // Opaque pointer to the model hyperparameters (concrete type is set by the loader).
    void* hparams;
    // An inference context, not managed by this object.
    // TODO: is this the best place to store this or should we also pass this to all forward methods ?
    ggml_context* ctx;
};
/// Returns the layer-config entry `name` as a double.
/// NOTE(review): layer_config holds std::int64_t values — presumably the bits
/// are reinterpreted as a double; confirm against the implementation.
double fairseq2_model_layer_config_double(const fairseq2_model& model, std::string name);

/// allocate the fairseq2 model and hyperparameters
extern "C" fairseq2_model* fairseq2_model_alloc();

// free the models and all its owned tensors
extern "C" void fairseq2_model_free(fairseq2_model* model);

/// Set the inference context; the model does not take ownership of `ctx`.
extern "C" void fairseq2_model_set_inference_ctx(fairseq2_model* model, ggml_context* ctx);

/// Allocate a std::string from a C string so non-C++ callers (FFI bindings)
/// can build the std::string arguments taken by the forward functions below.
extern "C" std::string* std_string_alloc(char* c_str);

/// Free a string previously returned by std_string_alloc.
extern "C" void std_string_free(std::string* str);
/// Converts a raw audio waveform tensor into filterbank features
/// (presumably via kaldi-native-fbank — see the feature-fbank.h include;
/// confirm exact feature layout in the implementation).
/// `prefix` selects this module's weights/config inside `model`.
extern "C" ggml_tensor* WaveformToFbank_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* waveform
);
/// Returns a view of `a` restricted to [start, end) along dimension `axis`.
/// NOTE(review): negative indices presumably wrap Python-style — confirm in the .cpp.
extern "C" ggml_tensor* ggml_slice(
    struct ggml_context* ctx,
    struct ggml_tensor* a,
    int axis,
    int64_t start,
    int64_t end
);

/// Merge the given dimension and the previous one in the tensor.
/// (..., num_heads, N, ...) -> (..., num_heads * N, ...)
/// dim is the position of the resulting merged dimension
/// ggml_flatten_1d(x, d) <==> torch.flatten(x, -1-d-1, -1-d)
extern "C" ggml_tensor* ggml_flatten_1d(ggml_context* ctx, ggml_tensor* x, int dim);

/// Split the given dimension.
/// (..., K * N, ...) -> (..., K, N, ...)
/// dim is the position of the output dimension with the given number of element (N).
extern "C" ggml_tensor* ggml_unflatten_1d(ggml_context* ctx, ggml_tensor* x, int dim, int num_el);
/// Forward pass of the Linear module whose weights live under `prefix` in `model`.
extern "C" ggml_tensor* Linear_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* input
);

/// Forward pass of the LayerNorm module stored under `prefix`.
extern "C" ggml_tensor* LayerNorm_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* input
);

/// Forward pass of the standard (two-projection) feed-forward network at `prefix`.
extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

/// Forward pass of the SiLU-activated feed-forward network at `prefix`.
extern "C" ggml_tensor* SiluFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);
/// Multi-head attention over (queries, keys, values) using the projection
/// weights stored under `prefix`. The final argument is currently unused.
extern "C" ggml_tensor* MultiheadAttention_forward(
    fairseq2_model& model,
    const std::string &prefix,
    ggml_tensor* queries,  // (slen, d_in)
    ggml_tensor* keys,     // (klen, d_in)
    ggml_tensor* values,   // (klen, d_out)
    ggml_tensor* _         // (klen, slen) TODO: do we need to pass mask here ?
);

/// Adds positional information to `embeds` using the module at `prefix`.
extern "C" ggml_tensor* PositionalEmbedding_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* embeds
);

/// Embedding frontend of a transformer: token embedding plus positional
/// encoding (exact steps defined by the implementation for `prefix`).
extern "C" ggml_tensor* TransformerEmbeddingFrontend_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);
/// Forward pass of one standard transformer encoder layer at `prefix`.
extern "C" ggml_tensor* StandardTransformerEncoderLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

/// Multi-head attention with relative position encoding (conformer-style),
/// using the weights stored under `prefix`.
extern "C" ggml_tensor* RelativePositionMHA_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

/// Forward pass of the conformer convolution module at `prefix`.
extern "C" ggml_tensor* ConvModule_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
);

/// Forward pass of one conformer encoder layer at `prefix`.
extern "C" ggml_tensor* StandardConformerEncoderLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

/// Forward pass of the full conformer encoder stack at `prefix`.
extern "C" ggml_tensor* StandardConformerEncoder_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

/// Forward pass of one adaptor layer of the conformer encoder at `prefix`.
extern "C" ggml_tensor* StandardConformerEncoderAdaptorLayer_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);

/// Forward pass of the full adaptor stack of the conformer encoder at `prefix`.
extern "C" ggml_tensor* StandardConformerEncoderAdaptor_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs,
    ggml_tensor* padding_mask
);
// Specifies the Layer Normalization order.
// Values mirror fairseq2's TransformerNormOrder and must stay stable
// (they are part of the C interface).
enum TransformerNormOrder {
    TRANSFORMER_NORM_ORDER_POST = 0,
    TRANSFORMER_NORM_ORDER_PRE = 1,
    TRANSFORMER_NORM_ORDER_PRE_WITH_NORMFORMER = 2
};
/// Holds the options to pass to a sequence generator.
struct SequenceGeneratorOptions {
    /// The beam size.
    int beam_size = 5;
    /// The minimum length of generated sequences (including prefix sequence).
    int min_seq_len = 1;
    /// The terms ``a`` and ``b`` of ``ax + b`` where ``x`` is the source
    /// sequence length. The generated sequences (including prefix sequence) will
    /// have the maximum length of ``min(hard_max_seq_len, ax + b)``. See also
    /// ``hard_max_seq_len``.
    float soft_max_seq_len_a = 1;
    int soft_max_seq_len_b = 200;
    /// The hard limit on maximum length of generated sequences.
    int hard_max_seq_len = 1024;
    /// The length penalty, where values less than 1.0 favor shorter, values
    /// greater than 1.0 favor longer sequences.
    float len_penalty = 1.0;
    /// The unknown symbol penalty, where values less than 0 produce more UNKs,
    /// values greater than 0 produce fewer UNKs.
    float unk_penalty = 0.0;
    /// If ``True``, normalizes scores by the length of generated sequences.
    bool normalize_scores = true;
};
/// A single decoding request: generation options, the forced prefix tokens,
/// and the special vocabulary indices of the target tokenizer.
struct SequenceGeneratorJob {
    /// Beam-search options for this job.
    SequenceGeneratorOptions opts;
    /// Tokens every generated sequence must start with.
    ggml_tensor* prefix_seq;
    /// Special token indices (padding / unknown / beginning / end of sequence).
    std::int32_t pad_idx;
    std::int32_t unk_idx;
    std::int32_t bos_idx;
    std::int32_t eos_idx;
};
/// Represents a hypothesis produced by a sequence generator.
struct Hypothesis {
    /// The generated sequence.
    ggml_tensor* seq;
    /// The score of the hypothesis.
    float score;
    /// The score of each individual sequence step.
    ggml_tensor* step_scores;
};
/// Runs beam-search decoding over `encoder_output` according to `opts`.
/// Result tensors are allocated in `result_ctx` (caller-managed).
/// NOTE(review): the number of returned hypotheses and the ownership of the
/// returned array are not visible from this header — presumably opts.beam_size
/// entries and caller-freed; confirm in the implementation.
extern "C" Hypothesis* generate_sequence(
    fairseq2_model& model,
    const SequenceGeneratorJob& opts,
    ggml_tensor* encoder_output,
    ggml_tensor* encoder_padding_mask,
    ggml_context* result_ctx
);