#pragma once

#include <map>
#include <string>
#include <vector>

#include "ggml.h"

struct fairseq2_model {
    // Context owning the memory of all model tensors.
    ggml_context* tensors_ctx;
    // Named tensors; every tensor should belong to tensors_ctx.
    std::map<std::string, struct ggml_tensor*> tensors;
    void* arch;
    void* hparams;
    // An inference context, not managed by this object.
    // TODO: is this the best place to store this, or should we also pass it to all forward methods?
    ggml_context* ctx;
};

/// Allocates the fairseq2 model and its hyperparameters.
extern "C" fairseq2_model* fairseq2_model_alloc();

/// Frees the model and all the tensors it owns.
extern "C" void fairseq2_model_free(fairseq2_model* model);

extern "C" void fairseq2_model_set_inference_ctx(fairseq2_model* model, ggml_context* ctx);
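
// A minimal lifecycle sketch for callers (hypothetical usage, not part of the
// API; assumes the tensors are loaded elsewhere, e.g. by a model loader, and
// that `params` is a ggml_init_params prepared by the caller):
//
//     fairseq2_model* model = fairseq2_model_alloc();
//     // ... load named tensors into model->tensors_ctx / model->tensors ...
//     ggml_context* ctx = ggml_init(params);  // scratch context for inference
//     fairseq2_model_set_inference_ctx(model, ctx);
//     // ... build and run forward graphs ...
//     ggml_free(ctx);             // the inference ctx is owned by the caller
//     fairseq2_model_free(model); // frees tensors_ctx and all owned tensors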

// Helpers for building std::string arguments from C callers (e.g. across an FFI boundary).
extern "C" std::string* std_string_alloc(char* c_str);
extern "C" void std_string_free(std::string* str);

struct Linear {
    struct ggml_tensor* weight; // out_dim x in_dim
    struct ggml_tensor* bias;   // out_dim
};

std::size_t Linear_size(int32_t input_dim, int32_t output_dim);
void Linear_init(Linear& self, fairseq2_model& model, const std::string& prefix, int input_dim, int output_dim, bool bias);
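
// Presumably (an assumption from the `prefix` parameters, not guaranteed by
// this header), the *_init functions bind their fields to entries of
// `model.tensors` under dotted names derived from the prefix, e.g.:
//
//     model.tensors["decoder.layers.0.ffn.inner_proj.weight"]
//     model.tensors["decoder.layers.0.ffn.inner_proj.bias"]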

// LayerNorm
struct LayerNorm {
    struct ggml_tensor* weight; // model_dim
    struct ggml_tensor* bias;   // model_dim
};

std::size_t LayerNorm_size(int32_t dim);
void LayerNorm_init(LayerNorm& self, fairseq2_model& model, const std::string& prefix, int dim);

// ConformerConvolution
// struct ConformerConvolution {
//     // pointwise_conv1: Conv1d
//     // pointwise_conv1_activation: GLU
//     // depthwise_conv: Conv1d
//     // batch_norm: BatchNorm1d
//     // depthwise_activation: Module
//     // pointwise_conv2: Conv1d
// };
// std::size_t ConformerConvolution_size(int32_t dim);
// void ConformerConvolution_init(ConformerConvolution* self, fairseq2_model& model, const std::string& prefix, int dim);

struct MultiheadAttention {
    // num_key_value_heads: int
    struct Linear q_proj;
    struct Linear k_proj;
    struct Linear v_proj;
    // pos_encoder: Optional[PositionEncoder]
    struct ggml_tensor* bias_k;
    struct ggml_tensor* bias_v;
    // add_zero_attn: bool
    // head_scale_weight: Optional[Parameter]
    struct Linear output_proj;
};

void MultiheadAttention_init(MultiheadAttention& self, fairseq2_model& model, const std::string& prefix, int model_dim, int num_heads);
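
// Sketch of the computation these fields support: standard scaled dot-product
// attention over num_heads heads (the actual graph is presumably built in the
// implementation file):
//
//     Q = q_proj(x); K = k_proj(x); V = v_proj(x)   // each split into heads
//     head_dim = model_dim / num_heads
//     attn = softmax(Q @ K^T / sqrt(head_dim)) @ V
//     out  = output_proj(concat_heads(attn))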

struct StandardFeedForwardNetwork {
    struct Linear inner_proj; // ffn_inner_dim x model_dim
    // inner_activation: ReLU for unity
    // struct Dropout inner_dropout;
    struct LayerNorm inner_layer_norm; // ffn_inner_dim
    struct Linear output_proj; // model_dim x ffn_inner_dim
};

std::size_t StandardFeedForwardNetwork_size(int32_t dim, int32_t inner_dim);
void StandardFeedForwardNetwork_init(
    StandardFeedForwardNetwork& self,
    fairseq2_model& model,
    const std::string& prefix,
    int model_dim,
    int inner_dim
);

extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* input
);
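
// A minimal sketch of what the forward presumably computes, assuming the
// fairseq2 StandardFeedForwardNetwork order and that Linear is lowered to
// ggml_mul_mat + ggml_add (an assumption; the real graph is built in the
// implementation file):
//
//     ggml_context* ctx = model.ctx;
//     ggml_tensor* h = ggml_mul_mat(ctx, ffn.inner_proj.weight, input);
//     h = ggml_add(ctx, h, ffn.inner_proj.bias);
//     h = ggml_relu(ctx, h);                  // inner_activation (ReLU)
//     // ffn.inner_layer_norm would be applied here when present
//     h = ggml_mul_mat(ctx, ffn.output_proj.weight, h);
//     h = ggml_add(ctx, h, ffn.output_proj.bias);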

// Transformer
enum TransformerNormOrder {
    TRANSFORMER_NORM_ORDER_POST = 0,
    TRANSFORMER_NORM_ORDER_PRE = 1,
    TRANSFORMER_NORM_ORDER_PRE_WITH_NORMFORMER = 2
};
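
// Where each order places LayerNorm around a sublayer, in residual form:
//
//     POST: y = norm(x + sublayer(x))
//     PRE:  y = x + sublayer(norm(x))
//     PRE_WITH_NORMFORMER: pre-norm with additional norms inside the
//     sublayers (e.g. after attention), as in the NormFormer paper.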

struct TransformerDecoderLayer {
    struct MultiheadAttention self_attn;
    struct LayerNorm self_attn_norm;
    // self_attn_dropout: Optional[Dropout]
    struct LayerNorm self_attn_layer_norm;
    struct MultiheadAttention encoder_decoder_attn;
    // encoder_decoder_dropout: Optional[Dropout]
    struct LayerNorm encoder_decoder_attn_layer_norm;
    struct StandardFeedForwardNetwork ffn;
    // ffn_dropout: Optional[Dropout]
    // residual_scale: Optional[Parameter]
    struct LayerNorm ffn_layer_norm;
    // norm_order: TransformerNormOrder
};

// TODO: stub declaration; should take the layer, model, and prefix like the other *_init functions.
void TransformerDecoderLayer_init();

struct TransformerDecoder {
    std::vector<TransformerDecoderLayer> layers;
    struct LayerNorm layer_norm;
};
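
// Sketch of how the decoder is typically applied (hypothetical pseudo-code;
// TransformerDecoderLayer_forward is an assumed helper, not declared here):
//
//     for (auto& layer : decoder.layers)
//         x = TransformerDecoderLayer_forward(layer, x, encoder_output);
//     x = layer_norm(x); // final norm, relevant for pre-norm ordering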

// std::size_t TransformerDecoder_size(int32_t input_dim, int32_t output_dim);
// void TransformerDecoder_init(TransformerDecoder* self, fairseq2_model& model, const std::string& prefix, TransformerNormOrder norm_order);
// std::size_t TransformerEncoder_size(int32_t input_dim, int32_t output_dim);
// void TransformerEncoder_init(TransformerEncoder* self, fairseq2_model& model, const std::string& prefix, TransformerNormOrder norm_order);