fairseq2.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. #include "ggml.h"
  2. #include "fairseq2.h"
  3. /// allocate the fairseq2 model and hyperparameters
  4. extern "C" fairseq2_model* fairseq2_model_alloc() {
  5. // pre-allocate some memory to write hyperparameters and tensors pointers
  6. auto* model = new fairseq2_model;
  7. model->hparams = new std::uint8_t[8 * 1024];
  8. model->arch = new std::uint64_t[16 * 1024]; // max tensors allowed
  9. return model;
  10. };
  11. extern "C" void fairseq2_model_free(fairseq2_model* model) {
  12. delete (std::uint64_t*)(model->arch);
  13. delete (std::uint8_t*)model->hparams;
  14. delete model;
  15. };
  /// Creates a heap-allocated std::string holding a copy of `c_str`.
  ///
  /// @param c_str NUL-terminated text to copy. A null pointer produces an
  ///              empty string, because constructing std::string from a null
  ///              pointer is undefined behavior.
  /// @return The new string; release it with std_string_free().
  extern "C" std::string* std_string_alloc(char* c_str) {
      if (c_str == nullptr) {
          return new std::string();
      }
      return new std::string(c_str);
  }
  /// Destroys a string created by std_string_alloc().
  ///
  /// @param str String to delete; a null pointer is a no-op.
  extern "C" void std_string_free(std::string* str) {
      if (str != nullptr) {
          delete str;
      }
  }
  22. // Linear
  23. std::size_t Linear_size(int32_t input_dim, int32_t output_dim)
  24. {
  25. return (input_dim * output_dim * ggml_type_size(GGML_TYPE_F32)) // weight
  26. + (output_dim * ggml_type_size(GGML_TYPE_F32)); // bias
  27. };
  28. void Linear_init(
  29. Linear& self,
  30. fairseq2_model& model,
  31. const std::string &prefix,
  32. int input_dim,
  33. int output_dim,
  34. bool bias
  35. ) {
  36. self.weight = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, output_dim, input_dim);
  37. model.tensors[prefix + ".weight"] = self.weight;
  38. if (bias) {
  39. self.bias = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, output_dim);
  40. model.tensors[prefix + ".inner_proj.bias"] = self.bias;
  41. }
  42. }
  43. extern "C" ggml_tensor*
  44. Linear_forward(
  45. fairseq2_model& model,
  46. const std::string &prefix,
  47. ggml_tensor* input // (d_in)
  48. ) {
  49. // Note: for now we assumed un-batched input
  50. ggml_tensor* weight = model.tensors[prefix + ".weight"]; // (d_in, d_out)
  51. ggml_tensor* bias = model.tensors[prefix + ".bias"]; // (d_out)
  52. return ggml_add(
  53. model.ctx,
  54. ggml_mul_mat(model.ctx, weight, input), // (d_out)
  55. bias
  56. );
  57. }
  58. // LayerNorm
  59. std::size_t LayerNorm_size(int32_t dim)
  60. {
  61. return 2 * dim * ggml_type_size(GGML_TYPE_F32); // weight and bias
  62. };
  63. void LayerNorm_init(
  64. LayerNorm& self,
  65. fairseq2_model& model,
  66. const std::string &prefix,
  67. int dim
  68. ) {
  69. self.weight = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, dim);
  70. model.tensors[prefix + ".weight"] = self.weight;
  71. self.bias = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, dim);
  72. model.tensors[prefix + ".bias"] = self.bias;
  73. }
  74. extern "C" ggml_tensor* LayerNorm_forward(
  75. fairseq2_model& model,
  76. const std::string &prefix,
  77. ggml_tensor* input) {
  78. ggml_tensor* weight = model.tensors[prefix + ".weight"];
  79. ggml_tensor* bias = model.tensors[prefix + ".bias"];
  80. auto ctx = model.ctx;
  81. // TODO: should `eps` be part of unity hparams ?
  82. input = ggml_norm(ctx, input, /*eps*/1e-5);
  83. return ggml_add(
  84. ctx,
  85. ggml_mul(ctx, ggml_repeat(ctx, weight, input), input),
  86. ggml_repeat(ctx, bias, input)
  87. );
  88. }
  89. std::size_t StandardFeedForwardNetwork_size(int32_t dim, int32_t inner_dim)
  90. {
  91. return LayerNorm_size(dim) + Linear_size(dim, inner_dim) + Linear_size(inner_dim, dim);
  92. };
  93. void StandardFeedForwardNetwork_init(
  94. StandardFeedForwardNetwork& self,
  95. fairseq2_model& model,
  96. const std::string &prefix,
  97. int model_dim,
  98. int inner_dim
  99. ) {
  100. Linear_init(self.inner_proj, model, prefix + ".inner_proj", model_dim, inner_dim, true);
  101. LayerNorm_init(self.inner_layer_norm, model, prefix + ".inner_layer_norm", inner_dim);
  102. Linear_init(self.output_proj, model, prefix + ".output_proj", inner_dim, model_dim, true);
  103. }
  104. extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
  105. fairseq2_model& model,
  106. const std::string& prefix,
  107. ggml_tensor* seqs
  108. ) {
  109. seqs = Linear_forward(model, prefix + ".inner_proj", seqs);
  110. // inner_activation = ReLu // TODO: allow other activation
  111. seqs = ggml_relu(model.ctx, seqs);
  112. if (model.tensors.find(prefix + ".inner_layer_norm.weight") != model.tensors.end()) {
  113. seqs = LayerNorm_forward(model, prefix + ".inner_layer_norm", seqs);
  114. }
  115. // TODO: inference dropout
  116. // if self.inner_dropout is not None:
  117. // seqs = self.inner_dropout(seqs)
  118. seqs = Linear_forward(model, prefix + ".output_proj", seqs);
  119. return seqs;
  120. }
  121. void MultiheadAttention_init(
  122. MultiheadAttention& self,
  123. fairseq2_model& model,
  124. const std::string &prefix,
  125. int model_dim,
  126. int num_heads
  127. ) {
  128. int bias = true;
  129. int num_key_value_heads = num_heads;
  130. int head_dim = model_dim / num_heads;
  131. Linear_init(self.q_proj, model, prefix + ".q_proj", model_dim, model_dim, bias);
  132. Linear_init(self.k_proj, model, prefix + ".k_proj", model_dim, head_dim * num_key_value_heads, bias);
  133. Linear_init(self.v_proj, model, prefix + ".v_proj", model_dim, model_dim, bias);
  134. // (H, 1, K_h)
  135. self.bias_k = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, num_heads, 1, head_dim * num_key_value_heads/ num_heads);
  136. // (H, 1, V_h)
  137. self.bias_v = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, num_heads, 1, model_dim / num_heads);
  138. }
  139. // void TransformerDecoderLayer_init(TransformerDecoderLayer& self);