// fairseq2.cpp

#include <cstdint>
#include <string>

#include "ggml.h"
#include "fairseq2.h"

/// Allocate the fairseq2 model and hyperparameters.
extern "C" fairseq2_model* fairseq2_model_alloc() {
    // Pre-allocate some memory to write hyperparameters and tensor pointers.
    auto* model = new fairseq2_model;
    model->hparams = new std::uint8_t[8 * 1024];
    model->arch = new std::uint64_t[16 * 1024]; // max tensors allowed
    return model;
}
extern "C" void fairseq2_model_free(fairseq2_model* model) {
    // hparams and arch were allocated with new[], so release them with delete[].
    delete[] (std::uint64_t*)(model->arch);
    delete[] (std::uint8_t*)model->hparams;
    delete model;
}
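
// Usage sketch (illustrative): the intended lifecycle from the caller's side;
// the loading step in the middle is handled by the model loader elsewhere.
//
//   fairseq2_model* model = fairseq2_model_alloc();
//   // ... loader fills model->hparams, model->ctx and model->tensors ...
//   fairseq2_model_free(model);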

extern "C" std::string* std_string_alloc(char* c_str) {
    return new std::string(c_str);
}

extern "C" void std_string_free(std::string* str) {
    delete str;
}

// Linear

std::size_t Linear_size(int32_t input_dim, int32_t output_dim)
{
    return (input_dim * output_dim * ggml_type_size(GGML_TYPE_F32)) // weight
        + (output_dim * ggml_type_size(GGML_TYPE_F32));             // bias
}
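
// Worked example: with input_dim = 1024, output_dim = 4096 and F32 weights
// (4 bytes per element), this returns 1024 * 4096 * 4 = 16 MiB for the
// weight plus 4096 * 4 = 16 KiB for the bias.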

void Linear_init(
    Linear& self,
    fairseq2_model& model,
    const std::string& prefix,
    int input_dim,
    int output_dim,
    bool bias
) {
    self.weight = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, output_dim, input_dim);
    model.tensors[prefix + ".weight"] = self.weight;
    if (bias) {
        self.bias = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, output_dim);
        // Register under the layer's own prefix; the previous ".inner_proj.bias"
        // key never matched the ".bias" lookup done by Linear_forward.
        model.tensors[prefix + ".bias"] = self.bias;
    }
}

extern "C" ggml_tensor* Linear_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* input
) {
    ggml_tensor* weight = model.tensors[prefix + ".weight"];
    ggml_tensor* out = ggml_mul_mat(model.ctx, weight, input);
    ggml_tensor* bias = model.tensors[prefix + ".bias"];
    // Layers initialized with bias == false have no ".bias" tensor to add.
    if (bias == nullptr) return out;
    return ggml_add(model.ctx, out, bias);
}
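
// Note on shapes: ggml_mul_mat(ctx, a, b) contracts over ne[0] of both
// arguments (it requires a->ne[0] == b->ne[0]), i.e. the weight is applied
// as a row-major (output_dim, input_dim) matrix to each row of `input`.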

// LayerNorm

std::size_t LayerNorm_size(int32_t dim)
{
    return 2 * dim * ggml_type_size(GGML_TYPE_F32); // weight and bias
}

void LayerNorm_init(
    LayerNorm& self,
    fairseq2_model& model,
    const std::string& prefix,
    int dim
) {
    self.weight = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, dim);
    model.tensors[prefix + ".weight"] = self.weight;
    self.bias = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, dim);
    model.tensors[prefix + ".bias"] = self.bias;
}

extern "C" ggml_tensor* LayerNorm_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* input
) {
    ggml_tensor* weight = model.tensors[prefix + ".weight"];
    ggml_tensor* bias = model.tensors[prefix + ".bias"];
    auto ctx = model.ctx;
    // TODO: should `eps` be part of the unity hparams?
    input = ggml_norm(ctx, input, /*eps*/ 1e-5);
    return ggml_add(
        ctx,
        ggml_mul(ctx, ggml_repeat(ctx, weight, input), input),
        ggml_repeat(ctx, bias, input)
    );
}
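
// In equation form this is y = (x - mean(x)) / sqrt(var(x) + eps) * weight + bias:
// ggml_norm performs the normalization, while the ggml_mul/ggml_add pair applies
// the learned affine transform, with ggml_repeat broadcasting weight and bias
// across the non-feature dimensions.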

std::size_t StandardFeedForwardNetwork_size(int32_t dim, int32_t inner_dim)
{
    // The inner layer norm is applied to the inner_dim-wide activations
    // (see StandardFeedForwardNetwork_init below).
    return LayerNorm_size(inner_dim) + Linear_size(dim, inner_dim) + Linear_size(inner_dim, dim);
}

void StandardFeedForwardNetwork_init(
    StandardFeedForwardNetwork& self,
    fairseq2_model& model,
    const std::string& prefix,
    int model_dim,
    int inner_dim
) {
    Linear_init(self.inner_proj, model, prefix + ".inner_proj", model_dim, inner_dim, true);
    LayerNorm_init(self.inner_layer_norm, model, prefix + ".inner_layer_norm", inner_dim);
    Linear_init(self.output_proj, model, prefix + ".output_proj", inner_dim, model_dim, true);
}

extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
    fairseq2_model& model,
    const std::string& prefix,
    ggml_tensor* seqs
) {
    seqs = Linear_forward(model, prefix + ".inner_proj", seqs);
    // inner_activation = ReLU // TODO: allow other activations
    seqs = ggml_relu(model.ctx, seqs);
    if (model.tensors.find(prefix + ".inner_layer_norm.weight") != model.tensors.end()) {
        seqs = LayerNorm_forward(model, prefix + ".inner_layer_norm", seqs);
    }
    // TODO: inference dropout
    // if self.inner_dropout is not None:
    //     seqs = self.inner_dropout(seqs)
    seqs = Linear_forward(model, prefix + ".output_proj", seqs);
    return seqs;
}
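
// Usage sketch (the prefix below is hypothetical; use whatever prefix the
// module was registered under via StandardFeedForwardNetwork_init):
//
//   ggml_tensor* h = StandardFeedForwardNetwork_forward(
//       model, "text_encoder.layers.0.ffn", seqs);
//
// The forward pass only looks tensors up by name, so it works for any FFN
// registered under that prefix.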

void MultiheadAttention_init(
    MultiheadAttention& self,
    fairseq2_model& model,
    const std::string& prefix,
    int model_dim,
    int num_heads
) {
    bool bias = true;
    int num_key_value_heads = num_heads;
    int head_dim = model_dim / num_heads;
    Linear_init(self.q_proj, model, prefix + ".q_proj", model_dim, model_dim, bias);
    Linear_init(self.k_proj, model, prefix + ".k_proj", model_dim, head_dim * num_key_value_heads, bias);
    Linear_init(self.v_proj, model, prefix + ".v_proj", model_dim, model_dim, bias);
    // (H, 1, K_h)
    self.bias_k = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, num_heads, 1, head_dim * num_key_value_heads / num_heads);
    // (H, 1, V_h)
    self.bias_v = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, num_heads, 1, model_dim / num_heads);
}
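
// Worked example: with model_dim = 1024 and num_heads = 16, head_dim = 64 and
// (since num_key_value_heads == num_heads here) bias_k and bias_v are both
// (16, 1, 64) tensors: one 64-dim bias row per head.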

// void TransformerDecoderLayer_init(TransformerDecoderLayer& self);