unity_model_loader.cpp

// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.

#include "ggml/ggml.h"
#include "ggml/ggml-alloc.h"

#include "common.h"
#include "common-ggml.h"

#include "unity_model_loader.h"

void unity_model_loader::load_hparams(fairseq2_model& model, std::ifstream &fin)
{
    auto& hparams = (unity_hparams&)model.hparams;
    fin.read((char*) &hparams.model_dim, sizeof(hparams.model_dim));

    // w2v2 speech encoder config
    fin.read((char*) &hparams.w2v2_encoder_config__model_dim, sizeof(hparams.w2v2_encoder_config__model_dim));
    fin.read((char*) &hparams.w2v2_encoder_config__max_seq_len, sizeof(hparams.w2v2_encoder_config__max_seq_len));
    fin.read((char*) &hparams.w2v2_encoder_config__feature_dim, sizeof(hparams.w2v2_encoder_config__feature_dim));
    fin.read((char*) &hparams.w2v2_encoder_config__use_fbank, sizeof(hparams.w2v2_encoder_config__use_fbank));
    fin.read((char*) &hparams.w2v2_encoder_config__first_pass_dropout_p, sizeof(hparams.w2v2_encoder_config__first_pass_dropout_p));
    fin.read((char*) &hparams.w2v2_encoder_config__layer_norm_features, sizeof(hparams.w2v2_encoder_config__layer_norm_features));
    fin.read((char*) &hparams.w2v2_encoder_config__feature_extractor_bias, sizeof(hparams.w2v2_encoder_config__feature_extractor_bias));
    fin.read((char*) &hparams.w2v2_encoder_config__feature_extractor_layer_norm_convs, sizeof(hparams.w2v2_encoder_config__feature_extractor_layer_norm_convs));
    fin.read((char*) &hparams.w2v2_encoder_config__feature_grad_scale, sizeof(hparams.w2v2_encoder_config__feature_grad_scale));
    fin.read((char*) &hparams.w2v2_encoder_config__num_fbank_channels, sizeof(hparams.w2v2_encoder_config__num_fbank_channels));
    fin.read((char*) &hparams.w2v2_encoder_config__fbank_stride, sizeof(hparams.w2v2_encoder_config__fbank_stride));
    fin.read((char*) &hparams.w2v2_encoder_config__sample_fbank_every_k, sizeof(hparams.w2v2_encoder_config__sample_fbank_every_k));
    fin.read((char*) &hparams.w2v2_encoder_config__pos_encoder_depth, sizeof(hparams.w2v2_encoder_config__pos_encoder_depth));
    fin.read((char*) &hparams.w2v2_encoder_config__pos_conv_kernel_size, sizeof(hparams.w2v2_encoder_config__pos_conv_kernel_size));
    fin.read((char*) &hparams.w2v2_encoder_config__num_pos_conv_groups, sizeof(hparams.w2v2_encoder_config__num_pos_conv_groups));
    fin.read((char*) &hparams.w2v2_encoder_config__use_conformer, sizeof(hparams.w2v2_encoder_config__use_conformer));
    fin.read((char*) &hparams.w2v2_encoder_config__num_encoder_layers, sizeof(hparams.w2v2_encoder_config__num_encoder_layers));
    fin.read((char*) &hparams.w2v2_encoder_config__num_encoder_attn_heads, sizeof(hparams.w2v2_encoder_config__num_encoder_attn_heads));
    fin.read((char*) &hparams.w2v2_encoder_config__ffn_inner_dim, sizeof(hparams.w2v2_encoder_config__ffn_inner_dim));
    fin.read((char*) &hparams.w2v2_encoder_config__dropout_p, sizeof(hparams.w2v2_encoder_config__dropout_p));
    fin.read((char*) &hparams.w2v2_encoder_config__attn_dropout_p, sizeof(hparams.w2v2_encoder_config__attn_dropout_p));
    fin.read((char*) &hparams.w2v2_encoder_config__layer_drop_p, sizeof(hparams.w2v2_encoder_config__layer_drop_p));
    fin.read((char*) &hparams.w2v2_encoder_config__norm_order, sizeof(hparams.w2v2_encoder_config__norm_order));
    fin.read((char*) &hparams.w2v2_encoder_config__depthwise_conv_kernel_size, sizeof(hparams.w2v2_encoder_config__depthwise_conv_kernel_size));

    // NLLB text decoder config
    fin.read((char*) &hparams.nllb_config__model_dim, sizeof(hparams.nllb_config__model_dim));
    fin.read((char*) &hparams.nllb_config__max_seq_len, sizeof(hparams.nllb_config__max_seq_len));
    fin.read((char*) &hparams.nllb_config__vocabulary_size, sizeof(hparams.nllb_config__vocabulary_size));
    fin.read((char*) &hparams.nllb_config__pad_idx, sizeof(hparams.nllb_config__pad_idx));
    fin.read((char*) &hparams.nllb_config__num_encoder_layers, sizeof(hparams.nllb_config__num_encoder_layers));
    fin.read((char*) &hparams.nllb_config__num_decoder_layers, sizeof(hparams.nllb_config__num_decoder_layers));
    fin.read((char*) &hparams.nllb_config__num_encoder_attn_heads, sizeof(hparams.nllb_config__num_encoder_attn_heads));
    fin.read((char*) &hparams.nllb_config__num_decoder_attn_heads, sizeof(hparams.nllb_config__num_decoder_attn_heads));
    fin.read((char*) &hparams.nllb_config__ffn_inner_dim, sizeof(hparams.nllb_config__ffn_inner_dim));
    fin.read((char*) &hparams.nllb_config__dropout_p, sizeof(hparams.nllb_config__dropout_p));

    // T2U config
    fin.read((char*) &hparams.t2u_config__model_dim, sizeof(hparams.t2u_config__model_dim));
    fin.read((char*) &hparams.t2u_config__unit_max_seq_len, sizeof(hparams.t2u_config__unit_max_seq_len));
    fin.read((char*) &hparams.t2u_config__unit_vocabulary_size, sizeof(hparams.t2u_config__unit_vocabulary_size));
    fin.read((char*) &hparams.t2u_config__unit_pad_idx, sizeof(hparams.t2u_config__unit_pad_idx));
    fin.read((char*) &hparams.t2u_config__num_encoder_layers, sizeof(hparams.t2u_config__num_encoder_layers));
    fin.read((char*) &hparams.t2u_config__num_decoder_layers, sizeof(hparams.t2u_config__num_decoder_layers));
    fin.read((char*) &hparams.t2u_config__num_encoder_attn_heads, sizeof(hparams.t2u_config__num_encoder_attn_heads));
    fin.read((char*) &hparams.t2u_config__num_decoder_attn_heads, sizeof(hparams.t2u_config__num_decoder_attn_heads));
    fin.read((char*) &hparams.t2u_config__ffn_inner_dim, sizeof(hparams.t2u_config__ffn_inner_dim));
    fin.read((char*) &hparams.t2u_config__dropout_p, sizeof(hparams.t2u_config__dropout_p));

    // adaptor / top-level config
    fin.read((char*) &hparams.use_text_encoder, sizeof(hparams.use_text_encoder));
    fin.read((char*) &hparams.use_conformer_adaptor, sizeof(hparams.use_conformer_adaptor));
    fin.read((char*) &hparams.num_adaptor_layers, sizeof(hparams.num_adaptor_layers));
    fin.read((char*) &hparams.adaptor_kernel_size, sizeof(hparams.adaptor_kernel_size));
    fin.read((char*) &hparams.adaptor_stride, sizeof(hparams.adaptor_stride));
    fin.read((char*) &hparams.adaptor_layer_norm, sizeof(hparams.adaptor_layer_norm));
    fin.read((char*) &hparams.adaptor_dropout_p, sizeof(hparams.adaptor_dropout_p));
}

std::size_t
unity_model_loader::compute_context_size(void* raw_hparams)
{
    // TODO
    // Cast the opaque pointer back to the concrete hparams type before use.
    auto& hparams = *(unity_hparams*)raw_hparams;
    return hparams.model_dim * 1024 * 100;
}

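// Usage sketch (illustrative, not part of the original file): the byte count
// returned above is the kind of value a caller would hand to ggml_init() when
// creating the context that backs the model tensors. `loader` and `hparams`
// below are hypothetical locals of that caller.
//
//     struct ggml_init_params params = {
//         /*.mem_size   =*/ loader.compute_context_size(&hparams),
//         /*.mem_buffer =*/ NULL,
//         /*.no_alloc   =*/ false,
//     };
//     model.ctx = ggml_init(params);
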
struct UnityArch {
    struct TransformerDecoder text_decoder;
};

void unity_model_loader::tensors_alloc(fairseq2_model &model)
{
    auto& hparams = (unity_hparams&)model.hparams;
    auto& arch = (UnityArch&)model.arch;
    const auto ctx = model.ctx;
    auto& tensors = model.tensors;

    const auto vocab_size = hparams.nllb_config__vocabulary_size;
    const auto model_dim = hparams.nllb_config__model_dim;

    // This can be simplified by adding syntax sugar

    // frontend
    // arch.frontend_embed_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, vocab_size, dim);
    // tensor_map["text_decoder_frontend.embed.weight"] = arch.frontend_embed_w;

    // layers: only the decoder self-attention blocks and their layer norms are
    // wired up for now; the remaining tensors are still commented out below.
    {
        const auto n_layers = hparams.nllb_config__num_decoder_layers;
        arch.text_decoder.layers = std::vector<TransformerDecoderLayer>(n_layers);
        // Bind a reference so the initialised layers end up in the model, not in a copy.
        auto& layers = arch.text_decoder.layers;
        auto num_heads = hparams.nllb_config__num_decoder_attn_heads;
        for (int i = 0; i < n_layers; ++i) {
            auto prefix = "text_decoder.layers." + std::to_string(i) + ".";
            MultiheadAttention_init(layers[i].self_attn, model, prefix + "self_attn", model_dim, num_heads);
            LayerNorm_init(layers[i].self_attn_norm, model, prefix + "self_attn_norm", model_dim);
        }
    }

    // // layer_norm
    // arch.layer_norm_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
    // tensor_map["text_decoder.layer_norm.weight"] = arch.layer_norm_w;
    // arch.layer_norm_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
    // tensor_map["text_decoder.layer_norm.bias"] = arch.layer_norm_b;
}

extern "C" void load_unity_ggml_file(fairseq2_model& model, const char* fname) {
    return load_fairseq2_ggml_file<unity_model_loader>(model, fname);
}
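
// Example caller (hypothetical; assumes fairseq2_model is default-constructible
// and that the path points at a matching ggml checkpoint; the filename below is
// made up for illustration):
//
//     fairseq2_model model;
//     load_unity_ggml_file(model, "unity_model.ggml");
//     // model.hparams, model.ctx and model.tensors should then be populated.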