
remove size/init functions

Guillaume Wenzek 1 year ago
parent
commit
be236b8584
2 changed files with 19 additions and 190 deletions
  1. + 3 - 86
      ggml/examples/unity/fairseq2.cpp
  2. + 16 - 104
      ggml/examples/unity/fairseq2.h

+ 3 - 86
ggml/examples/unity/fairseq2.cpp

@@ -31,33 +31,7 @@ extern "C" void std_string_free(std::string* str) {
 }
 
 
-
-// Linear
-
-std::size_t Linear_size(int32_t input_dim, int32_t output_dim)
-{
-    return (input_dim * output_dim * ggml_type_size(GGML_TYPE_F32)) // weight
-        + (output_dim * ggml_type_size(GGML_TYPE_F32)); // bias
-};
-
-void Linear_init(
-    Linear& self,
-    fairseq2_model& model,
-    const std::string &prefix,
-    int input_dim,
-    int output_dim,
-    bool bias
-) {
-    self.weight = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, output_dim, input_dim);
-    model.tensors[prefix + ".weight"] = self.weight;
-    if (bias) {
-        self.bias = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, output_dim);
-        model.tensors[prefix + ".inner_proj.bias"] = self.bias;
-    }
-}
-
-extern "C" ggml_tensor*
-Linear_forward(
+extern "C" ggml_tensor* Linear_forward(
     fairseq2_model& model,
     const std::string &prefix,
     ggml_tensor* input  // (d_in)
@@ -73,25 +47,6 @@ Linear_forward(
     );
 }
 
-// LayerNorm
-
-std::size_t LayerNorm_size(int32_t dim)
-{
-    return 2 * dim * ggml_type_size(GGML_TYPE_F32); // weight and bias
-};
-
-void LayerNorm_init(
-    LayerNorm& self,
-    fairseq2_model& model,
-    const std::string &prefix,
-    int dim
-) {
-    self.weight = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, dim);
-    model.tensors[prefix + ".weight"] = self.weight;
-    self.bias = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, dim);
-    model.tensors[prefix + ".bias"] = self.bias;
-}
-
 extern "C" ggml_tensor* LayerNorm_forward(
     fairseq2_model& model,
     const std::string &prefix,
@@ -110,23 +65,6 @@ extern "C" ggml_tensor* LayerNorm_forward(
 }
 
 
-std::size_t StandardFeedForwardNetwork_size(int32_t dim, int32_t inner_dim)
-{
-    return LayerNorm_size(dim) + Linear_size(dim, inner_dim) + Linear_size(inner_dim, dim);
-};
-
-void StandardFeedForwardNetwork_init(
-    StandardFeedForwardNetwork& self,
-    fairseq2_model& model,
-    const std::string &prefix,
-    int model_dim,
-    int inner_dim
-) {
-    Linear_init(self.inner_proj, model, prefix + ".inner_proj", model_dim, inner_dim, true);
-    LayerNorm_init(self.inner_layer_norm, model, prefix + ".inner_layer_norm", inner_dim);
-    Linear_init(self.output_proj, model, prefix + ".output_proj", inner_dim, model_dim, true);
-}
-
 extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
     fairseq2_model& model,
     const std::string& prefix,
@@ -147,26 +85,6 @@ extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
     return seqs;
 }
 
-void MultiheadAttention_init(
-    MultiheadAttention& self,
-    fairseq2_model& model,
-    const std::string &prefix,
-    int model_dim,
-    int num_heads
-) {
-    int bias = true;
-    int num_key_value_heads = num_heads;
-    int head_dim = model_dim / num_heads;
-
-    Linear_init(self.q_proj, model, prefix + ".q_proj", model_dim, model_dim, bias);
-    Linear_init(self.k_proj, model, prefix + ".k_proj", model_dim, head_dim * num_key_value_heads, bias);
-    Linear_init(self.v_proj, model, prefix + ".v_proj", model_dim, model_dim, bias);
-
-    // (H, 1, K_h)
-    self.bias_k = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, num_heads, 1, head_dim * num_key_value_heads/ num_heads);
-    // (H, 1, V_h)
-    self.bias_v = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, num_heads, 1, model_dim / num_heads);
-}
 
 ggml_tensor* reshape_num_head(ggml_context* ctx, ggml_tensor* x, int num_heads) {
     int slen = x->ne[1];
@@ -179,9 +97,8 @@ ggml_tensor* reshape_num_head(ggml_context* ctx, ggml_tensor* x, int num_heads)
 }
 
 
-
-extern "C" ggml_tensor* // (slen, d_in)
-MultiheadAttention_forward(
+// TODO: broken
+extern "C" ggml_tensor* MultiheadAttention_forward(
     fairseq2_model& model,
     const std::string &prefix,
     ggml_tensor* queries,  // (slen, d_in)
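
With the size/init pairs removed, the forward functions are the only entry points: parameter tensors are no longer allocated by the example code, so each forward must resolve its weights from model.tensors by prefix. The diff elides the body of Linear_forward; the following is a minimal sketch of that lookup pattern, assuming the checkpoint loader has already registered "<prefix>.weight" and "<prefix>.bias" (the exact body and tensor names here are assumptions, not part of this diff):

    // Sketch only: assumes model.tensors was populated at checkpoint load time.
    extern "C" ggml_tensor* Linear_forward(
        fairseq2_model& model,
        const std::string &prefix,
        ggml_tensor* input  // (d_in)
    ) {
        ggml_tensor* weight = model.tensors[prefix + ".weight"];  // (d_in, d_out)
        ggml_tensor* bias = model.tensors[prefix + ".bias"];      // (d_out), may be null
        // y = W*x + b
        return ggml_add(
            model.ctx,
            ggml_mul_mat(model.ctx, weight, input),
            bias
        );
    }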

+ 16 - 104
ggml/examples/unity/fairseq2.h

@@ -28,117 +28,29 @@ extern "C" std::string* std_string_alloc(char* c_str);
 extern "C" void std_string_free(std::string* str);
 
 
-struct Linear {
-    struct ggml_tensor* weight;  // out_dim * in_dim
-    struct ggml_tensor* bias;  // out_dim
-};
-
-std::size_t Linear_size(int32_t input_dim, int32_t output_dim);
-void Linear_init(Linear& self,fairseq2_model& model, const std::string &prefix, int input_dim, int output_dim, bool bias);
-
-// LayerNorm
-
-struct LayerNorm {
-    struct ggml_tensor* weight;  // model_dim
-    struct ggml_tensor* bias;  // model_dim
-};
-
-std::size_t LayerNorm_size(int32_t dim);
-
-void LayerNorm_init(LayerNorm& self, fairseq2_model& model, const std::string &prefix, int dim);
-
-// ConformerConvolution
-// struct ConformerConvolution {
-//     // pointwise_conv1: Conv1d
-//     // pointwise_conv1_activation: GLU
-//     // depthwise_conv: Conv1d
-//     // batch_norm: BatchNorm1d
-//     // depthwise_activation: Module
-//     // pointwise_conv2: Conv1d
-// };
-
-// std::size_t ConformerConvolution_size(int32_t dim);
-
-// void ConformerConvolution_init(ConformerConvolution* self, fairseq2_model& model, const std::string &prefix, int dim);
-
-
-
-struct MultiheadAttention {
-    // num_key_value_heads: int
-    struct Linear q_proj;
-    struct Linear k_proj;
-    struct Linear v_proj;
-    // pos_encoder: Optional[PositionEncoder]
-    struct ggml_tensor* bias_k;
-    struct ggml_tensor* bias_v;
-    // add_zero_attn: bool
-    // head_scale_weight: Optional[Parameter]
-    struct Linear output_proj;
-};
-
-void MultiheadAttention_init(MultiheadAttention& self, fairseq2_model& model, const std::string &prefix, int model_dim, int num_heads);
-
-struct StandardFeedForwardNetwork {
-    struct Linear inner_proj; // ffn_inner_dim x model_dim
-    // inner_activation -> Relu for unity
-    // struct Dropout inner_dropout;
-    struct LayerNorm inner_layer_norm; // ffn_inner_dim
-    struct Linear output_proj; // model_dim x ffn_inner_dim
-};
-
-std::size_t StandardFeedForwardNetwork_size(int32_t dim, int32_t inner_dim);
+extern "C" ggml_tensor* Linear_forward(
+    fairseq2_model& model,
+    const std::string &prefix,
+    ggml_tensor* input
+);
 
-void StandardFeedForwardNetwork_init(
-    StandardFeedForwardNetwork& self,
+extern "C" ggml_tensor* LayerNorm_forward(
     fairseq2_model& model,
     const std::string &prefix,
-    int model_dim,
-    int inner_dim
+    ggml_tensor* input
 );
 
 extern "C" ggml_tensor* StandardFeedForwardNetwork_forward(
     fairseq2_model& model,
     const std::string& prefix,
-    ggml_tensor* input
+    ggml_tensor* seqs
 );
 
-// Transformer
-
-enum TransformerNormOrder {
-    TRANSFORMER_NORM_ORDER_POST = 0,
-    TRANSFORMER_NORM_ORDER_PRE = 1,
-    TRANSFORMER_NORM_ORDER_PRE_WITH_NORMFORMER = 2
-};
-
-
-struct TransformerDecoderLayer {
-    struct MultiheadAttention self_attn;
-    struct LayerNorm self_attn_norm;
-    // self_attn_dropout: Optional[Dropout]
-    struct LayerNorm self_attn_layer_norm;
-    struct MultiheadAttention encoder_decoder_attn;
-    // encoder_decoder_dropout: Optional[Dropout]
-    struct LayerNorm encoder_decoder_attn_layer_norm;
-    struct StandardFeedForwardNetwork ffn;
-    // ffn_dropout: Optional[Dropout]
-    // residual_scale: Optional[Parameter]
-    struct LayerNorm ffn_layer_norm;
-    // norm_order: TransformerNormOrder
-};
-
-void TransformerDecoderLayer_init();
-
-
-struct TransformerDecoder {
-    std::vector<TransformerDecoderLayer> layers;
-    struct LayerNorm layer_norm;
-};
-
-// std::size_t TransformerDecoder_size(int32_t input_dim, int32_t output_dim);
-// void TransformerDecoder_init(TransformerEncoder* self, fairseq2_model& model, const std::string &prefix, TransformerNormOrder norm_order);
-
-
-// std::size_t TransformerEncoder_size(int32_t input_dim, int32_t output_dim);
-// void TransformerEncoder_init(TransformerEncoder* self, fairseq2_model& model, const std::string &prefix, TransformerNormOrder norm_order);
-
-//
+extern "C" ggml_tensor* MultiheadAttention_forward(
+    fairseq2_model& model,
+    const std::string &prefix,
+    ggml_tensor* queries,  // (slen, d_in)
+    ggml_tensor* keys,  // (klen, d_in)
+    ggml_tensor* values,  // (klen, d_out)
+    ggml_tensor* _ // (klen, slen)  TODO: do we need to pass the mask here?
+);