Guillaume Wenzek 1 year ago
commit c28db8c8ac

+ 16 - 4
ggml/examples/unity/fairseq2.cpp

@@ -212,6 +212,7 @@ extern "C" ggml_tensor* MultiheadAttention_forward(
     ggml_set_name(qk, "qk_scaled");
 
     // TODO: Should we replace this by ggml_diag_mask_inf ?
+    // TODO: masks have the wrong shape to be added here
     if (mask) qk = ggml_add(ctx, qk, mask);
     // TODO: upgrade qk to float32 if needed
     ggml_tensor* attn_weights = ggml_soft_max(ctx, qk);  // (B * H, S, Sk)
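
The first TODO above refers to ggml's built-in causal masking. A minimal sketch of that alternative, assuming the mask is purely causal (self-attention with no padding) and that an n_past offset is available; attention masks of arbitrary shape would still need the explicit add:

    // Hypothetical alternative: let ggml build the causal mask itself.
    // ggml_diag_mask_inf sets entries above the diagonal (offset by n_past)
    // to -INF, so the following soft_max zeroes them out.
    qk = ggml_diag_mask_inf(ctx, qk, /*n_past=*/0);
    ggml_tensor* attn_weights = ggml_soft_max(ctx, qk);  // (B * H, S, Sk)
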
@@ -341,7 +342,14 @@ extern "C" ggml_tensor* TransformerEmbeddingFrontend_forward(
         embeds = ggml_get_rows(ctx, embed_weights, seqs);
     } else {
         // ggml_get_rows isn't very flexible, we have to handle the reshape ourselves.
-        embeds = ggml_get_rows(ctx, embed_weights, ggml_reshape_1d(ctx, seqs, ggml_nelements(seqs)));
+        ggml_tensor* flat_seqs = seqs;
+        if (!ggml_is_contiguous(seqs)) {
+            flat_seqs->type = GGML_TYPE_F32;
+            flat_seqs = ggml_cont(ctx, flat_seqs);
+        }
+        flat_seqs = ggml_reshape_1d(ctx, flat_seqs, ggml_nelements(seqs));
+        flat_seqs->type = GGML_TYPE_I32;
+        embeds = ggml_get_rows(ctx, embed_weights, flat_seqs);
         embeds = ggml_reshape_4d(ctx, embeds, embed_weights->ne[0], seqs->ne[0], seqs->ne[1], seqs->ne[2]);
         embeds->n_dims = seqs->n_dims + 1;
     }
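
The relabelling above works because GGML_TYPE_I32 and GGML_TYPE_F32 share the same 4-byte element size, while ggml_cont presumably did not handle I32 tensors at the time. A sketch of the same trick as a standalone helper (hypothetical name; it also restores the type label on the original view):

    // Hypothetical helper: flatten a possibly non-contiguous I32 index tensor
    // so it can be fed to ggml_get_rows, which expects 1-D I32 indices.
    static ggml_tensor* flatten_token_ids(ggml_context* ctx, ggml_tensor* seqs) {
        ggml_tensor* flat = seqs;
        if (!ggml_is_contiguous(seqs)) {
            flat->type = GGML_TYPE_F32;   // relabel: same element size as I32
            flat = ggml_cont(ctx, flat);  // copy into a contiguous buffer
            seqs->type = GGML_TYPE_I32;   // restore the label on the original view
        }
        flat = ggml_reshape_1d(ctx, flat, ggml_nelements(seqs));
        flat->type = GGML_TYPE_I32;       // ggml_get_rows expects I32 indices
        return flat;
    }
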
@@ -551,8 +559,8 @@ void _fan_out_encoder_output(
     // (S_enc, M) -> (B, S_enc, M)
     *encoder_output_out = ggml_repeat(ctx, encoder_output, shape);
     // (S_enc) -> (B, S_enc)
-    ggml_tensor* shape_mask = ggml_new_tensor_2d(ctx, GGML_TYPE_I8, encoder_padding_mask->ne[0], beam_size);
     if (encoder_padding_mask != nullptr) {
+        ggml_tensor* shape_mask = ggml_new_tensor_3d(ctx, GGML_TYPE_I8, encoder_padding_mask->ne[0], 1, beam_size);
         *encoder_padding_mask_out = ggml_repeat(ctx, encoder_padding_mask, shape_mask);
     }
 }
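
For context, ggml_repeat tiles its first argument to the shape of its second, so the fan-out only needs a target-shaped dummy tensor. A small sketch under that assumption (ggml's ne[0] is the innermost dimension, so a fairseq2 (B, S_enc, M) tensor has ne = {M, S_enc, B}; the dummy is used only for its shape):

    // Sketch: fan encoder output (S_enc, M) out to (B, S_enc, M) for B beams.
    ggml_tensor* out_shape = ggml_new_tensor_3d(
        ctx, GGML_TYPE_F32,
        encoder_output->ne[0],  // M
        encoder_output->ne[1],  // S_enc
        beam_size               // B
    );
    ggml_tensor* fanned_out = ggml_repeat(ctx, encoder_output, out_shape);
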
@@ -599,6 +607,7 @@ void _bootstrap_seqs_and_scores(
     // output to correctly initialize its incremental state.
     // Note: we don't start decoding the last prefix token just yet.
     seqs = ggml_slice(ctx, seqs, 0, 0, prefix_seq_len - 1);
+    seqs->type = GGML_TYPE_I32;
 
     // Bootstrap the model state with prefix sequence.
     seqs = TransformerEmbeddingFrontend_forward(model, "text_decoder_frontend", seqs);
@@ -608,7 +617,7 @@ void _bootstrap_seqs_and_scores(
         seqs,
         /*padding_mask*/ nullptr,
         encoder_output,
-        encoder_padding_mask
+        /*encoder_padding_mask*/ nullptr // TODO: do we need padding for the encoder?
         // TODO: state_bag
     );
     // TODO state_bag.increment_step(prefix_seq_len - 1)
@@ -675,6 +684,7 @@ int StandardBeamSearch_step(
         // The first step always indicates the beginning of the sequence and
         // has no score.
         if (step_nr > 0) {
+            last_scores = ggml_slice(ctx, last_scores, 1, 0, 1);
             lprobs = ggml_add_inplace(ctx, lprobs, ggml_repeat(ctx, last_scores, lprobs));
         }
     } else {
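
Assuming ggml_slice(ctx, t, axis, start, end) as it is used elsewhere in this file, the added line keeps a single entry per beam so the previous-step scores can be broadcast over the new log-probabilities:

    // Sketch of the broadcast-add: last_scores (one score per beam after the
    // slice above) is tiled to the shape of lprobs, then added in place.
    ggml_tensor* tiled_scores = ggml_repeat(ctx, last_scores, lprobs);
    lprobs = ggml_add_inplace(ctx, lprobs, tiled_scores);
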
@@ -791,7 +801,9 @@ extern "C" float generate_sequence(
     // Initialize buffers. (B, S)
     ggml_tensor* seqs = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, max_seq_len, beam_size);
     ggml_set_i32(seqs, 0);
+    ggml_set_name(seqs, "seqs_0");
     ggml_tensor* scores = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, max_seq_len, beam_size);
+    ggml_set_name(scores, "scores_0");
     ggml_set_f32(scores, 0.0);
 
     IncrementalStateBag state_bag = {};
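
The new ggml_set_name calls label the two generation buffers so they can be retrieved by name later, e.g. while debugging a computation graph. A small usage sketch, assuming the same ggml context is still alive:

    // Named tensors can be looked up from their context by name:
    ggml_tensor* seqs_buf = ggml_get_tensor(ctx, "seqs_0");
    GGML_ASSERT(seqs_buf != nullptr);
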
@@ -845,7 +857,7 @@ extern "C" float generate_sequence(
             decoder_input,
             nullptr,  // We never generate PAD.
             encoder_output,
-            encoder_padding_mask
+            /*encoder_padding_mask*/ nullptr // TODO: do we need padding for the encoder?
             // state_bag=state_bag,
         ); // (B, S, D)
 

+ 1 - 1
ggml/test_unity_cpp.py

@@ -401,7 +401,7 @@ def test_t2tt(ctx: Ctx, g_model: c_void_p):
     encoder_padding_mask = ggml.from_numpy(ctx, text_out["encoder_padding_mask"])
 
     job = ggml.SequenceGeneratorJob()
-    job.opts.beam_size = 1
+    job.opts.beam_size = 2
     job.opts.min_seq_len = 1
     job.opts.soft_max_seq_len_a = 1
     job.opts.soft_max_seq_len_b = 200