pass correct prefix sequence in test

Guillaume Wenzek, 1 year ago
parent commit 7c9b2a1b95

ggml/examples/unity/fairseq2.cpp  +32 -17

@@ -506,14 +506,23 @@ void _fan_out_encoder_output(
 }
 
 ggml_tensor* ggml_log_softmax(ggml_context* ctx, ggml_tensor* logits) {
-    // TODO: this isn't the smartest way of doing this
+    // TODO: this isn't the most precise way of doing this
     return ggml_log_inplace(ctx, ggml_soft_max_inplace(ctx, logits));
 }
 
+ggml_tensor* ggml_expand_2d(ggml_context* ctx, ggml_tensor* x, int64_t ne0, int64_t ne1) {
+    ggml_tensor* shape = ggml_new_tensor_2d(ctx, GGML_TYPE_I8, ne0, ne1);
+    ggml_type true_type = x->type;
+    x->type = GGML_TYPE_F32;
+    ggml_tensor* y = ggml_repeat(ctx, x, shape);
+    y->type = true_type;
+    return y;
+}
+
 void _bootstrap_seqs_and_scores(
     fairseq2_model& model,
     const SequenceGeneratorJob& job,
-    ggml_tensor* seqs,
+    ggml_tensor* full_seqs,
     ggml_tensor* scores,
     ggml_tensor* encoder_output,
     ggml_tensor* encoder_padding_mask,
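
Note: the reworded TODO is about numerical precision. Computing log(softmax(x)) as two separate ops can underflow for low-probability tokens, while a fused log-softmax evaluates x_i - m - log(sum_j exp(x_j - m)) with m = max_j x_j and stays finite.

The new ggml_expand_2d helper appears to work around ggml_repeat supporting only float tensors: it relabels the input as F32, repeats, and stamps the original type back on the result. That is lossless because GGML_TYPE_I32 and GGML_TYPE_F32 are both 4 bytes wide, and the I8 `shape` tensor serves purely as a cheap dimension carrier whose data is never read. A usage sketch with illustrative sizes (the helper leaves its input relabeled as F32, so callers pass throwaway views):

    // Fan a length-5 I32 prefix out to 4 identical beam rows: (5) -> (4, 5).
    ggml_tensor* prefix = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
    ggml_tensor* beams  = ggml_expand_2d(ctx, prefix, /*ne0=*/5, /*ne1=*/4);
    // beams->type is GGML_TYPE_I32 again; each of the 4 rows holds a bit-exact
    // copy of prefix once the graph containing beams is computed.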
@@ -528,23 +537,24 @@ void _bootstrap_seqs_and_scores(
 
     ggml_context* ctx = model.ctx;
 
-    // seqs[:, : prefix_seq_len] = job.prefix_seq;
-    ggml_cpy(ctx, job.prefix_seq, ggml_view_2d(ctx, seqs, 0, prefix_seq_len, seqs->nb[1], 0));
+    // full_seqs[:, : prefix_seq_len] = job.prefix_seq;
+    full_seqs->type = GGML_TYPE_F32;
+    job.prefix_seq->type = GGML_TYPE_F32;
+    ggml_tensor* seqs = ggml_cpy(ctx, job.prefix_seq, ggml_slice(ctx, full_seqs, 0, 0, prefix_seq_len));
 
     // We have to bootstrap the model with the already fanned-out encoder
-    // output to correctly initialize its incremental state. This causes some
-    // redundancy as we have to expand `decoder_input` to match the shape of
-    // `encoder_output`.
+    // output to correctly initialize its incremental state.
     // (S_pfx) -> (N x B, S_pfx - 1)
-    // prefix_seq[:-1].expand(encoder_output.size(0), -1)
-    ggml_tensor* decoder_input = ggml_repeat(ctx, ggml_view_1d(ctx, job.prefix_seq, prefix_seq_len - 1, 0), encoder_output);
+    // prefix_seq[:-1].expand(beam_size, -1)
+    seqs = ggml_expand_2d(ctx, ggml_slice(ctx, seqs, 0, 0, prefix_seq_len - 1), prefix_seq_len - 1, beam_size);
+    seqs->type = GGML_TYPE_I32;
 
     // Bootstrap the model state with prefix sequence.
-    decoder_input = TransformerEmbeddingFrontend_forward(model, "text_decoder_frontend", decoder_input);
+    seqs = TransformerEmbeddingFrontend_forward(model, "text_decoder_frontend", seqs);
     ggml_tensor* decoder_output = StandardTransformerDecoder_forward(
         model,
         "text_decoder",
-        decoder_input,
+        seqs,
         /*padding_mask*/ nullptr,
         encoder_output,
         encoder_padding_mask
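
The F32 relabeling above is a deliberate type pun: ggml_cpy then moves the 4-byte token ids around as raw float words, which is bit-exact precisely because the two types share a size. A compile-time guard documenting that assumption might look like this (a sketch, not part of the commit):

    #include <cstdint>
    static_assert(sizeof(float) == sizeof(std::int32_t),
                  "I32<->F32 relabeling copies token ids bit-for-bit "
                  "only while the element sizes match");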
@@ -554,11 +564,13 @@ void _bootstrap_seqs_and_scores(
 
     // logits, lprobs: (N, S_pfx - 1, V)
     ggml_tensor* logits = Linear_forward(model, "final_proj", decoder_output);
-    ggml_tensor* lprobs = ggml_log_softmax(ctx, ggml_view_3d(ctx, logits, logits->ne[0], logits->ne[1], 1, 0, 0, 0));
     int vocab_size = logits->ne[0];
+    ggml_tensor* lprobs = ggml_log_softmax(ctx, ggml_slice(ctx, logits, 1, 0, 1));
 
     ggml_cgraph gf = ggml_build_forward(lprobs);
     ggml_graph_compute_with_ctx(ctx, &gf, 1);
+    full_seqs->type = GGML_TYPE_I32;
+    job.prefix_seq->type = GGML_TYPE_I32;
 
     // Fetch scores of next steps from "lprobs"
     float p_score = 0;
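
full_seqs and job.prefix_seq are only restored to I32 after the graph has run, so an early return between the relabeling and this point would leave them mistyped. A small RAII guard would make the restore automatic on every exit path (a hypothetical helper, not in the commit):

    struct ggml_type_guard {
        ggml_tensor* t;
        ggml_type saved;
        ggml_type_guard(ggml_tensor* tensor, ggml_type tmp)
            : t(tensor), saved(tensor->type) { t->type = tmp; }
        ~ggml_type_guard() { t->type = saved; }  // runs on return or throw
    };
    // usage: ggml_type_guard guard(full_seqs, GGML_TYPE_F32);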
@@ -612,7 +624,7 @@ int StandardBeamSearch_step(
         // The first step always indicates the beginning of the sequence and
         // has no score.
         if (step_nr > 0) {
-            lprobs = ggml_add_inplace(ctx, lprobs, last_scores);
+            lprobs = ggml_add_inplace(ctx, lprobs, ggml_repeat(ctx, last_scores, lprobs));
         }
     } else {
         // Make probabilities contain cumulative scores for each hypothesis.
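
ggml_add evidently requires operands of identical shape here, so each beam's cumulative score has to be tiled across the vocabulary dimension before the add; that is what the added ggml_repeat does. A shape sketch under the layout the surrounding code implies (ne[0] is ggml's innermost dimension; names and sizes are illustrative):

    // One score per beam, tiled to match the per-beam token log-probs.
    ggml_tensor* last_scores = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, beam_size);
    ggml_tensor* lprobs      = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, vocab_size, beam_size);
    // ggml_repeat broadcasts (1, beam_size) -> (vocab_size, beam_size),
    // giving the in-place add two same-shaped operands.
    lprobs = ggml_add_inplace(ctx, lprobs, ggml_repeat(ctx, last_scores, lprobs));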
@@ -738,10 +750,12 @@ extern "C" float generate_sequence(
 
     // Array with integers up to 'vocab_size * beam_size' to represent next beams to explore
     ggml_tensor* candidate_indices = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, vocab_size * beam_size);
-    for (std::size_t i = 0; i < vocab_size * beam_size; ++i) ggml_set_i32_1d(candidate_indices, i, i);
+    for (std::size_t i = 0; i < vocab_size * beam_size; ++i)
+        ((int32_t *)(candidate_indices->data))[i] = i;
 
     // TODO: memory management
     // there should be a per-step ggml_context for intermediary results
+    // start of beam search:
     for (int step_nr = start_step; step_nr < max_seq_len - 1; ++step_nr) {
         // if (beam_indices != nullptr) {
         //     // If not `None`, it means in the last step we finalized one or
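
The raw write into candidate_indices->data avoids calling ggml_set_i32_1d once per element; it is safe because ggml_new_tensor_1d returns a contiguous I32 buffer. A guarded equivalent (the asserts are a sketch, not in the commit):

    GGML_ASSERT(candidate_indices->type == GGML_TYPE_I32);
    GGML_ASSERT(ggml_is_contiguous(candidate_indices));
    int32_t* idx = (int32_t*) candidate_indices->data;
    for (std::size_t i = 0; i < (std::size_t)(vocab_size * beam_size); ++i)
        idx[i] = (int32_t) i;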
@@ -829,9 +843,9 @@ extern "C" float generate_sequence(
                 ongoing_beams += 1 - finished;
             }
             if (ongoing_beams >= beam_size) break;
-            if (finished_searches.size() >= beam_size) break;
+            if (finished_searches.size() >= beam_size)
+                goto end_of_beam_search;
         }
-        if (finished_searches.size() >= beam_size) break;
 
         // Reorder beams in the `seq` and `score` buffers. The same beam can
         // be selected more than once.
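
C++ has no labeled break, so leaving the nested search loop previously required repeating the finished_searches.size() test in the outer loop; the goto now jumps straight to the finalization code after the loop. A self-contained sketch of the pattern (bounds are illustrative):

    #include <cstdio>

    int main() {
        for (int step = 0; step < 10; ++step)
            for (int beam = 0; beam < 4; ++beam)
                if (step * 4 + beam == 17)
                    goto end_of_search;  // exits both loops at once
    end_of_search:
        std::puts("search finished");
        return 0;
    }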
@@ -860,6 +874,7 @@ extern "C" float generate_sequence(
         scores = new_scores;
     }
 
+end_of_beam_search:
     // Ensure that hypotheses are sorted by decreasing scores before returning.
     std::sort(
         finished_searches.begin(),
@@ -871,5 +886,5 @@ extern "C" float generate_sequence(
     // TODO: return structured output
     *output_seq = *(finished_searches[0].seq);
 
-    return 0.0f;
+    return finished_searches[0].score;
 }

ggml/test_unity_cpp.py  +8 -6

@@ -638,6 +638,7 @@ def test_StandardTransformerDecoder_forward(
 
 
 def test_t2tt(ctx: Ctx, g_model: c_void_p):
+    # def test_t2tt(ctx: Ctx, g_model: c_void_p, translator):
     # device = translator.device
     src_lang = "eng"
     src_text = "We are all in a yellow submarine."
@@ -672,7 +673,7 @@ def test_t2tt(ctx: Ctx, g_model: c_void_p):
     text_out = np.load(Path(__file__).parent / "sample_input.npz")
     score = text_out["score"].item()
 
-    tgt_tokens = ggml.from_numpy(ctx, text_out["tgt_tokens"].astype(np.int32))
+    tgt_tokens = list(text_out["tgt_tokens"])
     encoder_out = ggml.from_numpy(ctx, text_out["encoder_output"])
     encoder_padding_mask = ggml.from_numpy(ctx, text_out["encoder_padding_mask"])
 
@@ -681,17 +682,18 @@ def test_t2tt(ctx: Ctx, g_model: c_void_p):
     job.opts.min_seq_len = 1
     job.opts.soft_max_seq_len_a = 1
     job.opts.soft_max_seq_len_b = 200
-    job.opts.hard_max_seq_len = 1024
+    job.opts.hard_max_seq_len = int(len(tgt_tokens) * 1.5)
     job.opts.len_penalty = 1.0
     job.opts.unk_penalty = 0.0
-    job.prefix_seq = ggml.from_numpy(ctx, text_out["tgt_tokens"].astype(np.int32)[:1])
+    job.prefix_seq = ggml.from_numpy(ctx, text_out["tgt_tokens"].astype(np.int32)[:2])
     job.eos_idx = 3
 
-    result = ctypes.byref(ggml.ggml_tensor())
+    result = ggml.ggml_tensor()
     g_score = ggml.generate_sequence(
-        g_model, job, encoder_out, encoder_padding_mask, result
+        g_model, job, encoder_out, encoder_padding_mask, ctypes.byref(result)
     )
-    breakpoint()
+    tokens = list(ggml.to_numpy(result))
+    assert tokens == tgt_tokens
     assert g_score == pytest.approx(score)