Kaynağa Gözat

revert unexpected changes

cndn 1 yıl önce
ebeveyn
işleme
b1bfd87e75
2 değiştirilmiş dosya ile 10 ekleme ve 11 silme
  1. 10 8
      ggml/examples/unity/fairseq2.cpp
  2. 0 3
      ggml/examples/unity/unity.cpp

+ 10 - 8
ggml/examples/unity/fairseq2.cpp

@@ -140,13 +140,12 @@ void append_to_prev_kv(const fairseq2_model& model, const std::string& prefix, g
 
     // qk is (B * H, Sq, Sk) == (B*H, 1, Sk) in incremental mode
     // we return the Sq slice of the (Sq, Sk) attention mask
-    *self_attn_mask = ggml_slice(
-        model.ctx,
-        ggml_slice(model.ctx, kv.self_attn_mask, 0, 0, step_nr),
-        1,
-        step_nr - 1,
-        step_nr
-    );
+    if (self_attn_mask != nullptr) {
+        *self_attn_mask = ggml_slice(
+            ctx, ggml_slice(ctx, kv.self_attn_mask, 0, 0, step_nr),
+            1, step_nr - 1, step_nr
+        );
+    }
 
     kv.step_nr = step_nr;
 }
@@ -1481,7 +1480,7 @@ extern "C" Hypothesis* generate_sequence(
             }
         }
         ggml_tensor* prev_token = ggml_slice(step_ctx, seqs, 0, step_nr, step_nr + 1);
-        
+
         ggml_tensor* decoder_input = TransformerEmbeddingFrontend_forward(model, "text_decoder_frontend", prev_token);
         ggml_tensor* decoder_output = StandardTransformerDecoder_forward(
             model,
@@ -1565,6 +1564,8 @@ extern "C" Hypothesis* generate_sequence(
         // Reorder beams in the `seq` and `score` buffers. The same beam can
         // be selected more than once.
         // (B, S), (B) -> (B, S)
+        // Don't use the allocr API here, because it might reuse a kv cache buffer several times.
+        ggml_set_no_alloc(step_ctx, false);
         ggml_tensor* new_seqs = ggml_get_rows(step_ctx, seqs, beam_indices);
         ggml_tensor* new_scores = ggml_get_rows(step_ctx, scores, beam_indices);
         struct ggml_cgraph * gf_reorder = ggml_new_graph(step_ctx);
@@ -1799,6 +1800,7 @@ extern "C" std::size_t fairseq2_spm_detokenize(fairseq2_model* model, ggml_tenso
         std::size_t n = token.end() - begin;
         written += n;
         out += n;
+
     }
     *out = '0';
     return written;

+ 0 - 3
ggml/examples/unity/unity.cpp

@@ -36,9 +36,6 @@ void unity_print_usage(int /*argc*/, char ** argv, const unity_params & params)
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
     fprintf(stderr, "  -h, --help            show this help message and exit\n");
-    fprintf(stderr, "  -i, --input           Input text for the text-2-text translation\n");
-    fprintf(stderr, "  -l, --tgt-lang        Target translation lang (default: %s\n", params.tgt_lang);
-
     fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stderr, "  -v, --verbose         Print out word level confidence score and LID score (default: off)");
     fprintf(stderr, "  -m FNAME, --model FNAME\n");