il y a 1 an · 2828c97c36
--- a/ggml/examples/unity/fairseq2.cpp
+++ b/ggml/examples/unity/fairseq2.cpp
@@ -140,15 +140,15 @@ void append_to_prev_kv(const fairseq2_model& model, const std::string& prefix, g
 
				 
			
 
				     // qk is (B * H, Sq, Sk) == (B*H, 1, Sk) in incremental mode
			
 
				     // we return the Sq slice of the (Sq, Sk) attention mask
			
 
				-    if (self_attn_mask != nullptr) {
			
 
				-        *self_attn_mask = ggml_slice(
			
 
				-            ctx, ggml_slice(ctx, kv.self_attn_mask, 0, 0, step_nr),
			
 
				-            1, step_nr - 1, step_nr
			
 
				-        );
			
 
				-    }
			
 
				+    *self_attn_mask = ggml_slice(
			
 
				+        model.ctx,
			
 
				+        ggml_slice(model.ctx, kv.self_attn_mask, 0, 0, step_nr),
			
 
				+        1,
			
 
				+        step_nr - 1,
			
 
				+        step_nr
			
 
				+    );
			
 
				 
			
 
				     kv.step_nr = step_nr;
			
 
				-    ggml_set_no_alloc(ctx, no_alloc_save);
			
 
				 }
			
 
				 
			
 
				 // variant of ggml_get_rows that allows for a with more than 2 dims.
			
@@ -1481,7 +1481,7 @@ extern "C" Hypothesis* generate_sequence(
 
				             }
			
 
				         }
			
 
				         ggml_tensor* prev_token = ggml_slice(step_ctx, seqs, 0, step_nr, step_nr + 1);
			
 
				-
			
 
				+        
			
 
				         ggml_tensor* decoder_input = TransformerEmbeddingFrontend_forward(model, "text_decoder_frontend", prev_token);
			
 
				         ggml_tensor* decoder_output = StandardTransformerDecoder_forward(
			
 
				             model,
			
@@ -1565,8 +1565,6 @@ extern "C" Hypothesis* generate_sequence(
 
				         // Reorder beams in the `seq` and `score` buffers. The same beam can
			
 
				         // be selected more than once.
			
 
				         // (B, S), (B) -> (B, S)
			
 
				-        // don't use allocr API, cause it might reuse a kv cache buffer several time.
			
 
				-        ggml_set_no_alloc(step_ctx, false);
			
 
				         ggml_tensor* new_seqs = ggml_get_rows(step_ctx, seqs, beam_indices);
			
 
				         ggml_tensor* new_scores = ggml_get_rows(step_ctx, scores, beam_indices);
			
 
				         struct ggml_cgraph * gf_reorder = ggml_new_graph(step_ctx);
			
@@ -1848,7 +1846,6 @@ std::pair<std::vector<std::string>, std::vector<float>> fairseq2_spm_detokenize(
 
				         std::size_t n = token.end() - begin;
			
 
				         written += n;
			
 
				         out += n;
			
 
				-
			
 
				     }
			
 
				     if(subword_scores.size() > 0) {
			
 
				         word_scores.push_back(*std::min_element(subword_scores.begin(), subword_scores.end()));
			
--- a/ggml/examples/unity/unity.cpp
+++ b/ggml/examples/unity/unity.cpp
@@ -36,6 +36,9 @@ void unity_print_usage(int /*argc*/, char ** argv, const unity_params & params)
 
				     fprintf(stderr, "\n");
			
 
				     fprintf(stderr, "options:\n");
			
 
				     fprintf(stderr, "  -h, --help            show this help message and exit\n");
			
 
				+    fprintf(stderr, "  -i, --input           Input text for the text-2-text translation\n");
			
 
				+    fprintf(stderr, "  -l, --tgt-lang        Target translation lang (default: %s\n", params.tgt_lang);
			
 
				+
			
 
				     fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
			
 
				     fprintf(stderr, "  -v, --verbose         Print out word level confidence score and LID score (default: off)");
			
 
				     fprintf(stderr, "  -m FNAME, --model FNAME\n");