@@ -224,16 +224,16 @@ def test_MultiheadAttention_forward(ctx: Ctx, g_model: c_void_p, pt_model: Any)
 def test_StandardTransformerEncoderLayer_forward(
     ctx: Ctx, g_model: c_void_p, pt_model: Any
 ) -> None:
-    x = torch.empty((1, 21, 1024))
-    padding_mask = torch.ones((1, 21))
+    x = torch.empty((2, 21, 1024))
+    padding_mask = torch.ones((2, 21))
     torch.random.manual_seed(0)
     torch.nn.init.uniform_(x, -1, 1)

     layer = pt_model.text_encoder.layers[0]

-    gx = ggml.from_numpy(ctx, x[0])
+    gx = ggml.from_numpy(ctx, x)
     ggml.ggml_set_name(gx, b"x")
-    gpad = ggml.from_numpy(ctx, padding_mask[0])
+    gpad = ggml.from_numpy(ctx, padding_mask)
     ggml.ggml_set_name(gpad, b"padding_mask")
     gy = ggml.forward(
         "StandardTransformerEncoderLayer",
@@ -248,7 +248,7 @@ def test_StandardTransformerEncoderLayer_forward(
     y = ggml.to_numpy(gy)

     y_exp, _ = layer(x, padding_mask)
-    y_exp = y_exp.squeeze(0).numpy()  # remove batch dimension
+    y_exp = y_exp.numpy()

     assert y.shape == y_exp.shape
     assert np.allclose(y_exp, y, atol=1e-4 if UNITY_FLASH_ATTN else 1e-2)
@@ -257,14 +257,14 @@ def test_StandardTransformerEncoderLayer_forward(
 def test_StandardTransformerEncoder_forward(
     ctx: Ctx, g_model: c_void_p, pt_model: Any
 ) -> None:
-    x = torch.empty((1, 21, 1024))
-    padding_mask = torch.ones((1, 21))
+    x = torch.empty((2, 21, 1024))
+    padding_mask = torch.ones((2, 21))
     torch.random.manual_seed(0)
     torch.nn.init.uniform_(x, -1, 1)

-    gx = ggml.from_numpy(ctx, x[0])
+    gx = ggml.from_numpy(ctx, x)
     ggml.ggml_set_name(gx, b"x")
-    gpad = ggml.from_numpy(ctx, padding_mask[0])
+    gpad = ggml.from_numpy(ctx, padding_mask)
     ggml.ggml_set_name(gpad, b"padding_mask")
     gy = ggml.forward(
         "StandardTransformerEncoder",
@@ -279,7 +279,7 @@ def test_StandardTransformerEncoder_forward(
     y = ggml.to_numpy(gy)

     y_exp, _ = pt_model.text_encoder(x, padding_mask)
-    y_exp = y_exp.squeeze(0).numpy()  # remove batch dimension
+    y_exp = y_exp.numpy()

     assert y.shape == y_exp.shape
     assert np.allclose(y_exp, y, atol=1e-4 if UNITY_FLASH_ATTN else 1e-2)