
text encoder

Guillaume Wenzek, 1 year ago
commit e1faba0c5d
2 changed files with 59 additions and 4 deletions
  1. ggml/examples/unity/fairseq2.cpp (24 additions, 0 deletions)
  2. ggml/test_unity_cpp.py (35 additions, 4 deletions)

+ 24 - 0
ggml/examples/unity/fairseq2.cpp

@@ -228,3 +228,27 @@ extern "C" ggml_tensor* StandardTransformerEncoderLayer_forward(
 
     return seqs;
 }
+
+
+extern "C" ggml_tensor* StandardTransformerEncoder_forward(
+    fairseq2_model& model,
+    const std::string& prefix,
+    ggml_tensor* seqs,
+    ggml_tensor* padding_mask
+) {
+    int layer_idx = 0;
+    // TODO: this isn't nice.
+    // When loading the model we should add a nullptr entry for the module key to avoid these string concatenations.
+    while (has_layer(model, prefix + ".layers." + std::to_string(layer_idx) + ".self_attn_layer_norm.weight")) {
+        seqs = StandardTransformerEncoderLayer_forward(
+            model, prefix + ".layers." + std::to_string(layer_idx), seqs, padding_mask
+        );
+        ggml_set_name(seqs, ("x_" + std::to_string(layer_idx)).c_str());
+        layer_idx += 1;
+    }
+
+    if (has_layer(model, prefix + ".layer_norm.weight"))
+        seqs = LayerNorm_forward(model, prefix + ".layer_norm", seqs);
+
+    return seqs;
+}
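
A minimal Python sketch of the key-probing idea behind the new StandardTransformerEncoder_forward: layers are discovered by testing for "<prefix>.layers.<i>.self_attn_layer_norm.weight" until a key is missing, and the trailing LayerNorm is applied only if its weight exists. The dict below is a stand-in for the tensors held by the loaded fairseq2 model (its values are placeholders); only the key naming is taken from the C++ code above.

# Stand-in for the tensors held by fairseq2_model; the values are placeholders.
tensors = {
    "text_encoder.layers.0.self_attn_layer_norm.weight": object(),
    "text_encoder.layers.1.self_attn_layer_norm.weight": object(),
    "text_encoder.layer_norm.weight": object(),
}

def count_encoder_layers(prefix: str) -> int:
    """Probe layer keys until one is missing, mirroring the C++ while loop."""
    i = 0
    while f"{prefix}.layers.{i}.self_attn_layer_norm.weight" in tensors:
        i += 1
    return i

assert count_encoder_layers("text_encoder") == 2
# The final LayerNorm is optional, just like the has_layer check above.
assert "text_encoder.layer_norm.weight" in tensors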

+ 35 - 4
ggml/test_unity_cpp.py

@@ -17,14 +17,14 @@ from seamless_communication.models.unity import load_unity_model
 Ctx = ggml.ggml_context_p
 
 UNITY_MODELS = Path(__file__).parent / "examples/unity/models"
-PARAMS_16MB = ggml.ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
+PARAMS_256MB = ggml.ggml_init_params(mem_size=256 * 1024 * 1024, mem_buffer=None)
 
 
 @pytest.fixture(name="ctx")
 def _ctx() -> Iterator[Ctx]:
-    """Allocate a new context with 16 MB of memory"""
+    """Allocate a new context with 256 MB of memory"""
     try:
-        ctx = ggml.ggml_init(params=PARAMS_16MB)
+        ctx = ggml.ggml_init(params=PARAMS_256MB)
         yield ctx
     finally:
         ggml.ggml_free(ctx)
@@ -422,7 +422,7 @@ def test_forward_self_attn(ctx: Ctx, g_model: c_void_p, pt_model: Any) -> None:
         gxq,
         gx,
         gx,
-        ctypes.pointer(),  # TODO: tests with causal attention masks
+        None,  # TODO: tests with causal attention masks
     )
     gf = ggml.ggml_build_forward(gy)
     ggml.ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)
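
Side note on the None change above: in ctypes, passing None where a function expects a pointer argument sends a C NULL, while ctypes.pointer() cannot even be called without an object to point at. A standalone illustration using only the ctypes standard library, independent of the ggml bindings:

import ctypes

# ctypes.pointer() needs a ctypes object to point at; calling it with no
# argument raises TypeError, so it cannot express "no attention mask".
try:
    ctypes.pointer()
except TypeError as err:
    print("pointer() without an argument fails:", err)

# A NULL pointer is either None (when passed as a call argument) or a
# zero-initialized pointer instance, which is falsy.
null_mask = ctypes.POINTER(ctypes.c_float)()
print(bool(null_mask))  # False
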
@@ -504,3 +504,34 @@ def test_StandardTransformerEncoderLayer_forward(
 
     assert y.shape == y_exp.shape
     assert np.allclose(y_exp, y, atol=1e-4)
+
+
+def test_StandardTransformerEncoder_forward(
+    ctx: Ctx, g_model: c_void_p, pt_model: Any
+) -> None:
+    x = torch.empty((1, 21, 1024))
+    padding_mask = torch.ones((1, 21))
+    torch.random.manual_seed(0)
+    torch.nn.init.uniform_(x, -1, 1)
+
+    gx = ggml.from_numpy(ctx, x[0])
+    ggml.ggml_set_name(gx, b"x")
+    gpad = ggml.from_numpy(ctx, padding_mask[0])
+    ggml.ggml_set_name(gpad, b"padding_mask")
+    gy = ggml.forward(
+        "StandardTransformerEncoder",
+        g_model,
+        "text_encoder",
+        gx,
+        None,  # TODO support padding mask
+    )
+    gf = ggml.ggml_build_forward(gy)
+    ggml.ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)
+
+    y = ggml.to_numpy(gy)
+
+    y_exp, _ = pt_model.text_encoder(x, padding_mask)
+    y_exp = y_exp.squeeze(0).numpy()  # remove batch dimension
+
+    assert y.shape == y_exp.shape
+    assert np.allclose(y_exp, y, atol=1e-4)