merge stuff into ggml_convert.py

Guillaume Wenzek, 1 year ago
commit b1c4943c4e
4 changed files with 223 additions and 297 deletions
  1. ggml/examples/unity/buffered_ggml_writer.py (+0, -82)
  2. ggml/examples/unity/fairseq2_to_ggml_converter.py (+0, -157)
  3. ggml/examples/unity/ggml_convert.py (+223, -0)
  4. ggml/examples/unity/type_utils.py (+0, -58)

+ 0 - 82
ggml/examples/unity/buffered_ggml_writer.py

@@ -1,82 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import struct
-from io import BufferedWriter
-
-import torch
-
-from ggml.examples.unity.type_utils import to_ctype
-
-
-class BufferedGGMLWriter:
-    buffer: BufferedWriter
-
-    def __init__(self, buffer: BufferedWriter) -> None:
-        self.buffer = buffer
-
-    def write_magic_hex(self) -> None:
-        """Write GGML Magic Number to internal buffer.
-        This should be called at the start of your convert process.
-        """
-        self.buffer.write(struct.pack("i", 0x67676d6c))
-
-    def write_hparams(self, hparams: dict) -> None:
-        """Write hyper parameters to internal buffer.
-
-        :params hparams:
-            flattened dict containing model's hyper parameters.
-        """
-        for key in hparams.keys():
-            try:
-                value = hparams[key]
-                ctype, cvalue = to_ctype(value)
-                self.buffer.write(struct.pack(ctype, cvalue))
-            except ValueError as e:
-                # TODO use logger
-                print(f"[Warning] {e}. Skipping config for key {key}")
-                continue
-
-    def write_state_dict(self, state_dict: dict) -> None:
-        """Write pytorch state dict to internal buffer.
-
-        :paras state_dict:
-            state dict returned by pytorch model
-        """
-        for key, value in state_dict.items():
-            self.write_string(key)
-            self.write_tensor(value)
-
-    def write_string(self, value: str) -> None:
-        """Write string in utf-8 format to internal buffer.
-
-        :params value:
-            string value to dump.
-        """
-        str_ = value.encode("utf-8")
-        self.buffer.write(struct.pack("i", len(str_)))
-        self.buffer.write(str_)
-
-    def write_tensor(self, value: torch.Tensor) -> None:
-        """Write torch tensor in ggml format to internal buffer.
-
-        First we save the number of dimensions and the dtype.
-        Then we save the data as numpy array.
-
-        :params value:
-            Tensor to dump.
-        """
-        data = value.squeeze().numpy()
-        n_dims = len(data.shape)
-
-        # TODO: Convert to fp16 when necessary!
-        ftype = 0
-
-        self.buffer.write(struct.pack("ii", n_dims, ftype))
-        for i in range(n_dims):
-            self.buffer.write(struct.pack("i", data.shape[n_dims - 1 - i]))
-
-        data.tofile(self.buffer)
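
One behavioral difference worth flagging against the replacement below: write_magic_hex packs the magic as a native int32, while the new write_ggml_header in ggml_convert.py writes the raw bytes b"ggml". On a little-endian host these are different byte sequences, as a quick check shows:

    import struct

    struct.pack("i", 0x67676d6c)  # -> b"lmgg" on a little-endian host
    b"ggml"                       # raw bytes 0x67 0x67 0x6d 0x6c, i.e. the magic in big-endian order

Whichever convention a reader adopts, it has to match: compare the four raw bytes, not a natively unpacked int32.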

+ 0 - 157
ggml/examples/unity/fairseq2_to_ggml_converter.py

@@ -1,157 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import dataclasses
-from pathlib import Path
-from typing import Any, Callable, Optional, Union
-
-from fairseq2.assets import AssetCard
-
-from ggml.examples.unity.buffered_ggml_writer import BufferedGGMLWriter
-from ggml.examples.unity.type_utils import get_cpp_type
-from seamless_communication.models.unity import (
-    load_unity_config,
-    load_unity_model
-)
-
-Preprocessor = Callable[[Any], Any]
-
-
-class Fairseq2ToGGMLConverter:
-    """Converter from fairseq2 format to GGML format"""
-
-    config_preprocessor: Preprocessor
-    nested_params_separtor: str
-
-    def __init__(
-        self,
-        nested_params_separtor: str = ".",
-        config_preprocessor: Optional[Preprocessor] = None,
-    ) -> None:
-        """
-        :param nested_params_separtor:
-            string separator used when flattening nested hparams
-        :param config_preprocessor:
-            Preprocessor used for config/hparams values
-        """
-        self.config_preprocessor = config_preprocessor or (lambda v: v)
-        self.nested_params_separtor = nested_params_separtor
-
-    def convert_to_ggml(
-        self,
-        model_name_or_card: Union[str, AssetCard],
-        output_file: Path
-    ) -> None:
-        """Load model from card, convert to ggml format and save result.
-
-        :param model_name_or_card:
-            The name or asset card of the model to load.
-        :param output_file:
-            File path to store binary output.
-        """
-        hparams = self._load_config(model_name_or_card)
-        state_dict = self._load_state_dict(model_name_or_card)
-
-        buffer = output_file.open("wb")
-
-        ggml_writer = BufferedGGMLWriter(buffer)
-
-        ggml_writer.write_magic_hex()
-        ggml_writer.write_hparams(hparams)
-        ggml_writer.write_state_dict(state_dict)
-
-        buffer.close()
-
-    def generate_hparams_struct(
-        self,
-        model_name_or_card: Union[str, AssetCard],
-        struct_name: str,
-    ) -> str:
-        """Transform config to c++ struct
-
-        :param model_name_or_card:
-            The name or asset card of the model to load.
-        :param output_file:
-            File path to store binary output.
-        """
-        hparams = self._load_config(model_name_or_card)
-        result = f"struct {struct_name} {{\n"
-        for key, value in hparams.items():
-            result = f"{result}\t{get_cpp_type(value)} {key};\n"
-
-        result = f"{result}}};"
-
-        return result
-
-    def _load_config(
-        self,
-        model_name_or_card: Union[str, AssetCard]
-    ) -> dict:
-        """Load model config and transform it to flattened dict.
-
-        :param model_name_or_card:
-            The name or asset card of the model to load.
-
-        :returns:
-            Flat dictionnary containing all hyper parameters.
-        """
-        model_config = load_unity_config(model_name_or_card)
-        model_config_dict = dataclasses.asdict(model_config)
-        flattened = self.__flatten(model_config_dict)
-
-        return flattened
-
-    def _load_state_dict(
-        self,
-        model_name_or_card: Union[str, AssetCard]
-    ) -> dict:
-        """Load model and return state dict.
-
-        :param model_name_or_card:
-            The name or asset card of the model to load.
-
-        :returns:
-            State dict returned by pytorch model.
-        """
-        model = load_unity_model(model_name_or_card)
-
-        return model.state_dict()
-
-    def __flatten(
-        self,
-        config: dict
-    ) -> dict:
-        """Flatten nested dictionnary
-
-        :param config:
-            nested dictionnary containing model config.
-
-        :returns:
-            flat dictionnary
-        """
-        return self.__flatten_recursive(config, '')
-
-    def __flatten_recursive(
-        self,
-        config: dict,
-        prefix: str
-    ) -> dict:
-        """Recursive method used to flatten nested dictionnary"""
-        result = {}
-        for key in config:
-            new_key = f"{prefix}{key}"
-            if isinstance(config[key], dict):
-                nested_result = self.__flatten_recursive(
-                    config[key],
-                    f"{new_key}{self.nested_params_separtor}"
-                )
-                result.update(nested_result)
-            else:
-                new_config = self.config_preprocessor(config[key])
-                if new_config is not None:
-                    result[new_key] = config[key]
-
-        return result
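
The flattening logic above reappears almost verbatim as flatten_config in ggml_convert.py below. A minimal sketch of its behavior, using a made-up config (keys and values are hypothetical):

    nested = {"encoder": {"num_layers": 6, "dropout": 0.1}, "vocab_size": 256}
    flatten_config(nested, separator="__")
    # -> {"encoder__num_layers": 6, "encoder__dropout": 0.1, "vocab_size": 256}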

+ 223 - 0
ggml/examples/unity/ggml_convert.py

@@ -0,0 +1,223 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import dataclasses
+import logging
+import struct
+from enum import Enum
+from io import BufferedWriter
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import torch
+from seamless_communication.models.unity import load_unity_config, load_unity_model
+
+Preprocessor = Callable[[Any], Any]
+
+
+def to_ctype(value: Any) -> Tuple[str, Any]:
+    """Transform python type to ctype.
+
+    :param value:
+        value to cast into ctype
+
+    :returns:
+        A tuple of ctype and cvalue.
+    """
+    # bool and Enum must be tested before int: bool is a subclass of int in
+    # Python, and IntEnum members are too.
+    if isinstance(value, bool):
+        return ("?", value)
+    if isinstance(value, Enum):
+        return ("i", value.value)
+    if isinstance(value, int):
+        return ("i", value)
+    if isinstance(value, float):
+        return ("f", value)
+
+    raise ValueError(f"Unsupported type {type(value)}")
+
+
+def get_cpp_type(value: Any) -> str:
+    """Return equivalent cpp type in string format
+
+    :params value:
+        value to cast into ctype
+
+    :returns:
+        str containing cpp type
+    """
+    # delegate to to_ctype so the C++ types stay consistent with the packed format
+    try:
+        ctype, _ = to_ctype(value)
+    except ValueError as e:
+        return f"// Error: {e}"
+
+    if ctype == "i":
+        return "std::int32_t"
+    if ctype == "f":
+        return "std::float32"
+    if ctype == "?":
+        return "bool"
+
+    raise RuntimeError(
+        f"Should not have reached this part. Missing cpp translation for {ctype}"
+    )
+
+
+def write_ggml_header(out: BufferedWriter) -> None:
+    """Write GGML header"""
+    out.write(b"ggml")
+
+
+def write_hparams(out: BufferedWriter, hparams: Dict[str, Any]) -> None:
+    """Write hyper parameters.
+
+    :params hparams:
+        flattened dict containing model's hyper parameters.
+
+    """
+    for key, value in hparams.items():
+        try:
+            # TODO: this is not cross-platform; what's the standard way of writing hparams in GGML?
+            ctype, cvalue = to_ctype(value)
+            out.write(struct.pack(ctype, cvalue))
+        except ValueError as e:
+            logging.warning(f"{e}. Skipping config for key {key}")
+            continue
+
+
+def write_state_dict(out: BufferedWriter, state_dict: Dict[str, torch.Tensor]) -> None:
+    """Write pytorch state dict.
+
+    :param state_dict:
+        state dict returned by pytorch model
+    """
+    for key, value in state_dict.items():
+        write_string(out, key)
+        write_tensor(out, value)
+
+
+def write_string(out: BufferedWriter, value: str) -> None:
+    """Write string in utf-8 format.
+
+    :param value:
+        string value to dump.
+    """
+    str_ = value.encode("utf-8")
+    out.write(struct.pack("i", len(str_)))
+    out.write(str_)
+
+
+def write_tensor(out: BufferedWriter, value: torch.Tensor) -> None:
+    """Write torch tensor in ggml format.
+
+    First we save the number of dimensions and the dtype, then the shape
+    (in reversed order), then the raw data as a numpy array.
+
+    :param value:
+        Tensor to dump.
+    """
+    data = value.squeeze().numpy()
+    n_dims = len(data.shape)
+
+    # TODO: Convert to fp16 when necessary!
+    ftype = 0
+
+    out.write(struct.pack("ii", n_dims, ftype))
+    for i in range(n_dims):
+        out.write(struct.pack("i", data.shape[n_dims - 1 - i]))
+
+    data.tofile(out)
+
+
+def write_ggml_file(
+    out: BufferedWriter, hparams: Dict[str, Any], state_dict: Dict[str, torch.Tensor]
+) -> None:
+    write_ggml_header(out)
+    write_hparams(out, hparams)
+    write_state_dict(out, state_dict)
+
+
+def flatten_config(
+    config: Dict[str, Any],
+    separator: str,
+    config_preprocessor: Optional[Preprocessor] = None,
+) -> Dict[str, Any]:
+    """Flatten nested dictionnary
+
+    :param config:
+        nested dictionnary containing model config.
+    :param separator:
+            string separator used when flattening nested hparams
+    :param config_preprocessor:
+        Preprocessor used for config/hparams values
+
+    :returns:
+        flat dictionnary
+    """
+
+    if config_preprocessor is None:
+        config_preprocessor = lambda x: x
+
+    def __flatten(config: Dict[str, Any], prefix: str = "") -> Dict[str, Any]:
+        result = {}
+        for key in config:
+            new_key = f"{prefix}{key}"
+            if isinstance(config[key], dict):
+                nested_result = __flatten(config[key], f"{new_key}{separator}")
+                result.update(nested_result)
+            else:
+                new_config = config_preprocessor(config[key])
+                if new_config is not None:
+                    # store the preprocessed value (a None result drops the key)
+                    result[new_key] = new_config
+
+        return result
+
+    return __flatten(config)
+
+
+def generate_hparams_struct(
+    hparams: Dict[str, Any],
+    struct_name: str,
+) -> str:
+    """Generate a c++ struct to hold the model hyper-parameters.
+
+    :param hparams:
+        Flattened config of the model.
+    :param struct_name:
+        Name of the generated struct.
+    """
+    struct = f"struct {struct_name} {{\n"
+    fields = "\n".join(
+        [f"    {get_cpp_type(value)} {key};" for key, value in hparams.items()]
+    )
+
+    return struct + fields + "\n};\n"
+
+
+def main(model_name: str, out: Optional[Path] = None) -> None:
+    if out is None:
+        out = Path(model_name).with_suffix(".ggml")
+
+    # The type of model depends on the name
+    if "unity" in model_name or "seamlessM4T" in model_name:
+        model_config = load_unity_config(model_name)
+        hparams = flatten_config(dataclasses.asdict(model_config), separator="__")
+        model = load_unity_model(model_name)
+    else:
+        raise ValueError(f"Unsupported model type: {model_name}")
+
+    with out.open("wb") as o:
+        write_ggml_file(o, hparams, model.state_dict())
+
+    with out.with_suffix(".hparams.h").open("w") as h:
+        h.write(generate_hparams_struct(hparams, model_name + "_hparams"))
+
+
+if __name__ == "__main__":
+    import func_argparse
+
+    func_argparse.single_main(main)
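
Since write_string and write_tensor define the on-disk layout, a round-trip reader for a single tensor record would look roughly like the sketch below. This mirrors the writers above rather than any official GGML loader, and assumes fp32 data with the hparams bytes already consumed:

    import struct

    import numpy as np

    def read_tensor(f):
        (name_len,) = struct.unpack("i", f.read(4))              # utf-8 name length
        name = f.read(name_len).decode("utf-8")                  # tensor name
        n_dims, ftype = struct.unpack("ii", f.read(8))           # rank, dtype flag (0 = fp32)
        shape = struct.unpack(f"{n_dims}i", f.read(4 * n_dims))  # dims, written in reverse
        data = np.fromfile(f, dtype=np.float32, count=int(np.prod(shape)))
        return name, data.reshape(shape[::-1])                   # undo the reversal

As for invocation: func_argparse builds the CLI from main's signature, so a conversion presumably runs as python ggml_convert.py --model_name seamlessM4T_medium, producing seamlessM4T_medium.ggml plus a seamlessM4T_medium.hparams.h struct definition (the model name is illustrative, and the flag name assumes func_argparse's default convention).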

+ 0 - 58
ggml/examples/unity/type_utils.py

@@ -1,58 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-
-from enum import Enum
-from typing import Any, Tuple
-
-
-def to_ctype(value: Any) -> Tuple[str, Any]:
-    """Transform python type to ctype.
-
-    :params value:
-        value to cast into ctype
-
-    :returns:
-        A tuple of ctype and cvalue.
-    """
-    if isinstance(value, int):
-        return ("i", value)
-    if isinstance(value, float):
-        return ("f", value)
-    if isinstance(value, bool):
-        return ('?', value)
-    if isinstance(value, Enum):
-        return ('i', value.value)
-
-    raise ValueError(f"Unsupported type {type(value)}")
-
-
-def get_cpp_type(value) -> str:
-    """Return equivalent cpp type in string format
-
-    :params value:
-        value to cast into ctype
-
-    :returns:
-        str containing cpp type
-    """
-    # used to have compatibility between types
-    try:
-        ctype, _ = to_ctype(value)
-    except ValueError as e:
-        return f"Error[{e}]"
-
-    if ctype == "i":
-        return "int32_t"
-    if ctype == "f":
-        return "float"
-    if ctype == "?":
-        return "bool"
-
-    raise RuntimeError(
-        f"Should not have reached this part."
-        f"Missing cpp translation for {ctype}"
-    )
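
These helpers were folded into ggml_convert.py above, with the bool check reordered there. A quick sketch of the mapping as this deleted version behaves:

    to_ctype(512)    # ("i", 512)
    to_ctype(0.25)   # ("f", 0.25)
    to_ctype(True)   # ("i", True): bool is a subclass of int in Python, so the int
                     # branch wins here; the merged copy tests bool first, giving ("?", True)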