2 years ago · 88121acf32
--- a/README.md
+++ b/README.md
@@ -22,22 +22,27 @@ Links:
 
				 
			
 
				 # Quick Start
			
 
				 ## Installation
			
 
				+
			
 
				 ```
			
 
				-pip install fairseq2==0.1
			
 
				 pip install .
			
 
				 ```
			
 
				 
			
 
				+A temporary extra requirement for fairseq2 is [libsndfile](https://github.com/libsndfile/libsndfile). In a [Conda](https://docs.conda.io/en/latest/) environment, it can be installed via:
			
 
				+```
			
 
				+conda install -y -c conda-forge libsndfile
			
 
				+```
			
 
				+
			
 
				 ## Running inference
			
 
				 
			
 
				 Here’s an example of using the CLI from the root directory to run inference.
			
 
				 
			
 
				 S2ST task:
			
 
				 ```bash
			
 
				-python scripts/m4t/predict/predict.py <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio>
			
 
				+m4t_predict <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio>
			
 
				 ```
			
 
				 T2TT task:
			
 
				 ```bash
			
 
				-python scripts/m4t/predict/predict.py <input_text> t2tt <tgt_lang> --src_lang <src_lang>
			
 
				+m4t_predict <input_text> t2tt <tgt_lang> --src_lang <src_lang>
			
 
				 ```
			
 
				 
			
 
				 Please refer to the [evaluation README](scripts/m4t/predict) for detailed instruction on how to run inference.
			
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -0,0 +1,4 @@
 
				+pytest
			
 
				+black
			
 
				+flake8
			
 
				+isort
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ datasets
 
				 torchaudio
			
 
				 soundfile
			
 
				 librosa
			
 
				+fairseq2
			
--- a/scripts/m4t/finetune/README.md
+++ b/scripts/m4t/finetune/README.md
@@ -29,12 +29,12 @@ Below is an example bash script that prepares a training and evaluation dataset
 
				 export DATASET_DIR=~/m4t_dataset
			
 
				 mkdir -p $DATASET_DIR
			
 
				 
			
 
				-python scripts/m4t/finetune/dataset.py \
			
 
				+m4t_prepare_dataset \
			
 
				   --source_lang eng \
			
 
				   --target_lang kor \
			
 
				   --split train \
			
 
				   --save_dir $DATASET_DIR
			
 
				- python scripts/m4t/finetune/dataset.py \
			
 
				+m4t_prepare_dataset \
			
 
				   --source_lang eng \
			
 
				   --target_lang kor \
			
 
				   --split validation \
			
@@ -97,7 +97,8 @@ torchrun \
 
				    --rdzv-endpoint=localhost:0 \
			
 
				    --nnodes=1 \
			
 
				    --nproc-per-node=8  \
			
 
				-  scripts/m4t/finetune/finetune.py \
			
 
				+   --no-python \
			
 
				+  m4t_finetune \
			
 
				    --mode SPEECH_TO_TEXT \
			
 
				    --train_dataset $DATASET_DIR/train_manifest.json  \
			
 
				    --eval_dataset $DATASET_DIR/validation_manifest.json \
			
--- a/scripts/m4t/finetune/dataset.py
+++ b/scripts/m4t/finetune/dataset.py
@@ -10,7 +10,6 @@ import dataclasses
 
				 import json
			
 
				 import logging
			
 
				 import os
			
 
				-from argparse import Namespace
			
 
				 from pathlib import Path
			
 
				 
			
 
				 from seamless_communication.datasets.huggingface import (
			
@@ -157,7 +156,8 @@ def init_parser() -> argparse.ArgumentParser:
 
				     return parser
			
 
				 
			
 
				 
			
 
				-def main(args: Namespace) -> None:
			
 
				+def main() -> None:
			
 
				+    args = init_parser().parse_args()
			
 
				     manifest_path = download_fleurs_dataset(
			
 
				         source_lang=args.source_lang,
			
 
				         target_lang=args.target_lang,
			
@@ -168,5 +168,4 @@ def main(args: Namespace) -> None:
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    args = init_parser().parse_args()
			
 
				-    main(args)
			
 
				+    main()
			
--- a/scripts/m4t/finetune/finetune.py
+++ b/scripts/m4t/finetune/finetune.py
@@ -7,7 +7,6 @@
 
				 import argparse
			
 
				 import logging
			
 
				 import os
			
 
				-from argparse import Namespace
			
 
				 from pathlib import Path
			
 
				 
			
 
				 import dataloader
			
@@ -125,7 +124,8 @@ def init_parser() -> argparse.ArgumentParser:
 
				     return parser
			
 
				 
			
 
				 
			
 
				-def run_finetune(args: Namespace) -> None:
			
 
				+def main() -> None:
			
 
				+    args = init_parser().parse_args()
			
 
				     dist_utils.init_distributed([logger, trainer.logger])
			
 
				     device = torch.device("cuda")
			
 
				     text_tokenizer: NllbTokenizer = load_unity_text_tokenizer(args.model_name)
			
@@ -182,5 +182,4 @@ def run_finetune(args: Namespace) -> None:
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    parser = init_parser()
			
 
				-    run_finetune(parser.parse_args())
			
 
				+    main()
			
--- a/scripts/m4t/predict/predict.py
+++ b/scripts/m4t/predict/predict.py
@@ -9,8 +9,11 @@ import torch
 
				 import torchaudio
			
 
				 from seamless_communication.models.inference import Translator
			
 
				 
			
 
				+logging.basicConfig(
			
 
				+    level=logging.INFO,
			
 
				+    format="%(asctime)s %(levelname)s -- %(name)s: %(message)s",
			
 
				+)
			
 
				 
			
 
				-logging.basicConfig(level=logging.INFO)
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
--- a/setup.py
+++ b/setup.py
@@ -4,12 +4,44 @@
 
				 # This source code is licensed under the license found in the
			
 
				 # LICENSE file in the root directory of this source tree.
			
 
				 
			
 
				+from pathlib import Path
			
 
				+from typing import Iterable
			
 
				+
			
 
				+import pkg_resources
			
 
				 from setuptools import find_packages, setup
			
 
				 
			
 
				+
			
 
				+def _load_requirements(fname: str) -> Iterable[str]:
			
 
				+    with open(Path(__file__).parent / fname) as fp_in:
			
 
				+        for req in pkg_resources.parse_requirements(fp_in):
			
 
				+            yield str(req)
			
 
				+
			
 
				+
			
 
				+default_requirements = list(_load_requirements("requirements.txt"))
			
 
				+dev_requirements = list(_load_requirements("dev_requirements.txt"))
			
 
				+
			
 
				 setup(
			
 
				     name="seamless_communication",
			
 
				-    version="0.1",
			
 
				-    packages=find_packages(where="src"),
			
 
				-    package_dir={"": "src"},
			
 
				-    package_data={"": ["assets/cards/*.yaml"]},
			
 
				+    version="1.0.0",
			
 
				+    packages=find_packages(where="src") + ['m4t_scripts.finetune', 'm4t_scripts.predict'],
			
 
				+    package_dir={"m4t_scripts": "scripts/m4t", "seamless_communication": "src/seamless_communication"},
			
 
				+    package_data={"": ["seamless_communication/assets/cards/*.yaml"]},
			
 
				+    description="SeamlessM4T -- Massively Multilingual & Multimodal Machine Translation Model",
			
 
				+    long_description=open("README.md", encoding="utf-8").read(),
			
 
				+    long_description_content_type="text/markdown",
			
 
				+    readme="README.md",
			
 
				+    python_requires=">=3.8",
			
 
				+    author="Meta Platforms",
			
 
				+    url="https://ai.meta.com/",
			
 
				+    license="Creative Commons",
			
 
				+    install_requires=default_requirements,
			
 
				+    extras_require={"dev": default_requirements + dev_requirements},
			
 
				+    entry_points={
			
 
				+        "console_scripts": [
			
 
				+           "m4t_predict=m4t_scripts.predict.predict:main",
			
 
				+           "m4t_finetune=m4t_scripts.finetune.finetune:main",
			
 
				+           "m4t_prepare_dataset=m4t_scripts.finetune.dataset:main",
			
 
				+        ],
			
 
				+    },
			
 
				+    include_package_data=True,
			
 
				 )