SeamlessM4T models currently support five tasks.

## Quick start:

Inference is run with the CLI, from the root directory of the repository.

The model can be specified with `--model_name` `seamlessM4T_large` or `seamlessM4T_medium`:
**S2ST**:

```bash
python scripts/m4t/predict/predict.py <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio> --model_name seamlessM4T_large
```
**S2TT**:
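The S2TT command itself is not shown here; by analogy with the S2ST invocation above, it would plausibly take the form below (a sketch only — the `s2tt` task name and argument order are assumptions, not confirmed by this document):

```bash
# Sketch: assumes the same CLI shape as the S2ST command above,
# minus --output_path since S2TT produces text rather than audio.
python scripts/m4t/predict/predict.py <path_to_input_audio> s2tt <tgt_lang> --model_name seamlessM4T_large
```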
**ASR**:

```bash
python scripts/m4t/predict/predict.py <path_to_input_audio> asr <tgt_lang>
```

## Inference breakdown

Inference uses a `Translator` object instantiated with a multitask UnitY model, specified as one of:
- `seamlessM4T_large`
- `seamlessM4T_medium`

and a vocoder, `vocoder_36langs`.
```python
import torch
from seamless_communication.models.inference import Translator

# Initialize a Translator object with a multitask model, vocoder on the GPU.
translator = Translator("seamlessM4T_large", "vocoder_36langs", torch.device("cuda:0"))
```
Now `predict()` can be used to run inference as many times as needed on any of the supported tasks.
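As a sketch of what such a call might look like — the exact `predict()` signature, argument order, and return values below are assumptions for illustration, not taken from this document:

```python
# Sketch only: assumes predict(input, task_str, tgt_lang) returns
# (translated_text, wav, sample_rate). Verify against the library's
# actual API before relying on this.
translated_text, wav, sr = translator.predict("input.wav", "s2st", "fra")
print(translated_text)
```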