2 жил өмнө · 8fac405bcb
--- a/src/seamless_communication/assets/cards/unity_nar_multilingual.yaml
+++ b/src/seamless_communication/assets/cards/unity_nar_multilingual.yaml
@@ -4,7 +4,7 @@
 
				 # This source code is licensed under the BSD-style license found in the
			
 
				 # LICENSE file in the root directory of this source tree.
			
 
				 
			
 
				-name: nar_multilingual
			
 
				+name: unity_nar_multilingual
			
 
				 base: unity_nllb-100
			
 
				 model_arch: nar_multilingual
			
 
				 char_tokenizer: "file://checkpoint/krs/unity2/spm_char_lang38_tc.model"
			
--- a/src/seamless_communication/models/inference/translator.py
+++ b/src/seamless_communication/models/inference/translator.py
@@ -26,6 +26,7 @@ from seamless_communication.models.unity import (
 
				     UnitTokenizer,
			
 
				     UnitYGenerator,
			
 
				     UnitYModel,
			
 
				+    UnitYT2UModel,
			
 
				     load_unity_model,
			
 
				     load_unity_text_tokenizer,
			
 
				     load_unity_unit_tokenizer,
			
@@ -242,6 +243,13 @@ class Translator(nn.Module):
 
				         if output_modality == Modality.TEXT:
			
 
				             return text_out.sentences[0], None, None
			
 
				         else:
			
 
				-            units = unit_out.units[:, 1:][0].cpu().numpy().tolist()
			
 
				+            if isinstance(self.model.t2u_model, UnitYT2UModel):
			
 
				+                # Remove the leading lang token for autoregressive (AR) UnitY.
			
 
				+                units = unit_out.units[:, 1:]
			
 
				+            else:
			
 
				+                units = unit_out.units
			
 
				+
			
 
				+            # TODO: batch size is fixed at 1 for now (only units[0] is used); implement batching.
			
 
				+            units = units[0].cpu().numpy().tolist()
			
 
				             wav_out = self.vocoder(units, tgt_lang, spkr, dur_prediction=True)
			
 
				             return text_out.sentences[0], wav_out, sample_rate