2 年前 · 8687b689a5
--- a/README.md
+++ b/README.md
@@ -4,8 +4,8 @@ SeamlessM4T is designed to provide high quality translation, allowing people fro
 
				 
			
 
				 SeamlessM4T covers:
			
 
				 - 📥 101 languages for speech input
			
 
				-- ⌨️   96 Languages for text input/output
			
 
				-- 🗣️  35 languages for speech output.
			
 
				+- ⌨️ 96 Languages for text input/output
			
 
				+- 🗣️ 35 languages for speech output.
			
 
				 
			
 
				 This unified model enables multiple tasks without relying on multiple separate models:
			
 
				 - Speech-to-speech translation (S2ST)
			
--- a/scripts/m4t/finetune/dataset.py
+++ b/scripts/m4t/finetune/dataset.py
@@ -16,7 +16,9 @@ from pathlib import Path
 
				 from stopes.hub import load_config
			
 
				 from stopes.speech.tokenizers import SpeechTokenizer, SpeechTokenizerConfig
			
 
				 
			
 
				-from seamless_communication.datasets.hugginface import Speech2SpeechFleursDatasetBuilder
			
 
				+from seamless_communication.datasets.huggingface import (
			
 
				+    Speech2SpeechFleursDatasetBuilder,
			
 
				+)
			
 
				 
			
 
				 logging.basicConfig(
			
 
				     level=logging.INFO,
			
--- a/scripts/m4t/finetune/trainer.py
+++ b/scripts/m4t/finetune/trainer.py
@@ -56,7 +56,7 @@ class FinetuneParams:
 
				     """ Get eval loss after each `eval_steps` training steps """
			
 
				 
			
 
				     patience: int = 3
			
 
				-    """ Terminate if eval loss didn not improve
			
 
				+    """ Terminate if eval loss did not improve
			
 
				     over the last `patience * eval_steps` training steps"""
			
 
				 
			
 
				     learning_rate: float = 1e-5
			
--- a/src/seamless_communication/datasets/huggingface.py
+++ b/src/seamless_communication/datasets/huggingface.py