Selaa lähdekoodia

Always output the processed audio path as an absolute path (#214)

Co-authored-by: Peng-Jen Chen <pipibjc@devfair0728.h2.fair>
pipibjc 1 vuosi sitten
vanhempi
commit
c352dc8b9c

+ 7 - 7
src/seamless_communication/cli/expressivity/data/prepare_mexpresso.py

@@ -13,6 +13,7 @@ import logging
 import multiprocessing as mp
 import os
 import pandas as pd
+import pathlib
 import re
 import seamless_communication  # need this to load dataset cards
 import torchaudio
@@ -148,8 +149,9 @@ def main() -> None:
     )
     parser.add_argument(
         "output_folder",
-        type=str,
-        help="Output folder for the downsampled Expresso En audios and combined manifest.",
+        type=lambda p: pathlib.Path(p).resolve(),  # always convert to absolute path
+        help="Output folder for the downsampled Expresso En audios and combined manifest. "
+        "The output folder path will be expanded to absolute path.",
     )
     parser.add_argument(
         "--existing-expresso-root",
@@ -183,9 +185,9 @@ def main() -> None:
             f"The English Expresso dataset is downloaded to {en_expresso_root_path}"
         )
         en_expresso_path = en_expresso_root_path / "expresso"
-    en_expresso_folder = f"{args.output_folder}/En_Expresso"
+    en_expresso_folder = args.output_folder / "En_Expresso"
     en_expresso_df = build_en_manifest_from_oss(
-        Path(en_expresso_path), Path(en_expresso_folder)
+        Path(en_expresso_path), en_expresso_folder
     )
 
     for subset in ["dev", "test"]:
@@ -212,9 +214,7 @@ def main() -> None:
             df["tgt_lang"] = lang
             # Check all the audio files exist
             assert all(os.path.isfile(audio) for audio in df["src_audio"].tolist())
-            output_manifest_path = (
-                f"{args.output_folder}/{subset}_mexpresso_eng_{lang}.tsv"
-            )
+            output_manifest_path = args.output_folder / f"{subset}_mexpresso_eng_{lang}.tsv"
             df[
                 [
                     "id",