소스 검색

Always output the processed audio path as an absolute path (#214)

Co-authored-by: Peng-Jen Chen <pipibjc@devfair0728.h2.fair>
pipibjc 1 년 전
부모
커밋
c352dc8b9c
1개의 파일이 변경되었습니다: 7개의 추가 및 7개의 삭제
  1. 7 7
      src/seamless_communication/cli/expressivity/data/prepare_mexpresso.py

+ 7 - 7
src/seamless_communication/cli/expressivity/data/prepare_mexpresso.py

@@ -13,6 +13,7 @@ import logging
 import multiprocessing as mp
 import multiprocessing as mp
 import os
 import os
 import pandas as pd
 import pandas as pd
+import pathlib
 import re
 import re
 import seamless_communication  # need this to load dataset cards
 import seamless_communication  # need this to load dataset cards
 import torchaudio
 import torchaudio
@@ -148,8 +149,9 @@ def main() -> None:
     )
     )
     parser.add_argument(
     parser.add_argument(
         "output_folder",
         "output_folder",
-        type=str,
-        help="Output folder for the downsampled Expresso En audios and combined manifest.",
+        type=lambda p: pathlib.Path(p).resolve(),  # always convert to absolute path
+        help="Output folder for the downsampled Expresso En audios and combined manifest. "
+        "The output folder path will be expanded to absolute path.",
     )
     )
     parser.add_argument(
     parser.add_argument(
         "--existing-expresso-root",
         "--existing-expresso-root",
@@ -183,9 +185,9 @@ def main() -> None:
             f"The English Expresso dataset is downloaded to {en_expresso_root_path}"
             f"The English Expresso dataset is downloaded to {en_expresso_root_path}"
         )
         )
         en_expresso_path = en_expresso_root_path / "expresso"
         en_expresso_path = en_expresso_root_path / "expresso"
-    en_expresso_folder = f"{args.output_folder}/En_Expresso"
+    en_expresso_folder = args.output_folder / "En_Expresso"
     en_expresso_df = build_en_manifest_from_oss(
     en_expresso_df = build_en_manifest_from_oss(
-        Path(en_expresso_path), Path(en_expresso_folder)
+        Path(en_expresso_path), en_expresso_folder
     )
     )
 
 
     for subset in ["dev", "test"]:
     for subset in ["dev", "test"]:
@@ -212,9 +214,7 @@ def main() -> None:
             df["tgt_lang"] = lang
             df["tgt_lang"] = lang
             # Check all the audio files exist
             # Check all the audio files exist
             assert all(os.path.isfile(audio) for audio in df["src_audio"].tolist())
             assert all(os.path.isfile(audio) for audio in df["src_audio"].tolist())
-            output_manifest_path = (
-                f"{args.output_folder}/{subset}_mexpresso_eng_{lang}.tsv"
-            )
+            output_manifest_path = args.output_folder / f"{subset}_mexpresso_eng_{lang}.tsv"
             df[
             df[
                 [
                 [
                     "id",
                     "id",