|
@@ -61,10 +61,14 @@ def build_data_pipeline(
|
|
) -> DataPipeline:
|
|
) -> DataPipeline:
|
|
with open(args.data_file, "r") as f:
|
|
with open(args.data_file, "r") as f:
|
|
header = f.readline().strip("\n").split("\t")
|
|
header = f.readline().strip("\n").split("\t")
|
|
|
|
+ assert args.audio_field in header, f"Input file does not contain {args.audio_field} field"
|
|
|
|
|
|
n_parallel = 4
|
|
n_parallel = 4
|
|
|
|
|
|
- split_tsv = StrSplitter(names=header)
|
|
|
|
|
|
+ split_tsv = StrSplitter(
|
|
|
|
+ names=["id", "audio"],
|
|
|
|
+ indices=[header.index("id"), header.index(args.audio_field)],
|
|
|
|
+ )
|
|
|
|
|
|
pipeline_builder = read_text(args.data_file, rtrim=True).skip(1).map(split_tsv)
|
|
pipeline_builder = read_text(args.data_file, rtrim=True).skip(1).map(split_tsv)
|
|
|
|
|
|
@@ -134,6 +138,12 @@ def main() -> None:
|
|
help="Root directory for the audio filenames in the data file.",
|
|
help="Root directory for the audio filenames in the data file.",
|
|
default="",
|
|
default="",
|
|
)
|
|
)
|
|
|
|
+ parser.add_argument(
|
|
|
|
+ "--audio_field",
|
|
|
|
+ type=str,
|
|
|
|
+ help="Field that includes the input audio file paths.",
|
|
|
|
+ default="src_audio",
|
|
|
|
+ )
|
|
parser.add_argument(
|
|
parser.add_argument(
|
|
"--ref_field",
|
|
"--ref_field",
|
|
type=str,
|
|
type=str,
|