Kaynağa Gözat

Merge pull request #51 from fairinternal/commercial_vocoder_asset

Add a new asset card for the commercial-license vocoder.
Pierre Andrews 1 yıl önce
ebeveyn
işleme
8a1808944c

+ 201 - 0
src/seamless_communication/assets/cards/vocoder_commercial.yaml

@@ -0,0 +1,201 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: vocoder_commercial
+model_type: vocoder_code_hifigan
+model_arch: base
+checkpoint: "file://large_experiments/seamless/ust/krs/M4T_Vocoder/lang_36_commercial/km_10000/seed_1/g_00600000"
+model_config: {
+  "lang_spkr_idx_map": {
+      "multilingual": {
+        "arb": 0,
+        "ben": 1,
+        "cat": 2,
+        "ces": 3,
+        "cmn": 4,
+        "cym": 5,
+        "dan": 6,
+        "deu": 7,
+        "eng": 8,
+        "est": 9,
+        "fin": 10,
+        "fra": 11,
+        "hin": 12,
+        "ind": 13,
+        "ita": 14,
+        "jpn": 15,
+        "kor": 16,
+        "mlt": 17,
+        "nld": 18,
+        "pes": 19,
+        "pol": 20,
+        "por": 21,
+        "ron": 22,
+        "rus": 23,
+        "slk": 24,
+        "spa": 25,
+        "swe": 26,
+        "swh": 27,
+        "tel": 28,
+        "tgl": 29,
+        "tha": 30,
+        "tur": 31,
+        "ukr": 32,
+        "urd": 33,
+        "uzn": 34,
+        "vie": 35
+      },
+      "multispkr": {
+        "arb": [
+            0
+        ],
+        "ben": [
+            1
+        ],
+        "cat": [
+            2
+        ],
+        "ces": [
+            3
+        ],
+        "cmn": [
+            4,
+            5
+        ],
+        "cym": [
+            6
+        ],
+        "dan": [
+            7,
+            8
+        ],
+        "deu": [
+            9
+        ],
+        "eng": [
+            10
+        ],
+        "est": [
+            11,
+            12,
+            13
+        ],
+        "fin": [
+            14
+        ],
+        "fra": [
+            15
+        ],
+        "hin": [
+            16
+        ],
+        "ind": [
+            17,
+            24,
+            18,
+            20,
+            19,
+            21,
+            23,
+            27,
+            26,
+            22,
+            25
+        ],
+        "ita": [
+            29,
+            28
+        ],
+        "jpn": [
+            30
+        ],
+        "kor": [
+            31
+        ],
+        "mlt": [
+            32,
+            33,
+            34
+        ],
+        "nld": [
+            35,
+            37,
+            36
+        ],
+        "pes": [
+            38
+        ],
+        "pol": [
+            39
+        ],
+        "por": [
+            40
+        ],
+        "ron": [
+            41
+        ],
+        "rus": [
+            43,
+            42
+        ],
+        "slk": [
+            44
+        ],
+        "spa": [
+            45
+        ],
+        "swe": [
+            46,
+            48,
+            47
+        ],
+        "swh": [
+            49,
+            51,
+            50
+        ],
+        "tel": [
+            52
+        ],
+        "tgl": [
+            53
+        ],
+        "tha": [
+            54,
+            57,
+            58,
+            55,
+            56
+        ],
+        "tur": [
+            61,
+            60,
+            59
+        ],
+        "ukr": [
+            62
+        ],
+        "urd": [
+            63,
+            64,
+            65
+        ],
+        "uzn": [
+            66,
+            67,
+            68
+        ],
+        "vie": [
+            69,
+            70,
+            73,
+            74,
+            71,
+            72
+        ]
+    }
+  }
+}

+ 1 - 1
src/seamless_communication/models/unity/t2u_builder.py

@@ -190,7 +190,7 @@ def _base_nar() -> UnitYT2UConfig:
 
     return UnitYT2UConfig(
         model_dim=1024,
-        unit_max_seq_len=2048,
+        unit_max_seq_len=4096,
         unit_vocabulary_size=10082,
         unit_pad_idx=1,
         num_encoder_layers=6,