@@ -11,7 +11,7 @@ import struct
 from enum import Enum
 from io import BufferedWriter
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, Set, final
 
 import torch
 from fairseq2.assets import AssetCard
@@ -19,10 +19,147 @@ from fairseq2.models.transformer.frontend import TransformerEmbeddingFrontend
 from fairseq2.nn import SinusoidalPositionEncoder
 from fairseq2.nn.transformer import RelativePositionalEncoding
 from seamless_communication.models import unity
+from fairseq2.data.text import SentencePieceTokenizerBase
+from fairseq2.data.typing import PathLike
+from typing import Sequence
+from fairseq2.data.text import SentencePieceEncoder, SentencePieceTokenizerBase
+from fairseq2.typing import Device, finaloverride
+from fairseq2.models.utils import TokenizerLoaderBase
+from fairseq2.assets import asset_store, download_manager
+from seamless_communication.models.unity.builder import UnitYConfig, create_unity_model
+from fairseq2.models.utils import ModelLoader
+from seamless_communication.models.unity.model import UnitYModel
 
 import ggml
 
 Preprocessor = Callable[[Any], Any]
 
+SMALLER_MODELS = [
+    "unity_nano",
+    "unity_micro",
+]  # Trained with fairseq2, with custom dict (not original NLLB ones)
+
+
+@final
+class NllbLikeTokenizer(SentencePieceTokenizerBase):
+    """The only difference between this class and NllbTokenizer is it doesn't add a <pad> to control symbol list.
+    Since NllbTokenizer is defined as final, we couldn't inherit from it directly. So copying ~everything"""
+
+    langs: Set[str]
+    default_lang: str
+
+    def __init__(
+        self, pathname: PathLike, langs: Sequence[str], default_lang: str
+    ) -> None:
+        """
+        :param pathname:
+            The pathname of the SentencePiece model file.
+        :param langs:
+            The list of supported languages.
+        :param default_lang:
+            The fall-back language if no language is specified.
+        """
+        # Each language is represented by a `__lang__` control symbol.
+        control_symbols = [f"__{lang}__" for lang in langs]
+
+        # Internal control symbols that are not relevant for eval use.
+        control_symbols.extend(["<MINED_DATA>", "<MMT_BT_DATA>", "<SMT_BT_DATA>"])
+        super().__init__(pathname, control_symbols)
+
+        self.langs = set(langs)
+
+        self.default_lang = default_lang
+
+    @finaloverride
+    def create_encoder(
+        self,
+        *,
+        task: Optional[str] = None,
+        lang: Optional[str] = None,
+        mode: Optional[str] = None,
+        device: Optional[Device] = None,
+        pin_memory: bool = False,
+    ) -> SentencePieceEncoder:
+        """Create a token encoder.
+
+        :param task:
+            Must be 'translation'. If ``None``, defaults to 'translation'.
+        :param lang:
+            A language from :attr:`langs`. If ``None``, defaults to
+            :attr:`default_lang`.
+        :param mode:
+            Must be 'source' or 'target'. Set to 'source' if ``lang`` is the
+            source language; set to 'target' if ``lang`` is the target language.
+            If ``None``, defaults to 'source'.
+        :param device:
+            The device on which to construct tensors.
+        :param pin_memory:
+            If ``True``, uses pinned memory while constructing tensors.
+        """
+        if task is not None and task != "translation":
+            raise ValueError(f"`task` must be 'translation', but is '{task}' instead.")
+
+        if lang is None:
+            lang = self.default_lang
+
+        if lang not in self.langs:
+            raise ValueError(
+                f"`lang` must be a supported language, but is '{lang}' instead."
+            )
+
+        if mode is None or mode == "source":
+            # NLLB models expect a language token in place of BOS in source
+            # sequences.
+            prefix_tokens = [f"__{lang}__"]
+            suffix_tokens = ["</s>"]
+        elif mode == "source_mining":
+            prefix_tokens = [f"__{lang}__", "<MINED_DATA>"]
+            suffix_tokens = ["</s>"]
+        elif mode == "source_mmt_bt":
+            prefix_tokens = [f"__{lang}__", "<MMT_BT_DATA>"]
+            suffix_tokens = ["</s>"]
+        elif mode == "source_smt_bt":
+            prefix_tokens = [f"__{lang}__", "<SMT_BT_DATA>"]
+            suffix_tokens = ["</s>"]
+        elif mode == "target":
+            # Target sequences are expected to start with an EOS, followed by
+            # the language token.
+            prefix_tokens = ["</s>", f"__{lang}__"]
+            suffix_tokens = []
+        else:
+            raise ValueError(
+                f"`mode` must be 'source' or 'target', but is '{mode}' instead."
+            )
+
+        return SentencePieceEncoder(
+            self.model,
+            prefix_tokens=prefix_tokens,
+            suffix_tokens=suffix_tokens,
+            device=device,
+            pin_memory=pin_memory,
+        )
+
+
+load_unity_model_without_conversion = ModelLoader[UnitYModel, UnitYConfig](
+    asset_store,
+    download_manager,
+    unity.load_unity_config,
+    create_unity_model,
+    None,
+    restrict_checkpoints=False,
+)
+
+
+@final
+class NllbLikeTokenizerLoader(TokenizerLoaderBase[NllbLikeTokenizer]):
+    """Loads tokenizers used by NLLB models."""
+
+    @finaloverride
+    def _load(self, pathname: Path, card: AssetCard) -> NllbLikeTokenizer:
+        langs = card.field("langs").as_list(str)
+
+        default_lang = card.field("default_lang").as_(str)
+
+        return NllbLikeTokenizer(pathname, langs, default_lang)
 
 def convert_model(
@@ -44,9 +181,20 @@ def convert_model(
                     dataclasses.asdict(model_config), separator="__"
                 )
                 print(hparams)
-            model = unity.load_unity_model(model_name)
+            # Need the diverge here because current default in SC is to convert from fairseq1 ckpt format
+            if model_name in SMALLER_MODELS:
+                model = load_unity_model_without_conversion(model_name)
+            else:
+                model = unity.load_unity_model(model_name)
             if vocab is None:
-                tokenizer = unity.load_unity_text_tokenizer(model_name)
+                # Need the diverge here because current default in SC is to add a separate <pad>
+                # as control symbol in NllbTokenizer
+                if model_name in SMALLER_MODELS:
+                    tokenizer = NllbLikeTokenizerLoader(asset_store, download_manager)(
+                        model_name
+                    )
+                else:
+                    tokenizer = unity.load_unity_text_tokenizer(model_name)
                 vocab = read_vocab(tokenizer)
         else:
            raise ValueError(f"Unsupported model type: {model_name}")
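
For reference, a minimal usage sketch of the new code path introduced by this patch (illustrative only, not part of the diff): it assumes an asset card for one of the smaller fairseq2-trained checkpoints (e.g. "unity_micro") is registered in `asset_store`, and that "eng" appears in that card's `langs` field.

    # Hypothetical example: load a SMALLER_MODELS checkpoint without fairseq1
    # conversion and build a source-mode encoder from its custom SentencePiece dict.
    model = load_unity_model_without_conversion("unity_micro")
    tokenizer = NllbLikeTokenizerLoader(asset_store, download_manager)("unity_micro")
    token_encoder = tokenizer.create_encoder(task="translation", lang="eng", mode="source")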