Browse Source

[unity.cpp] Unify main branch & WA branch; fix n_threads (#240)

* Fix conversion to work with unity_micro

* Enable ggml_convert to work with micro/nano; fix n_threads

* Remove unnecessary changes

* Use a custom load_unity_model

* Support all 4 models
Ning 1 year ago
parent
commit
4e7d540cf2
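
The commit has two strands: the C++ side threads an n_threads argument from unity.cpp's entry point down through generate_sequence and _bootstrap_seqs_and_scores (previously hard-coded to 1 thread), and the Python side teaches ggml_convert.py and the unity builder to handle the fairseq2-trained micro/nano checkpoints. As a hedged sketch of the conversion entry point (convert_model is defined in ggml_convert.py below; the output-path argument is an assumption, its full signature is not shown in this diff):

    from pathlib import Path
    from ggml_convert import convert_model

    # Assumed call shape: model name plus an output path for the GGML file.
    convert_model("unity_micro", Path("unity_micro.ggml"))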

+ 9 - 7
ggml/examples/unity/fairseq2.cpp

@@ -1143,7 +1143,8 @@ extern "C" void _bootstrap_seqs_and_scores(
     ggml_tensor* full_seqs,
     ggml_tensor* scores,
     ggml_tensor* encoder_output,
-    ggml_tensor* encoder_padding_mask
+    ggml_tensor* encoder_padding_mask,
+    int n_threads
 ) {
     int prefix_seq_len = job.prefix_seq->ne[0];
     int max_seq_len = scores->ne[0];
@@ -1184,7 +1185,7 @@ extern "C" void _bootstrap_seqs_and_scores(
     ggml_tensor* lprobs = ggml_log_softmax(ctx, ggml_slice(ctx, logits, 1, 0, 1));
 
     ggml_cgraph gf = ggml_build_forward(lprobs);
-    ggml_graph_compute_with_ctx(ctx, &gf, 1);
+    ggml_graph_compute_with_ctx(ctx, &gf, n_threads);
     ggml_free(ctx);
     full_seqs->type = GGML_TYPE_I32;
     job.prefix_seq->type = GGML_TYPE_I32;
@@ -1324,7 +1325,8 @@ extern "C" Hypothesis* generate_sequence(
     const SequenceGeneratorJob& job,
     ggml_tensor* encoder_output,
     ggml_tensor* encoder_padding_mask,
-    ggml_context* result_ctx
+    ggml_context* result_ctx,
+    int n_threads
 ) {
     std::vector<uint8_t> local_bufs[3] = {
         std::vector<uint8_t>(1024 * 1024 * 1024),  // step_ctx
@@ -1361,7 +1363,7 @@ extern "C" Hypothesis* generate_sequence(
     ggml_set_f32(scores, 0.0);
 
     _bootstrap_seqs_and_scores(
-        model, job, seqs, scores, encoder_output, encoder_padding_mask
+        model, job, seqs, scores, encoder_output, encoder_padding_mask, n_threads
     );
     int prefix_seq_len = job.prefix_seq->ne[0];
     int start_step = prefix_seq_len - 1;
@@ -1403,7 +1405,7 @@ extern "C" Hypothesis* generate_sequence(
         // TODO: use ggml properly compute the tweaks
         ggml_cgraph gf = ggml_build_forward(lprobs);
         // printf("beam search step %d. Graph.n_nodes: %d\n", step_nr, gf.n_nodes);
-        ggml_graph_compute_with_ctx(step_ctx, &gf, 1);
+        ggml_graph_compute_with_ctx(step_ctx, &gf, n_threads);
         ggml_detach(lprobs);
 
         _tweak_lprobs(job, lprobs, step_nr, max_seq_len, vocab_size);
@@ -1425,7 +1427,7 @@ extern "C" Hypothesis* generate_sequence(
         }
 
         gf = ggml_build_forward(lprobs);
-        ggml_graph_compute_with_ctx(step_ctx, &gf, 1);
+        ggml_graph_compute_with_ctx(step_ctx, &gf, n_threads);
 
         // Determine (beam, token) candidates for the next step.
         // (N, 2 x B)
@@ -1470,7 +1472,7 @@ extern "C" Hypothesis* generate_sequence(
             ggml_cgraph gf_reorder = ggml_build_forward(new_seqs);
             ggml_build_forward_expand(&gf_reorder, new_scores);
             reorder_kv_cache(model, step_ctx, &gf_reorder, beam_indices);
-            ggml_graph_compute_with_ctx(step_ctx, &gf_reorder, 1);
+            ggml_graph_compute_with_ctx(step_ctx, &gf_reorder, n_threads);
             ggml_detach(new_seqs);
             ggml_detach(new_scores);
             new_seqs->type = GGML_TYPE_I32;
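
Every ggml_graph_compute_with_ctx call in the decoding loop now honors the caller-supplied thread count instead of the hard-coded 1, so each beam-search graph evaluation can parallelize. Since these functions are extern "C", here is a hedged ctypes sketch of the updated generate_sequence signature (the shared-library name and opaque pointer types are illustrative assumptions; the repo's own Python wrapper manages the real bindings):

    import ctypes

    lib = ctypes.CDLL("libfairseq2_cpp.so")  # hypothetical library name
    # model, job, encoder_output, encoder_padding_mask, result_ctx, n_threads
    lib.generate_sequence.argtypes = [ctypes.c_void_p] * 5 + [ctypes.c_int]
    lib.generate_sequence.restype = ctypes.c_void_p  # Hypothesis*
    # hyp = lib.generate_sequence(model, job, enc_out, None, result_ctx, 8)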

+ 2 - 1
ggml/examples/unity/fairseq2.h

@@ -297,7 +297,8 @@ extern "C" Hypothesis* generate_sequence(
     const SequenceGeneratorJob& opts,
     ggml_tensor* encoder_output,
     ggml_tensor* encoder_padding_mask,
-    ggml_context* result_ctx
+    ggml_context* result_ctx,
+    int n_threads
 );
 
 extern "C" void fairseq2_spm_tokenize(fairseq2_model* model, const char* text, ggml_tensor& out);

+ 2 - 1
ggml/examples/unity/unity.cpp

@@ -115,7 +115,7 @@ Hypothesis* unity_decode(
     ((int *)prefix_seq->data)[0]  = job.eos_idx;
     ((int *)prefix_seq->data)[1]  = tgt_lang_idx;
     job.prefix_seq = prefix_seq;
-    return generate_sequence(model, job, encoder_output, nullptr, model.ctx);
+    return generate_sequence(model, job, encoder_output, nullptr, model.ctx, n_threads);
 }
 
 int main(int argc, char ** argv) {
@@ -201,6 +201,7 @@ int main(int argc, char ** argv) {
         int n = fairseq2_spm_detokenize(&model, tokens, (char*)&result_str);
         std::cout << std::string((char*)&result_str, n) << std::endl;
         ggml_free(model.ctx);
+
     }
 
     return 0;
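
unity_decode simply forwards the n_threads it receives from main into generate_sequence. How the caller picks that value is outside this diff; a common heuristic (an assumption, not from this commit) is to use the core count while leaving headroom for the OS:

    import os

    # Leave one core free for the OS; never go below 1 thread.
    n_threads = max(1, (os.cpu_count() or 2) - 1)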

+ 151 - 3
ggml/ggml_convert.py

@@ -11,7 +11,7 @@ import struct
 from enum import Enum
 from io import BufferedWriter
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, Set, final
 
 import torch
 from fairseq2.assets import AssetCard
@@ -19,10 +19,147 @@ from fairseq2.models.transformer.frontend import TransformerEmbeddingFrontend
 from fairseq2.nn import SinusoidalPositionEncoder
 from fairseq2.nn.transformer import RelativePositionalEncoding
 from seamless_communication.models import unity
+from fairseq2.data.text import SentencePieceEncoder, SentencePieceTokenizerBase
+from fairseq2.data.typing import PathLike
+from typing import Sequence
+from fairseq2.typing import Device, finaloverride
+from fairseq2.models.utils import TokenizerLoaderBase
+from fairseq2.assets import asset_store, download_manager
+from seamless_communication.models.unity.builder import UnitYConfig, create_unity_model
+from fairseq2.models.utils import ModelLoader
+from seamless_communication.models.unity.model import UnitYModel
 
 import ggml
 
 Preprocessor = Callable[[Any], Any]
+SMALLER_MODELS = [
+    "unity_nano",
+    "unity_micro",
+]  # Trained with fairseq2, with custom dicts (not the original NLLB ones)
+
+
+@final
+class NllbLikeTokenizer(SentencePieceTokenizerBase):
+    """The only difference between this class and NllbTokenizer is it doesn't add a <pad> to control symbol list.
+    Since NllbTokenizer is defined as final, we couldn't inherit from it directly. So copying ~everything"""
+
+    langs: Set[str]
+    default_lang: str
+
+    def __init__(
+        self, pathname: PathLike, langs: Sequence[str], default_lang: str
+    ) -> None:
+        """
+        :param pathname:
+            The pathname of the SentencePiece model file.
+        :param langs:
+            The list of supported languages.
+        :param default_lang:
+            The fall-back language if no language is specified.
+        """
+        # Each language is represented by a `__lang__` control symbol.
+        control_symbols = [f"__{lang}__" for lang in langs]
+
+        # Internal control symbols that are not relevant for eval use.
+        control_symbols.extend(["<MINED_DATA>", "<MMT_BT_DATA>", "<SMT_BT_DATA>"])
+        super().__init__(pathname, control_symbols)
+
+        self.langs = set(langs)
+
+        self.default_lang = default_lang
+
+    @finaloverride
+    def create_encoder(
+        self,
+        *,
+        task: Optional[str] = None,
+        lang: Optional[str] = None,
+        mode: Optional[str] = None,
+        device: Optional[Device] = None,
+        pin_memory: bool = False,
+    ) -> SentencePieceEncoder:
+        """Create a token encoder.
+
+        :param task:
+            Must be 'translation'. If ``None``, defaults to 'translation'.
+        :param lang:
+            A language from :attr:`langs`. If ``None``, defaults to
+            :attr:`default_lang`.
+        :param mode:
+            Must be 'source' or 'target'. Set to 'source' if ``lang`` is the
+            source language; set to 'target' if ``lang`` is the target language.
+            If ``None``, defaults to 'source'.
+        :param device:
+            The device on which to construct tensors.
+        :param pin_memory:
+            If ``True``, uses pinned memory while constructing tensors.
+        """
+        if task is not None and task != "translation":
+            raise ValueError(f"`task` must be 'translation', but is '{task}' instead.")
+
+        if lang is None:
+            lang = self.default_lang
+
+        if lang not in self.langs:
+            raise ValueError(
+                f"`lang` must be a supported language, but is '{lang}' instead."
+            )
+
+        if mode is None or mode == "source":
+            # NLLB models expect a language token in place of BOS in source
+            # sequences.
+            prefix_tokens = [f"__{lang}__"]
+            suffix_tokens = ["</s>"]
+        elif mode == "source_mining":
+            prefix_tokens = [f"__{lang}__", "<MINED_DATA>"]
+            suffix_tokens = ["</s>"]
+        elif mode == "source_mmt_bt":
+            prefix_tokens = [f"__{lang}__", "<MMT_BT_DATA>"]
+            suffix_tokens = ["</s>"]
+        elif mode == "source_smt_bt":
+            prefix_tokens = [f"__{lang}__", "<SMT_BT_DATA>"]
+            suffix_tokens = ["</s>"]
+        elif mode == "target":
+            # Target sequences are expected to start with an EOS, followed by
+            # the language token.
+            prefix_tokens = ["</s>", f"__{lang}__"]
+            suffix_tokens = []
+        else:
+            raise ValueError(
+                f"`mode` must be 'source' or 'target', but is '{mode}' instead."
+            )
+
+        return SentencePieceEncoder(
+            self.model,
+            prefix_tokens=prefix_tokens,
+            suffix_tokens=suffix_tokens,
+            device=device,
+            pin_memory=pin_memory,
+        )
+
+
+load_unity_model_without_conversion = ModelLoader[UnitYModel, UnitYConfig](
+    asset_store,
+    download_manager,
+    unity.load_unity_config,
+    create_unity_model,
+    None,
+    restrict_checkpoints=False,
+)
+
+
+@final
+class NllbLikeTokenizerLoader(TokenizerLoaderBase[NllbLikeTokenizer]):
+    """Loads tokenizers used by NLLB models."""
+
+    @finaloverride
+    def _load(self, pathname: Path, card: AssetCard) -> NllbLikeTokenizer:
+        langs = card.field("langs").as_list(str)
+
+        default_lang = card.field("default_lang").as_(str)
+
+        return NllbLikeTokenizer(pathname, langs, default_lang)
 
 
 def convert_model(
@@ -44,9 +181,20 @@ def convert_model(
                     dataclasses.asdict(model_config), separator="__"
                 )
                 print(hparams)
-            model = unity.load_unity_model(model_name)
+            # Need to diverge here because the current default in SC is to convert from the fairseq1 ckpt format
+            if model_name in SMALLER_MODELS:
+                model = load_unity_model_without_conversion(model_name)
+            else:
+                model = unity.load_unity_model(model_name)
             if vocab is None:
-                tokenizer = unity.load_unity_text_tokenizer(model_name)
+                # Need to diverge here because the current default in SC is to add a separate <pad>
+                # as a control symbol in NllbTokenizer
+                if model_name in SMALLER_MODELS:
+                    tokenizer = NllbLikeTokenizerLoader(asset_store, download_manager)(
+                        model_name
+                    )
+                else:
+                    tokenizer = unity.load_unity_text_tokenizer(model_name)
                 vocab = read_vocab(tokenizer)
         else:
             raise ValueError(f"Unsupported model type: {model_name}")
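
The net effect on conversion: the two small models load through fairseq2-native checkpoints and the pad-free tokenizer, while the larger models keep the existing fairseq1-conversion path. A sketch using only names defined in this file (the unity_micro asset cards must be resolvable by the asset store; read_vocab is the existing helper called above):

    model = load_unity_model_without_conversion("unity_micro")
    tokenizer = NllbLikeTokenizerLoader(asset_store, download_manager)("unity_micro")
    vocab = read_vocab(tokenizer)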

+ 18 - 0
src/seamless_communication/cards/unity_micro.yaml

@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: unity_micro
+base: unity_micro_tokenizer
+model_arch: micro
+checkpoint: "file:///large_experiments/seamless/ust/dnn/ggml_models/unity_micro_checkpoint_best.pt"
+num_units: 10000
+unit_langs:
+  - eng
+  - hin
+  - por
+  - rus
+  - spa
+

+ 18 - 0
src/seamless_communication/cards/unity_micro_tokenizer.yaml

@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: unity_micro_tokenizer
+model_type: unity
+tokenizer: "file:///large_experiments/seamless/ust/dnn/ggml_models/5_5_20k.model"
+default_lang: eng
+langs:
+  - eng
+  - rus
+  - por
+  - hin
+  - spa
+
+

+ 17 - 0
src/seamless_communication/cards/unity_nano.yaml

@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: unity_nano
+base: unity_micro_tokenizer
+model_arch: nano
+checkpoint: "file:///large_experiments/seamless/ust/dnn/ggml_models/unity_nano_checkpoint_best.pt"
+num_units: 10000
+unit_langs:
+  - eng
+  - hin
+  - por
+  - rus
+  - spa
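
Both model cards chain to the shared unity_micro_tokenizer card through their base: field, so one SentencePiece model serves both sizes and fields like default_lang are inherited. One way to inspect how a card resolves (retrieve_card and field(...).as_(...) are standard fairseq2 asset APIs; the cards must be on the asset store's search path):

    from fairseq2.assets import asset_store

    card = asset_store.retrieve_card("unity_micro")
    print(card.field("model_arch").as_(str))    # "micro"
    print(card.field("default_lang").as_(str))  # "eng", inherited from unity_micro_tokenizer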

+ 155 - 1
src/seamless_communication/models/unity/builder.py

@@ -6,7 +6,7 @@
 
 from dataclasses import dataclass
 from typing import Optional, Union
-
+from fairseq2.data import VocabularyInfo
 from fairseq2.models.conformer import ConformerBlock, ConformerConvolution
 from fairseq2.models.nllb import NllbBuilder, NllbConfig, nllb_archs
 from fairseq2.models.utils.arch_registry import ArchitectureRegistry
@@ -161,6 +161,160 @@ def _medium() -> UnitYConfig:
     )
 
 
+@unity_arch("micro")
+def _micro() -> UnitYConfig:
+    return UnitYConfig(
+        model_dim=512,
+        w2v2_encoder_config=Wav2Vec2EncoderConfig(
+            model_dim=512,
+            max_seq_len=4096,
+            feature_dim=160,
+            use_fbank=True,
+            first_pass_dropout_p=0.0,
+            layer_norm_features=False,
+            feature_extractor_layer_descs=[],
+            feature_extractor_bias=False,
+            feature_extractor_layer_norm_convs=False,
+            feature_grad_scale=0,
+            num_fbank_channels=80,
+            fbank_stride=2,
+            sample_fbank_every_k=1,
+            pos_encoder_type="relative",
+            pos_encoder_depth=1,
+            pos_conv_kernel_size=128,
+            num_pos_conv_groups=16,
+            use_conformer=True,
+            num_encoder_layers=6,
+            num_encoder_attn_heads=16,
+            ffn_inner_dim=512 * 4,
+            dropout_p=0.0,
+            attn_dropout_p=0.0,
+            layer_drop_p=0.0,
+            norm_order=TransformerNormOrder.POST,
+            depthwise_conv_kernel_size=31,
+        ),
+        mt_model_config=NllbConfig(
+            model_dim=512,
+            max_seq_len=1024,
+            vocab_info=VocabularyInfo(
+                size=20010, unk_idx=3, bos_idx=0, eos_idx=2, pad_idx=1
+            ),
+            num_encoder_layers=1,
+            num_decoder_layers=3,
+            num_encoder_attn_heads=16,
+            num_decoder_attn_heads=16,
+            ffn_inner_dim=512 * 8,
+            dropout_p=0.1,
+        ),
+        t2u_config=UnitYT2UConfig(
+            model_dim=512,
+            unit_max_seq_len=2048,
+            target_vocab_info=VocabularyInfo(
+                size=10082, unk_idx=3, bos_idx=0, eos_idx=2, pad_idx=1
+            ),
+            num_encoder_layers=1,
+            num_decoder_layers=1,
+            nar_decoder_frontend_config=None,
+            nar_decoder_config=None,
+            num_encoder_attn_heads=16,
+            num_decoder_attn_heads=16,
+            ffn_inner_dim=512 * 8,
+            dropout_p=0.1,
+            use_gelu=False,
+            char_pad_idx=False,
+            use_prosody_proj=False,
+            prosody_encoder_dim=False,
+        ),
+        use_text_encoder=True,
+        use_conformer_adaptor=False,
+        num_adaptor_layers=1,
+        adaptor_kernel_size=8,
+        adaptor_stride=8,
+        adaptor_layer_norm=True,
+        adaptor_dropout_p=0.1,
+        prosody_encoder_config=None,
+        use_text_decoder=True,
+        use_gelu=False,
+    )
+
+
+@unity_arch("nano")
+def _nano() -> UnitYConfig:
+    return UnitYConfig(
+        model_dim=256,
+        w2v2_encoder_config=Wav2Vec2EncoderConfig(
+            model_dim=256,
+            max_seq_len=4096,
+            feature_dim=160,
+            use_fbank=True,
+            first_pass_dropout_p=0.0,
+            layer_norm_features=False,
+            feature_extractor_layer_descs=[],
+            feature_extractor_bias=False,
+            feature_extractor_layer_norm_convs=False,
+            feature_grad_scale=0,
+            num_fbank_channels=80,
+            fbank_stride=2,
+            sample_fbank_every_k=1,
+            pos_encoder_type="relative",
+            pos_encoder_depth=1,
+            pos_conv_kernel_size=128,
+            num_pos_conv_groups=16,
+            use_conformer=True,
+            num_encoder_layers=6,
+            num_encoder_attn_heads=16,
+            ffn_inner_dim=256 * 4,
+            dropout_p=0.0,
+            attn_dropout_p=0.0,
+            layer_drop_p=0.0,
+            norm_order=TransformerNormOrder.POST,
+            depthwise_conv_kernel_size=31,
+        ),
+        mt_model_config=NllbConfig(
+            model_dim=256,
+            max_seq_len=1024,
+            vocab_info=VocabularyInfo(
+                size=20010, unk_idx=3, bos_idx=0, eos_idx=2, pad_idx=1
+            ),
+            num_encoder_layers=1,
+            num_decoder_layers=3,
+            num_encoder_attn_heads=16,
+            num_decoder_attn_heads=16,
+            ffn_inner_dim=256 * 8,
+            dropout_p=0.1,
+        ),
+        t2u_config=UnitYT2UConfig(
+            model_dim=256,
+            unit_max_seq_len=2048,
+            target_vocab_info=VocabularyInfo(
+                size=10082, unk_idx=3, bos_idx=0, eos_idx=2, pad_idx=1
+            ),
+            num_encoder_layers=1,
+            num_decoder_layers=1,
+            nar_decoder_frontend_config=None,
+            nar_decoder_config=None,
+            num_encoder_attn_heads=16,
+            num_decoder_attn_heads=16,
+            ffn_inner_dim=256 * 8,
+            dropout_p=0.1,
+            use_gelu=False,
+            char_pad_idx=False,
+            use_prosody_proj=False,
+            prosody_encoder_dim=False,
+        ),
+        use_text_encoder=True,
+        use_conformer_adaptor=False,
+        num_adaptor_layers=1,
+        adaptor_kernel_size=8,
+        adaptor_stride=8,
+        adaptor_layer_norm=True,
+        adaptor_dropout_p=0.1,
+        prosody_encoder_config=None,
+        use_text_decoder=True,
+        use_gelu=False,
+    )
+
+
 @unity_arch("base_v2")
 def _base_v2() -> UnitYConfig:
     w2v2_chunk_encoder_config = wav2vec2_chunk_archs.get_config("600m")
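
With micro and nano registered via @unity_arch, they resolve through the same architecture registry as the existing sizes, which is what lets load_unity_model_without_conversion build them from a card's model_arch field. A minimal sketch (unity_archs is assumed to be the registry behind the @unity_arch decorator, with get_config as the standard ArchitectureRegistry accessor; device/dtype arguments are omitted):

    from seamless_communication.models.unity.builder import create_unity_model, unity_archs

    config = unity_archs.get_config("micro")
    model = create_unity_model(config)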