Browse Source

Introduces create_raw_encoder in CharTokenizer (#65)

* Introduce create_raw_encoder
Can Balioglu 1 year ago
parent
commit
dc9dd18cae

+ 6 - 0
src/seamless_communication/models/tokenizer.py

@@ -111,6 +111,12 @@ class SPMTokenizer(TextTokenizer):
             pin_memory=pin_memory,
         )
 
+    @finaloverride
+    def create_raw_encoder(
+        self, *, device: Optional[Device] = None, pin_memory: bool = False
+    ) -> TextTokenEncoder:
+        return SentencePieceEncoder(self.model, device=device, pin_memory=pin_memory)
+
     @finaloverride
     def create_decoder(self) -> TextTokenDecoder:
         return SentencePieceDecoder(self.model)

+ 6 - 0
src/seamless_communication/models/unity/char_tokenizer.py

@@ -56,6 +56,12 @@ class CharTokenizer(TextTokenizer):
             pin_memory=pin_memory,
         )
 
+    @finaloverride
+    def create_raw_encoder(
+        self, *, device: Optional[Device] = None, pin_memory: bool = False
+    ) -> TextTokenEncoder:
+        return SentencePieceEncoder(self.model, device=device, pin_memory=pin_memory)
+
     @finaloverride
     def create_decoder(self) -> TextTokenDecoder:
         return SentencePieceDecoder(self.model)