Pārlūkot izejas kodu

Fix tests in unity.cpp & cleanup (#204)

* update paths to s3

* Fix & clean up test_unity_cpp.py

* revert unnecessary changes

* delete local test.wav

* remove python wrapper of KNF

* remove test audio sample
Ning 1 gadu atpakaļ
vecāks
revīzija
302b501515
23 mainīti faili ar 50 papildinājumiem un 1336 dzēšanām
  1. BIN
      ggml/LJ037-0171_sr16k_test.wav
  2. 0 2
      ggml/examples/kaldi-native-fbank/python/CMakeLists.txt
  3. 0 28
      ggml/examples/kaldi-native-fbank/python/csrc/CMakeLists.txt
  4. 0 57
      ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.cc
  5. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.h
  6. 0 66
      ggml/examples/kaldi-native-fbank/python/csrc/feature-window.cc
  7. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/feature-window.h
  8. 0 37
      ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc
  9. 0 27
      ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h
  10. 0 58
      ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.cc
  11. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.h
  12. 0 68
      ggml/examples/kaldi-native-fbank/python/csrc/online-feature.cc
  13. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/online-feature.h
  14. 0 134
      ggml/examples/kaldi-native-fbank/python/csrc/utils.cc
  15. 0 52
      ggml/examples/kaldi-native-fbank/python/csrc/utils.h
  16. 0 6
      ggml/examples/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
  17. 0 31
      ggml/examples/kaldi-native-fbank/python/tests/CMakeLists.txt
  18. 0 198
      ggml/examples/kaldi-native-fbank/python/tests/test_fbank_options.py
  19. 0 119
      ggml/examples/kaldi-native-fbank/python/tests/test_frame_extraction_options.py
  20. 0 107
      ggml/examples/kaldi-native-fbank/python/tests/test_mel_bank_options.py
  21. 0 48
      ggml/examples/kaldi-native-fbank/python/tests/test_online_fbank.py
  22. BIN
      ggml/test_data/test.wav
  23. 50 178
      ggml/test_unity_cpp.py

BIN
ggml/LJ037-0171_sr16k_test.wav


+ 0 - 2
ggml/examples/kaldi-native-fbank/python/CMakeLists.txt

@@ -1,2 +0,0 @@
-add_subdirectory(csrc)
-add_subdirectory(tests)

+ 0 - 28
ggml/examples/kaldi-native-fbank/python/csrc/CMakeLists.txt

@@ -1,28 +0,0 @@
-pybind11_add_module(_kaldi_native_fbank
-  feature-fbank.cc
-  feature-window.cc
-  kaldi-native-fbank.cc
-  mel-computations.cc
-  online-feature.cc
-  utils.cc
-)
-
-if(APPLE)
-  execute_process(
-    COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
-  )
-  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
-  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
-endif()
-
-if(NOT WIN32)
-  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${kaldi_native_fbank_rpath_origin}/kaldi_native_fbank/lib")
-endif()
-
-target_link_libraries(_kaldi_native_fbank PRIVATE kaldi-native-fbank-core)
-
-install(TARGETS _kaldi_native_fbank
-  DESTINATION ../
-)

+ 0 - 57
ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.cc

@@ -1,57 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
-
-#include <memory>
-#include <string>
-
-#include "feature-fbank.h"
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-namespace knf {
-
-static void PybindFbankOptions(py::module &m) {  // NOLINT
-  using PyClass = FbankOptions;
-  py::class_<PyClass>(m, "FbankOptions")
-      .def(py::init<>())
-      .def_readwrite("frame_opts", &PyClass::frame_opts)
-      .def_readwrite("mel_opts", &PyClass::mel_opts)
-      .def_readwrite("use_energy", &PyClass::use_energy)
-      .def_readwrite("energy_floor", &PyClass::energy_floor)
-      .def_readwrite("raw_energy", &PyClass::raw_energy)
-      .def_readwrite("htk_compat", &PyClass::htk_compat)
-      .def_readwrite("use_log_fbank", &PyClass::use_log_fbank)
-      .def_readwrite("use_power", &PyClass::use_power)
-      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
-      .def("as_dict",
-           [](const PyClass &self) -> py::dict { return AsDict(self); })
-      .def_static(
-          "from_dict",
-          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
-      .def(py::pickle(
-          [](const PyClass &self) -> py::dict { return AsDict(self); },
-          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
-}
-
-void PybindFeatureFbank(py::module &m) {  // NOLINT
-  PybindFbankOptions(m);
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindFeatureFbank(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_

+ 0 - 66
ggml/examples/kaldi-native-fbank/python/csrc/feature-window.cc

@@ -1,66 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/feature-window.h"
-
-#include <string>
-
-#include "feature-window.h"
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-namespace knf {
-
-static void PybindFrameExtractionOptions(py::module &m) {  // NOLINT
-  using PyClass = FrameExtractionOptions;
-  py::class_<PyClass>(m, "FrameExtractionOptions")
-      .def(py::init<>())
-      .def_readwrite("samp_freq", &PyClass::samp_freq)
-      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
-      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
-      .def_readwrite("dither", &PyClass::dither)
-      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
-      .def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
-      .def_readwrite("window_type", &PyClass::window_type)
-      .def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
-      .def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
-      .def_readwrite("snip_edges", &PyClass::snip_edges)
-      .def("as_dict",
-           [](const PyClass &self) -> py::dict { return AsDict(self); })
-      .def_static("from_dict",
-                  [](py::dict dict) -> PyClass {
-                    return FrameExtractionOptionsFromDict(dict);
-                  })
-#if 0
-      .def_readwrite("allow_downsample",
-                     &PyClass::allow_downsample)
-      .def_readwrite("allow_upsample", &PyClass::allow_upsample)
-#endif
-      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
-      .def(py::pickle(
-          [](const PyClass &self) -> py::dict { return AsDict(self); },
-          [](py::dict dict) -> PyClass {
-            return FrameExtractionOptionsFromDict(dict);
-          }));
-}
-
-void PybindFeatureWindow(py::module &m) {  // NOLINT
-  PybindFrameExtractionOptions(m);
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/feature-window.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindFeatureWindow(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_

+ 0 - 37
ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc

@@ -1,37 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
-#include "kaldi-native-fbank/python/csrc/feature-window.h"
-#include "kaldi-native-fbank/python/csrc/mel-computations.h"
-#include "kaldi-native-fbank/python/csrc/online-feature.h"
-
-namespace knf {
-
-PYBIND11_MODULE(_kaldi_native_fbank, m) {
-  m.doc() = "Python wrapper for kaldi native fbank";
-  PybindFeatureWindow(m);
-  PybindMelComputations(m);
-  PybindFeatureFbank(m);
-
-  PybindOnlineFeature(m);
-}
-
-}  // namespace knf

+ 0 - 27
ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h

@@ -1,27 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
-
-#include "pybind11/numpy.h"
-#include "pybind11/pybind11.h"
-#include "pybind11/stl.h"
-namespace py = pybind11;
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_

+ 0 - 58
ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.cc

@@ -1,58 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/mel-computations.h"
-
-#include <string>
-
-#include "mel-computations.h"
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-namespace knf {
-
-static void PybindMelBanksOptions(py::module &m) {  // NOLINT
-  using PyClass = MelBanksOptions;
-  py::class_<PyClass>(m, "MelBanksOptions")
-      .def(py::init<>())
-      .def_readwrite("num_bins", &PyClass::num_bins)
-      .def_readwrite("low_freq", &PyClass::low_freq)
-      .def_readwrite("high_freq", &PyClass::high_freq)
-      .def_readwrite("vtln_low", &PyClass::vtln_low)
-      .def_readwrite("vtln_high", &PyClass::vtln_high)
-      .def_readwrite("debug_mel", &PyClass::debug_mel)
-      .def_readwrite("htk_mode", &PyClass::htk_mode)
-      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
-      .def("as_dict",
-           [](const PyClass &self) -> py::dict { return AsDict(self); })
-      .def_static("from_dict",
-                  [](py::dict dict) -> PyClass {
-                    return MelBanksOptionsFromDict(dict);
-                  })
-      .def(py::pickle(
-          [](const PyClass &self) -> py::dict { return AsDict(self); },
-          [](py::dict dict) -> PyClass {
-            return MelBanksOptionsFromDict(dict);
-          }));
-}
-
-void PybindMelComputations(py::module &m) {  // NOLINT
-  PybindMelBanksOptions(m);
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindMelComputations(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_

+ 0 - 68
ggml/examples/kaldi-native-fbank/python/csrc/online-feature.cc

@@ -1,68 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/online-feature.h"
-
-#include <string>
-#include <vector>
-
-#include "online-feature.h"
-namespace knf {
-
-template <typename C>
-void PybindOnlineFeatureTpl(py::module &m,  // NOLINT
-                            const std::string &class_name,
-                            const std::string &class_help_doc = "") {
-  using PyClass = OnlineGenericBaseFeature<C>;
-  using Options = typename C::Options;
-  py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
-      .def(py::init<const Options &>(), py::arg("opts"))
-      .def_property_readonly("dim", &PyClass::Dim)
-      .def_property_readonly("frame_shift_in_seconds",
-                             &PyClass::FrameShiftInSeconds)
-      .def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
-      .def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
-      .def(
-          "get_frame",
-          [](py::object obj, int32_t frame) {
-            auto *self = obj.cast<PyClass *>();
-            const float *f = self->GetFrame(frame);
-            return py::array_t<float>({self->Dim()},    // shape
-                                      {sizeof(float)},  // stride in bytes
-                                      f,                // ptr
-                                      obj);  // it will increase the reference
-                                             // count of **this** vector
-          },
-          py::arg("frame"))
-      .def(
-          "accept_waveform",
-          [](PyClass &self, float sampling_rate,
-             const std::vector<float> &waveform) {
-            self.AcceptWaveform(sampling_rate, waveform.data(),
-                                waveform.size());
-          },
-          py::arg("sampling_rate"), py::arg("waveform"),
-          py::call_guard<py::gil_scoped_release>())
-      .def("input_finished", &PyClass::InputFinished);
-}
-
-void PybindOnlineFeature(py::module &m) {  // NOLINT
-  PybindOnlineFeatureTpl<FbankComputer>(m, "OnlineFbank");
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/online-feature.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindOnlineFeature(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_

+ 0 - 134
ggml/examples/kaldi-native-fbank/python/csrc/utils.cc

@@ -1,134 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-#include <string>
-
-#include "feature-window.h"
-
-#define FROM_DICT(type, key)         \
-  if (dict.contains(#key)) {         \
-    opts.key = py::type(dict[#key]); \
-  }
-
-#define AS_DICT(key) dict[#key] = opts.key
-
-namespace knf {
-
-FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
-  FrameExtractionOptions opts;
-
-  FROM_DICT(float_, samp_freq);
-  FROM_DICT(float_, frame_shift_ms);
-  FROM_DICT(float_, frame_length_ms);
-  FROM_DICT(float_, dither);
-  FROM_DICT(float_, preemph_coeff);
-  FROM_DICT(bool_, remove_dc_offset);
-  FROM_DICT(str, window_type);
-  FROM_DICT(bool_, round_to_power_of_two);
-  FROM_DICT(float_, blackman_coeff);
-  FROM_DICT(bool_, snip_edges);
-
-  return opts;
-}
-
-py::dict AsDict(const FrameExtractionOptions &opts) {
-  py::dict dict;
-
-  AS_DICT(samp_freq);
-  AS_DICT(frame_shift_ms);
-  AS_DICT(frame_length_ms);
-  AS_DICT(dither);
-  AS_DICT(preemph_coeff);
-  AS_DICT(remove_dc_offset);
-  AS_DICT(window_type);
-  AS_DICT(round_to_power_of_two);
-  AS_DICT(blackman_coeff);
-  AS_DICT(snip_edges);
-
-  return dict;
-}
-
-MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
-  MelBanksOptions opts;
-
-  FROM_DICT(int_, num_bins);
-  FROM_DICT(float_, low_freq);
-  FROM_DICT(float_, high_freq);
-  FROM_DICT(float_, vtln_low);
-  FROM_DICT(float_, vtln_high);
-  FROM_DICT(bool_, debug_mel);
-  FROM_DICT(bool_, htk_mode);
-
-  return opts;
-}
-py::dict AsDict(const MelBanksOptions &opts) {
-  py::dict dict;
-
-  AS_DICT(num_bins);
-  AS_DICT(low_freq);
-  AS_DICT(high_freq);
-  AS_DICT(vtln_low);
-  AS_DICT(vtln_high);
-  AS_DICT(debug_mel);
-  AS_DICT(htk_mode);
-
-  return dict;
-}
-
-FbankOptions FbankOptionsFromDict(py::dict dict) {
-  FbankOptions opts;
-
-  if (dict.contains("frame_opts")) {
-    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
-  }
-
-  if (dict.contains("mel_opts")) {
-    opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
-  }
-
-  FROM_DICT(bool_, use_energy);
-  FROM_DICT(float_, energy_floor);
-  FROM_DICT(bool_, raw_energy);
-  FROM_DICT(bool_, htk_compat);
-  FROM_DICT(bool_, use_log_fbank);
-  FROM_DICT(bool_, use_power);
-
-  return opts;
-}
-
-py::dict AsDict(const FbankOptions &opts) {
-  py::dict dict;
-
-  dict["frame_opts"] = AsDict(opts.frame_opts);
-  dict["mel_opts"] = AsDict(opts.mel_opts);
-  AS_DICT(use_energy);
-  AS_DICT(energy_floor);
-  AS_DICT(raw_energy);
-  AS_DICT(htk_compat);
-  AS_DICT(use_log_fbank);
-  AS_DICT(use_power);
-
-  return dict;
-}
-
-#undef FROM_DICT
-#undef AS_DICT
-
-}  // namespace knf

+ 0 - 52
ggml/examples/kaldi-native-fbank/python/csrc/utils.h

@@ -1,52 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
-
-#include "feature-fbank.h"
-#include "feature-window.h"
-#include "mel-computations.h"
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-/*
- * This file contains code about `from_dict` and
- * `as_dict` for various options in kaldi-native-fbank.
- *
- * Regarding `from_dict`, users don't need to provide
- * all the fields in the options. If some fields
- * are not provided, it just uses the default one.
- *
- * If the provided dict in `from_dict` is empty,
- * all fields use their default values.
- */
-
-namespace knf {
-
-FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
-py::dict AsDict(const FrameExtractionOptions &opts);
-
-MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
-py::dict AsDict(const MelBanksOptions &opts);
-
-FbankOptions FbankOptionsFromDict(py::dict dict);
-py::dict AsDict(const FbankOptions &opts);
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_

+ 0 - 6
ggml/examples/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py

@@ -1,6 +0,0 @@
-from _kaldi_native_fbank import (
-    FrameExtractionOptions,
-    MelBanksOptions,
-    OnlineFbank,
-    FbankOptions,
-)

+ 0 - 31
ggml/examples/kaldi-native-fbank/python/tests/CMakeLists.txt

@@ -1,31 +0,0 @@
-function(kaldi_native_fbank_add_py_test source)
-  get_filename_component(name ${source} NAME_WE)
-  set(name "${name}_py")
-
-    message(STATUS "source: ${source}")
-
-  add_test(NAME ${name}
-    COMMAND
-      "${PYTHON_EXECUTABLE}"
-      "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
-  )
-
-  get_filename_component(kaldi_native_fbank_path ${CMAKE_CURRENT_LIST_DIR} DIRECTORY)
-
-  set_property(TEST ${name}
-    PROPERTY ENVIRONMENT "PYTHONPATH=${kaldi_native_fbank_path}:$<TARGET_FILE_DIR:_kaldi_native_fbank>:$ENV{PYTHONPATH}"
-  )
-endfunction()
-
-# please sort the files in alphabetic order
-set(py_test_files
-  test_frame_extraction_options.py
-  test_mel_bank_options.py
-  test_fbank_options.py
-)
-
-if(KALDI_NATIVE_FBANK_BUILD_TESTS)
-  foreach(source IN LISTS py_test_files)
-    kaldi_native_fbank_add_py_test(${source})
-  endforeach()
-endif()

+ 0 - 198
ggml/examples/kaldi-native-fbank/python/tests/test_fbank_options.py

@@ -1,198 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
-
-
-import pickle
-
-import kaldi_native_fbank as knf
-
-
-def test_default():
-    opts = knf.FbankOptions()
-    assert opts.frame_opts.samp_freq == 16000
-    assert opts.frame_opts.frame_shift_ms == 10.0
-    assert opts.frame_opts.frame_length_ms == 25.0
-    assert opts.frame_opts.dither == 1.0
-    assert abs(opts.frame_opts.preemph_coeff - 0.97) < 1e-6
-    assert opts.frame_opts.remove_dc_offset is True
-    assert opts.frame_opts.window_type == "povey"
-    assert opts.frame_opts.round_to_power_of_two is True
-    assert abs(opts.frame_opts.blackman_coeff - 0.42) < 1e-6
-    assert opts.frame_opts.snip_edges is True
-
-    assert opts.mel_opts.num_bins == 23
-    assert opts.mel_opts.low_freq == 20
-    assert opts.mel_opts.high_freq == 0
-    assert opts.mel_opts.vtln_low == 100
-    assert opts.mel_opts.vtln_high == -500
-    assert opts.mel_opts.debug_mel is False
-    assert opts.mel_opts.htk_mode is False
-
-    assert opts.use_energy is False
-    assert opts.energy_floor == 0.0
-    assert opts.raw_energy is True
-    assert opts.htk_compat is False
-    assert opts.use_log_fbank is True
-    assert opts.use_power is True
-
-
-def test_set_get():
-    opts = knf.FbankOptions()
-    opts.use_energy = True
-    assert opts.use_energy is True
-
-    opts.energy_floor = 1
-    assert opts.energy_floor == 1
-
-    opts.raw_energy = False
-    assert opts.raw_energy is False
-
-    opts.htk_compat = True
-    assert opts.htk_compat is True
-
-    opts.use_log_fbank = False
-    assert opts.use_log_fbank is False
-
-    opts.use_power = False
-    assert opts.use_power is False
-
-
-def test_set_get_frame_opts():
-    opts = knf.FbankOptions()
-
-    opts.frame_opts.samp_freq = 44100
-    assert opts.frame_opts.samp_freq == 44100
-
-    opts.frame_opts.frame_shift_ms = 20.5
-    assert opts.frame_opts.frame_shift_ms == 20.5
-
-    opts.frame_opts.frame_length_ms = 1
-    assert opts.frame_opts.frame_length_ms == 1
-
-    opts.frame_opts.dither = 0.5
-    assert opts.frame_opts.dither == 0.5
-
-    opts.frame_opts.preemph_coeff = 0.25
-    assert opts.frame_opts.preemph_coeff == 0.25
-
-    opts.frame_opts.remove_dc_offset = False
-    assert opts.frame_opts.remove_dc_offset is False
-
-    opts.frame_opts.window_type = "hanning"
-    assert opts.frame_opts.window_type == "hanning"
-
-    opts.frame_opts.round_to_power_of_two = False
-    assert opts.frame_opts.round_to_power_of_two is False
-
-    opts.frame_opts.blackman_coeff = 0.25
-    assert opts.frame_opts.blackman_coeff == 0.25
-
-    opts.frame_opts.snip_edges = False
-    assert opts.frame_opts.snip_edges is False
-
-
-def test_set_get_mel_opts():
-    opts = knf.FbankOptions()
-
-    opts.mel_opts.num_bins = 100
-    assert opts.mel_opts.num_bins == 100
-
-    opts.mel_opts.low_freq = 22
-    assert opts.mel_opts.low_freq == 22
-
-    opts.mel_opts.high_freq = 1
-    assert opts.mel_opts.high_freq == 1
-
-    opts.mel_opts.vtln_low = 101
-    assert opts.mel_opts.vtln_low == 101
-
-    opts.mel_opts.vtln_high = -100
-    assert opts.mel_opts.vtln_high == -100
-
-    opts.mel_opts.debug_mel = True
-    assert opts.mel_opts.debug_mel is True
-
-    opts.mel_opts.htk_mode = True
-    assert opts.mel_opts.htk_mode is True
-
-
-def test_from_empty_dict():
-    opts = knf.FbankOptions.from_dict({})
-    opts2 = knf.FbankOptions()
-
-    assert str(opts) == str(opts2)
-
-
-def test_from_dict_partial():
-    d = {
-        "energy_floor": 10.5,
-        "htk_compat": True,
-        "mel_opts": {"num_bins": 80, "vtln_low": 1},
-        "frame_opts": {"window_type": "hanning"},
-    }
-    opts = knf.FbankOptions.from_dict(d)
-    assert opts.energy_floor == 10.5
-    assert opts.htk_compat is True
-    assert opts.mel_opts.num_bins == 80
-    assert opts.mel_opts.vtln_low == 1
-    assert opts.frame_opts.window_type == "hanning"
-
-    mel_opts = knf.MelBanksOptions.from_dict(d["mel_opts"])
-    assert str(opts.mel_opts) == str(mel_opts)
-
-
-def test_from_dict_full_and_as_dict():
-    opts = knf.FbankOptions()
-    opts.htk_compat = True
-    opts.mel_opts.num_bins = 80
-    opts.frame_opts.samp_freq = 10
-
-    d = opts.as_dict()
-    assert d["htk_compat"] is True
-    assert d["mel_opts"]["num_bins"] == 80
-    assert d["frame_opts"]["samp_freq"] == 10
-
-    mel_opts = knf.MelBanksOptions()
-    mel_opts.num_bins = 80
-    assert d["mel_opts"] == mel_opts.as_dict()
-
-    frame_opts = knf.FrameExtractionOptions()
-    frame_opts.samp_freq = 10
-    assert d["frame_opts"] == frame_opts.as_dict()
-
-    opts2 = knf.FbankOptions.from_dict(d)
-    assert str(opts2) == str(opts)
-
-    d["htk_compat"] = False
-    opts3 = knf.FbankOptions.from_dict(d)
-    assert opts3.htk_compat is False
-
-
-def test_pickle():
-    opts = knf.FbankOptions()
-    opts.use_energy = True
-    opts.use_power = False
-
-    opts.frame_opts.samp_freq = 44100
-    opts.mel_opts.num_bins = 100
-
-    data = pickle.dumps(opts)
-
-    opts2 = pickle.loads(data)
-    assert str(opts) == str(opts2)
-
-
-def main():
-    test_default()
-    test_set_get()
-    test_set_get_frame_opts()
-    test_set_get_mel_opts()
-    test_from_empty_dict()
-    test_from_dict_partial()
-    test_from_dict_full_and_as_dict()
-    test_pickle()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 119
ggml/examples/kaldi-native-fbank/python/tests/test_frame_extraction_options.py

@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
-
-import pickle
-
-import kaldi_native_fbank as knf
-
-
-def test_default():
-    opts = knf.FrameExtractionOptions()
-    assert opts.samp_freq == 16000
-    assert opts.frame_shift_ms == 10.0
-    assert opts.frame_length_ms == 25.0
-    assert opts.dither == 1.0
-    assert abs(opts.preemph_coeff - 0.97) < 1e-6
-    assert opts.remove_dc_offset is True
-    assert opts.window_type == "povey"
-    assert opts.round_to_power_of_two is True
-    assert abs(opts.blackman_coeff - 0.42) < 1e-6
-    assert opts.snip_edges is True
-
-
-def test_set_get():
-    opts = knf.FrameExtractionOptions()
-    opts.samp_freq = 44100
-    assert opts.samp_freq == 44100
-
-    opts.frame_shift_ms = 20.5
-    assert opts.frame_shift_ms == 20.5
-
-    opts.frame_length_ms = 1
-    assert opts.frame_length_ms == 1
-
-    opts.dither = 0.5
-    assert opts.dither == 0.5
-
-    opts.preemph_coeff = 0.25
-    assert opts.preemph_coeff == 0.25
-
-    opts.remove_dc_offset = False
-    assert opts.remove_dc_offset is False
-
-    opts.window_type = "hanning"
-    assert opts.window_type == "hanning"
-
-    opts.round_to_power_of_two = False
-    assert opts.round_to_power_of_two is False
-
-    opts.blackman_coeff = 0.25
-    assert opts.blackman_coeff == 0.25
-
-    opts.snip_edges = False
-    assert opts.snip_edges is False
-
-
-def test_from_empty_dict():
-    opts = knf.FrameExtractionOptions.from_dict({})
-    opts2 = knf.FrameExtractionOptions()
-
-    assert str(opts) == str(opts2)
-
-
-def test_from_dict_partial():
-    d = {"samp_freq": 10, "frame_shift_ms": 2}
-
-    opts = knf.FrameExtractionOptions.from_dict(d)
-
-    opts2 = knf.FrameExtractionOptions()
-    assert str(opts) != str(opts2)
-
-    opts2.samp_freq = 10
-    assert str(opts) != str(opts2)
-
-    opts2.frame_shift_ms = 2
-    assert str(opts) == str(opts2)
-
-    opts2.frame_shift_ms = 3
-    assert str(opts) != str(opts2)
-
-
-def test_from_dict_full_and_as_dict():
-    opts = knf.FrameExtractionOptions()
-    opts.samp_freq = 20
-    opts.frame_length_ms = 100
-
-    d = opts.as_dict()
-    for key, value in d.items():
-        assert value == getattr(opts, key)
-
-    opts2 = knf.FrameExtractionOptions.from_dict(d)
-    assert str(opts2) == str(opts)
-
-    d["window_type"] = "hanning"
-    opts3 = knf.FrameExtractionOptions.from_dict(d)
-    assert opts3.window_type == "hanning"
-
-
-def test_pickle():
-    opts = knf.FrameExtractionOptions()
-    opts.samp_freq = 44100
-    opts.dither = 5.5
-    data = pickle.dumps(opts)
-
-    opts2 = pickle.loads(data)
-    assert str(opts) == str(opts2)
-
-
-def main():
-    test_default()
-    test_set_get()
-    test_from_empty_dict()
-    test_from_dict_partial()
-    test_from_dict_full_and_as_dict()
-    test_pickle()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 107
ggml/examples/kaldi-native-fbank/python/tests/test_mel_bank_options.py

@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
-
-import pickle
-
-import kaldi_native_fbank as knf
-
-
-def test_default():
-    opts = knf.MelBanksOptions()
-    assert opts.num_bins == 25
-    assert opts.low_freq == 20
-    assert opts.high_freq == 0
-    assert opts.vtln_low == 100
-    assert opts.vtln_high == -500
-    assert opts.debug_mel is False
-    assert opts.htk_mode is False
-
-
-def test_set_get():
-    opts = knf.MelBanksOptions()
-    opts.num_bins = 100
-    assert opts.num_bins == 100
-
-    opts.low_freq = 22
-    assert opts.low_freq == 22
-
-    opts.high_freq = 1
-    assert opts.high_freq == 1
-
-    opts.vtln_low = 101
-    assert opts.vtln_low == 101
-
-    opts.vtln_high = -100
-    assert opts.vtln_high == -100
-
-    opts.debug_mel = True
-    assert opts.debug_mel is True
-
-    opts.htk_mode = True
-    assert opts.htk_mode is True
-
-
-def test_from_empty_dict():
-    opts = knf.MelBanksOptions.from_dict({})
-    opts2 = knf.MelBanksOptions()
-
-    assert str(opts) == str(opts2)
-
-
-def test_from_dict_partial():
-    d = {"num_bins": 10, "debug_mel": True}
-
-    opts = knf.MelBanksOptions.from_dict(d)
-
-    opts2 = knf.MelBanksOptions()
-    assert str(opts) != str(opts2)
-
-    opts2.num_bins = 10
-    assert str(opts) != str(opts2)
-
-    opts2.debug_mel = True
-    assert str(opts) == str(opts2)
-
-    opts2.debug_mel = False
-    assert str(opts) != str(opts2)
-
-
-def test_from_dict_full_and_as_dict():
-    opts = knf.MelBanksOptions()
-    opts.num_bins = 80
-    opts.vtln_high = 2
-
-    d = opts.as_dict()
-    for key, value in d.items():
-        assert value == getattr(opts, key)
-
-    opts2 = knf.MelBanksOptions.from_dict(d)
-    assert str(opts2) == str(opts)
-
-    d["htk_mode"] = True
-    opts3 = knf.MelBanksOptions.from_dict(d)
-    assert opts3.htk_mode is True
-
-
-def test_pickle():
-    opts = knf.MelBanksOptions()
-    opts.num_bins = 100
-    opts.low_freq = 22
-    data = pickle.dumps(opts)
-
-    opts2 = pickle.loads(data)
-    assert str(opts) == str(opts2)
-
-
-def main():
-    test_default()
-    test_set_get()
-    test_from_empty_dict()
-    test_from_dict_partial()
-    test_from_dict_full_and_as_dict()
-    test_pickle()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 48
ggml/examples/kaldi-native-fbank/python/tests/test_online_fbank.py

@@ -1,48 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-
-try:
-    import kaldifeat
-except:
-    print("Please install kaldifeat first")
-    sys.exit(0)
-
-import kaldi_native_fbank as knf
-import torch
-
-
-def main():
-    sampling_rate = 16000
-    samples = torch.randn(16000 * 10)
-
-    opts = kaldifeat.FbankOptions()
-    opts.frame_opts.dither = 0
-    opts.mel_opts.num_bins = 80
-    opts.frame_opts.snip_edges = False
-    opts.mel_opts.debug_mel = False
-
-    online_fbank = kaldifeat.OnlineFbank(opts)
-
-    online_fbank.accept_waveform(sampling_rate, samples)
-
-    opts = knf.FbankOptions()
-    opts.frame_opts.dither = 0
-    opts.mel_opts.num_bins = 80
-    opts.frame_opts.snip_edges = False
-    opts.mel_opts.debug_mel = False
-
-    fbank = knf.OnlineFbank(opts)
-    fbank.accept_waveform(sampling_rate, samples.tolist())
-
-    assert online_fbank.num_frames_ready == fbank.num_frames_ready
-    for i in range(fbank.num_frames_ready):
-        f1 = online_fbank.get_frame(i)
-        f2 = torch.from_numpy(fbank.get_frame(i))
-        assert torch.allclose(f1, f2, atol=1e-3), (i, (f1 - f2).abs().max())
-
-
-if __name__ == "__main__":
-    torch.manual_seed(20220825)
-    main()
-    print("success")

BIN
ggml/test_data/test.wav


+ 50 - 178
ggml/test_unity_cpp.py

@@ -1,20 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
 import ctypes
 import functools
-import logging
-import sys
 from ctypes import c_void_p
 from pathlib import Path
 from typing import Any, Iterator, List, Tuple
 
 import fairseq2.nn
 import fairseq2.nn.transformer
-from fairseq2.nn.padding import PaddingMask
 import numpy as np
 import pytest
 import torch
 import torchaudio
 from fairseq2.data.audio import WaveformToFbankConverter
-from fairseq2.generation import SequenceGeneratorOptions
+from seamless_communication.inference import SequenceGeneratorOptions
 from fairseq2.models.wav2vec2.feature_extractor import Wav2Vec2FbankFeatureExtractor
 from seamless_communication.inference.translator import Modality, Translator
 
@@ -22,6 +25,7 @@ import ggml
 from ctypes_utils import NULLPTR, Ptr
 from ggml import NativeObj
 from ggml_convert import convert_model, read_layer_config
+import requests
 
 Ctx = ggml.ggml_context_p
 
@@ -32,11 +36,10 @@ FAIRSEQ2_CPP = Path(__file__).parent / "examples/unity/fairseq2.cpp"
 UNITY_FLASH_ATTN = "\n# define UNITY_FLASH_ATTN 0\n" not in FAIRSEQ2_CPP.read_text()
 
 DATA = Path(__file__).parent / "test_data"
-DATA_DEV = DATA / "dev"
-if not DATA_DEV.exists():
-    DATA_DEV = Path(
-        "/private/home/dnn/internal_sc/seamless_communication/ggml/examples/unity/dev"
-    )
+LOCAL_AUDIO_SAMPLE_PATH = DATA / "LJ037-0171_sr16k.wav"
+TEST_AUDIO_SAMPLE_URL = (
+    "https://dl.fbaipublicfiles.com/seamless/tests/LJ037-0171_sr16k.wav"
+)
 
 
 @pytest.fixture(name="ctx")
@@ -74,6 +77,14 @@ def load_pt_model() -> Any:
     return load_translator().model
 
 
+def download_sample_audio() -> Any:
+    response = requests.get(TEST_AUDIO_SAMPLE_URL, stream=True)
+    with open(DATA / "LJ037-0171_sr16k.wav", "wb") as file:
+        for chunk in response.iter_content(chunk_size=1024):
+            if chunk:
+                file.write(chunk)
+
+
 def test_convert_linear(tmp_path: Path) -> None:
     module = fairseq2.nn.Linear(16, 24, True)
 
@@ -352,9 +363,6 @@ def test_StandardTransformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) ->
 
     gx = ggml.from_numpy(ctx, x)
     ggml.ggml_set_name(gx, b"x")
-    padding_mask = fairseq2.nn.padding.PaddingMask(torch.tensor([21, 21]), 21)
-    gpad = ggml.from_numpy(ctx, padding_mask.materialize())
-    ggml.ggml_set_name(gpad, b"padding_mask")
     gy = ggml.forward(
         "StandardTransformerEncoderLayer",
         g_model,
@@ -376,17 +384,11 @@ def test_StandardTransformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) ->
 
 def test_StandardConformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) -> None:
     pt_model = load_pt_model()
-    if not DATA_DEV.exists():
-        pytest.skip(reason=f"Folder {DATA_DEV} not found !")
-
-    x = torch.load(DATA_DEV / "seqs_before_conformer_block.pt")
-    padding_mask = PaddingMask(torch.ones(1, x.shape[1]), x.shape[1])
+    x = torch.rand(1, 137, 1024)
 
     layer = pt_model.speech_encoder.inner.layers[0]
     gx = ggml.from_numpy(ctx, x[0])
     ggml.ggml_set_name(gx, b"x")
-    gpad = ggml.from_numpy(ctx, padding_mask[0])
-    ggml.ggml_set_name(gpad, b"padding_mask")
     gy = ggml.forward(
         "StandardConformerEncoderLayer",
         g_model,
@@ -399,8 +401,8 @@ def test_StandardConformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) -> N
 
     y = ggml.to_numpy(gy)
 
-    y_exp, _ = layer(x, padding_mask)
-    y_exp = y_exp.numpy()
+    y_exp, _ = layer(x, padding_mask=None)
+    y_exp = y_exp.squeeze(0).numpy()
     assert y.shape == y_exp.shape
     assert np.allclose(y_exp, y, atol=2e-3)
 
@@ -409,10 +411,8 @@ def test_StandardConformerEncoderAdaptorLayer_forward(
     ctx: Ctx, g_model: c_void_p
 ) -> None:
     pt_model = load_pt_model()
-    if not DATA_DEV.exists():
-        pytest.skip(reason=f"Folder {DATA_DEV} not found !")
-
-    x = torch.load(DATA_DEV / "seqs_before_adaptor.pt")
+    torch.random.manual_seed(0)
+    x = torch.rand(1, 137, 1024)
     layer = pt_model.speech_encoder.adaptor_layers[0]
     gx = ggml.from_numpy(ctx, x[0])
     ggml.ggml_set_name(gx, b"x")
@@ -467,7 +467,9 @@ def test_StandardTransformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None
 
 def test_StandardConformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None:
     pt_model = load_pt_model()
-    wav, _ = torchaudio.load(DATA / "test.wav")
+    if not LOCAL_AUDIO_SAMPLE_PATH.exists():
+        download_sample_audio()
+    wav, _ = torchaudio.load(LOCAL_AUDIO_SAMPLE_PATH)
     gx = ggml.from_numpy(ctx, wav * 2**15)  # Apply scale before sending into ggml!
     ggml.ggml_set_name(gx, b"x")
     gy = ggml.forward(
@@ -508,13 +510,10 @@ def test_StandardConformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None:
         y_exp = np.load(cache)
 
     assert y.shape == y_exp.shape
-    assert np.allclose(
-        y_exp, y, atol=1e-2
-    )  # There are 10 elements in a 137*1024 tensor with error >1e-2
+    assert np.allclose(y_exp, y, atol=1e-2)
 
 
 def test_WaveformToFbank_forward(ctx: Ctx, g_model: c_void_p) -> None:
-    pt_model = load_pt_model()
     converter = WaveformToFbankConverter(
         num_mel_bins=80,
         waveform_scale=2**15,
@@ -522,7 +521,9 @@ def test_WaveformToFbank_forward(ctx: Ctx, g_model: c_void_p) -> None:
         standardize=True,
     )
     extractor = Wav2Vec2FbankFeatureExtractor(80, stride=2, sample_every_k=1)
-    wav, _ = torchaudio.load(DATA / "LJ037-0171_sr16k_test.wav")
+    if not LOCAL_AUDIO_SAMPLE_PATH.exists():
+        download_sample_audio()
+    wav, _ = torchaudio.load(LOCAL_AUDIO_SAMPLE_PATH)
     gx = ggml.from_numpy(ctx, wav * 2**15)  # Apply scale before sending into ggml!
     ggml.ggml_set_name(gx, b"x")
 
@@ -642,117 +643,13 @@ def test_StandardTransformerDecoder_forward(ctx: Ctx, g_model: c_void_p) -> None
     assert np.allclose(y_exp, y, atol=1e-4 if UNITY_FLASH_ATTN else 1e-3)
 
 
-def test_tokenizer(ctx: Ctx) -> None:
-    tokenizer = unity.load_unity_text_tokenizer("seamlessM4T_medium")
-    enc = tokenizer.create_encoder(task="translation", lang="eng", mode="source")
-
-    spm_path = DATA / "seamlessM4T_medium.spm.ggml"
-    # if not spm_path.exists():
-    if True:
-        vocab = ggml_convert.read_vocab(tokenizer)
-        ggml_convert.write_ggml_file(spm_path, {"spm_vocab_only": True}, {}, vocab, {})
-
-    g_model = ggml.load_fairseq2_ggml_file(spm_path)
-    ggml.lib.fairseq2_model_set_inference_ctx(g_model.ptr, ctx)
-
-    expected = enc("We are all in a yellow submarine.").tolist()[1:]
-    tokens = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_I32, 256)
-    ggml.fairseq2_spm_tokenize(
-        g_model.ptr, b"We are all in a yellow submarine.", tokens
-    )
-    res = ggml.to_numpy(tokens).tolist()
-    assert expected == res
-
-    out = ctypes.create_string_buffer(144)
-    ggml.fairseq2_spm_detokenize(g_model.ptr, tokens, out)
-    assert ctypes.string_at(out) == b"We are all in a yellow submarine."
-
-
-def test_t2tt(ctx: Ctx, g_model: c_void_p) -> None:
-    src_lang = "eng"
-    src_text = "We are all in a yellow submarine."
-    tgt_lang = "fra"
-    sample_file = DATA / "sample_input.npz"
-    beam_size = 2
-
-    if not sample_file.exists():
-        translator = load_translator()
-        device = translator.device
-        token_encoder = translator.text_tokenizer.create_encoder(
-            task="translation", lang=src_lang, mode="source", device=device
-        )
-        src = translator.collate(token_encoder(src_text))
-
-        text_out, _ = translator.get_prediction(
-            translator.model,
-            translator.text_tokenizer,
-            translator.unit_tokenizer,
-            src["seqs"],
-            None,
-            input_modality=Modality.TEXT,
-            output_modality=Modality.TEXT,
-            tgt_lang=tgt_lang,
-            text_generation_opts=SequenceGeneratorOptions(beam_size=beam_size),
-            unit_generation_opts=None,
-        )
-
-        tgt_text = str(text_out.sentences[0])
-        assert tgt_text == "Nous sommes tous dans un sous-marin jaune."
-        hypotheses = [
-            {
-                "seq": h.seq.tolist(),
-                "score": h.score.item(),
-                "step_scores": h.step_scores.numpy(),
-            }
-            for h in text_out.generator_output.results[0]
-        ]
-        np.savez(
-            sample_file,
-            encoder_output=text_out.encoder_output.numpy(),
-            hypotheses=hypotheses,
-        )
-
-    # allow_pickle to load the hyp dicts
-    text_out = np.load(sample_file, allow_pickle=True)
-    encoder_out = ggml.from_numpy(ctx, text_out["encoder_output"])
-    prefix_seq = np.array(text_out["hypotheses"][0]["seq"][:2]).astype(np.int32)
-    max_seq_len = max(len(h["seq"]) for h in text_out["hypotheses"])
-
-    opts = ggml.SequenceGeneratorOptions(
-        beam_size=beam_size,
-        min_seq_len=1,
-        soft_max_seq_len_a=1,
-        soft_max_seq_len_b=200,
-        hard_max_seq_len=int(max_seq_len * 1.5),
-        len_penalty=1.0,
-        unk_penalty=0.0,
-        normalize_scores=True,
-    )
-    job = ggml.SequenceGeneratorJob(
-        opts=opts,
-        prefix_seq=ggml.from_numpy(ctx, prefix_seq),
-        pad_idx=0,
-        unk_idx=1,
-        bos_idx=2,
-        eos_idx=3,
-        num_threads=16,
-    )
-
-    result_ptr = ggml.generate_sequence(g_model, job, encoder_out, NULLPTR, ctx)
-    results = [result_ptr[i] for i in range(beam_size) if result_ptr[i].seq != None]
-
-    # The step score error is big, this may negatively impact the beam search.
-    assert_hypotheses(
-        text_out["hypotheses"], results, score_rtol=1e-2, step_scores_rtol=0.1
-    )
-
-
 def test_s2tt(ctx: Ctx, g_model: c_void_p):
-    src_audio_wav, _ = torchaudio.load(DATA / "test.wav")
-    sample_file = DATA / "test.wav.npz"
+    if not LOCAL_AUDIO_SAMPLE_PATH.exists():
+        download_sample_audio()
+    src_audio_wav, _ = torchaudio.load(LOCAL_AUDIO_SAMPLE_PATH)
+    sample_file = DATA / "LJ037-0171_sr16k.wav.trans"
+    translator = load_translator()
     if not sample_file.exists():
-        translator = load_translator()
-        token_encoder = translator.text_tokenizer.create_encoder(task="translation")
         decoded_audio = {
             "waveform": src_audio_wav.t(),
             "sample_rate": 16000.0,
@@ -773,27 +670,13 @@ def test_s2tt(ctx: Ctx, g_model: c_void_p):
             unit_generation_opts=None,
         )
 
-        tgt_text = str(text_out.sentences[0])
-        assert tgt_text == "大家好 , 世界无主题。"
-        hypotheses = [
-            {
-                "seq": h.seq.tolist(),
-                "score": h.score.item(),
-                "step_scores": h.step_scores.numpy(),
-            }
-            for h in text_out.generator_output.results[0]
-        ]
-        np.savez(
-            sample_file,
-            encoder_output=text_out.encoder_output.numpy(),
-            hypotheses=hypotheses,
-        )
+        tgt_text = str(text_out[0])
+        assert tgt_text == "专家的检查和证据使该委员会得出了结论,可能有五次枪击."
+        with open(sample_file, "w") as f:
+            f.write(tgt_text)
 
-    exp = np.load(sample_file, allow_pickle=True)
-    encoder_out = ggml.from_numpy(ctx, exp["encoder_output"])
-    tgt_tokens = exp["hypotheses"][0]["seq"]
-    max_seq_len = max(len(h["seq"]) for h in exp["hypotheses"])
-    max_seq_len = int(max_seq_len * 1.5)
+    with open(sample_file, "r") as exp:
+        exp_tgt_text = exp.readlines()[0].strip()
 
     # Apply scale before sending into ggml!
     gx = ggml.from_numpy(ctx, src_audio_wav * 2**15)
@@ -813,7 +696,7 @@ def test_s2tt(ctx: Ctx, g_model: c_void_p):
         beam_size=beam_size,
         soft_max_seq_len_a=1,
         soft_max_seq_len_b=200,
-        hard_max_seq_len=max_seq_len,
+        hard_max_seq_len=500,
     )
     job = ggml.SequenceGeneratorJob(
         opts=opts,
@@ -825,20 +708,9 @@ def test_s2tt(ctx: Ctx, g_model: c_void_p):
     )
     result_ptr = ggml.generate_sequence(g_model, Ptr(job), encoder_out, NULLPTR, ctx)
     results = [result_ptr[i] for i in range(beam_size) if result_ptr[i].seq != None]
-    assert_hypotheses(exp["hypotheses"], results, score_rtol=1e-2, step_scores_rtol=0.1)
-
-
-def assert_hypotheses(
-    expected: List[Any],
-    results: List[Any],
-    *,
-    score_rtol: float,
-    step_scores_rtol: float,
-) -> None:
-    assert len(results) == len(expected)
-    for g_hyp, exp in zip(results, expected):
-        g_tokens = list(ggml.to_numpy(g_hyp.seq))
-        g_step_scores = ggml.to_numpy(g_hyp.step_scores)
-        assert g_tokens == exp["seq"]
-        assert g_hyp.score == pytest.approx(exp["score"], rel=score_rtol)
-        assert np.allclose(g_step_scores, exp["step_scores"], rtol=step_scores_rtol)
+    tokens = [
+        translator.text_tokenizer.model.index_to_token(id)
+        for id in ggml.to_numpy(results[0].seq).tolist()
+    ][2:-1]
+    tokens = "".join(tokens).replace("▁", " ")[1:]
+    assert tokens == exp_tgt_text