Эх сурвалжийг харах

Fix tests in unity.cpp & cleanup (#204)

* update paths to s3

* Fix & clean up test_unity_cpp.py

* revert unnecessary changes

* delete local test.wav

* remove python wrapper of KNF

* remove test audio sample
Ning 1 жил өмнө
parent
commit
302b501515
23 өөрчлөгдсөн 50 нэмэгдсэн, 1336 устгасан
  1. BIN
      ggml/LJ037-0171_sr16k_test.wav
  2. 0 2
      ggml/examples/kaldi-native-fbank/python/CMakeLists.txt
  3. 0 28
      ggml/examples/kaldi-native-fbank/python/csrc/CMakeLists.txt
  4. 0 57
      ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.cc
  5. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.h
  6. 0 66
      ggml/examples/kaldi-native-fbank/python/csrc/feature-window.cc
  7. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/feature-window.h
  8. 0 37
      ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc
  9. 0 27
      ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h
  10. 0 58
      ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.cc
  11. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.h
  12. 0 68
      ggml/examples/kaldi-native-fbank/python/csrc/online-feature.cc
  13. 0 30
      ggml/examples/kaldi-native-fbank/python/csrc/online-feature.h
  14. 0 134
      ggml/examples/kaldi-native-fbank/python/csrc/utils.cc
  15. 0 52
      ggml/examples/kaldi-native-fbank/python/csrc/utils.h
  16. 0 6
      ggml/examples/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
  17. 0 31
      ggml/examples/kaldi-native-fbank/python/tests/CMakeLists.txt
  18. 0 198
      ggml/examples/kaldi-native-fbank/python/tests/test_fbank_options.py
  19. 0 119
      ggml/examples/kaldi-native-fbank/python/tests/test_frame_extraction_options.py
  20. 0 107
      ggml/examples/kaldi-native-fbank/python/tests/test_mel_bank_options.py
  21. 0 48
      ggml/examples/kaldi-native-fbank/python/tests/test_online_fbank.py
  22. BIN
      ggml/test_data/test.wav
  23. 50 178
      ggml/test_unity_cpp.py

BIN
ggml/LJ037-0171_sr16k_test.wav


+ 0 - 2
ggml/examples/kaldi-native-fbank/python/CMakeLists.txt

@@ -1,2 +0,0 @@
-add_subdirectory(csrc)
-add_subdirectory(tests)

+ 0 - 28
ggml/examples/kaldi-native-fbank/python/csrc/CMakeLists.txt

@@ -1,28 +0,0 @@
-pybind11_add_module(_kaldi_native_fbank
-  feature-fbank.cc
-  feature-window.cc
-  kaldi-native-fbank.cc
-  mel-computations.cc
-  online-feature.cc
-  utils.cc
-)
-
-if(APPLE)
-  execute_process(
-    COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
-  )
-  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
-  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
-endif()
-
-if(NOT WIN32)
-  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${kaldi_native_fbank_rpath_origin}/kaldi_native_fbank/lib")
-endif()
-
-target_link_libraries(_kaldi_native_fbank PRIVATE kaldi-native-fbank-core)
-
-install(TARGETS _kaldi_native_fbank
-  DESTINATION ../
-)

+ 0 - 57
ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.cc

@@ -1,57 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
-
-#include <memory>
-#include <string>
-
-#include "feature-fbank.h"
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-namespace knf {
-
-static void PybindFbankOptions(py::module &m) {  // NOLINT
-  using PyClass = FbankOptions;
-  py::class_<PyClass>(m, "FbankOptions")
-      .def(py::init<>())
-      .def_readwrite("frame_opts", &PyClass::frame_opts)
-      .def_readwrite("mel_opts", &PyClass::mel_opts)
-      .def_readwrite("use_energy", &PyClass::use_energy)
-      .def_readwrite("energy_floor", &PyClass::energy_floor)
-      .def_readwrite("raw_energy", &PyClass::raw_energy)
-      .def_readwrite("htk_compat", &PyClass::htk_compat)
-      .def_readwrite("use_log_fbank", &PyClass::use_log_fbank)
-      .def_readwrite("use_power", &PyClass::use_power)
-      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
-      .def("as_dict",
-           [](const PyClass &self) -> py::dict { return AsDict(self); })
-      .def_static(
-          "from_dict",
-          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
-      .def(py::pickle(
-          [](const PyClass &self) -> py::dict { return AsDict(self); },
-          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
-}
-
-void PybindFeatureFbank(py::module &m) {  // NOLINT
-  PybindFbankOptions(m);
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/feature-fbank.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindFeatureFbank(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_

+ 0 - 66
ggml/examples/kaldi-native-fbank/python/csrc/feature-window.cc

@@ -1,66 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/feature-window.h"
-
-#include <string>
-
-#include "feature-window.h"
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-namespace knf {
-
-static void PybindFrameExtractionOptions(py::module &m) {  // NOLINT
-  using PyClass = FrameExtractionOptions;
-  py::class_<PyClass>(m, "FrameExtractionOptions")
-      .def(py::init<>())
-      .def_readwrite("samp_freq", &PyClass::samp_freq)
-      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
-      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
-      .def_readwrite("dither", &PyClass::dither)
-      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
-      .def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
-      .def_readwrite("window_type", &PyClass::window_type)
-      .def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
-      .def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
-      .def_readwrite("snip_edges", &PyClass::snip_edges)
-      .def("as_dict",
-           [](const PyClass &self) -> py::dict { return AsDict(self); })
-      .def_static("from_dict",
-                  [](py::dict dict) -> PyClass {
-                    return FrameExtractionOptionsFromDict(dict);
-                  })
-#if 0
-      .def_readwrite("allow_downsample",
-                     &PyClass::allow_downsample)
-      .def_readwrite("allow_upsample", &PyClass::allow_upsample)
-#endif
-      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
-      .def(py::pickle(
-          [](const PyClass &self) -> py::dict { return AsDict(self); },
-          [](py::dict dict) -> PyClass {
-            return FrameExtractionOptionsFromDict(dict);
-          }));
-}
-
-void PybindFeatureWindow(py::module &m) {  // NOLINT
-  PybindFrameExtractionOptions(m);
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/feature-window.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindFeatureWindow(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_

+ 0 - 37
ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc

@@ -1,37 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
-#include "kaldi-native-fbank/python/csrc/feature-window.h"
-#include "kaldi-native-fbank/python/csrc/mel-computations.h"
-#include "kaldi-native-fbank/python/csrc/online-feature.h"
-
-namespace knf {
-
-PYBIND11_MODULE(_kaldi_native_fbank, m) {
-  m.doc() = "Python wrapper for kaldi native fbank";
-  PybindFeatureWindow(m);
-  PybindMelComputations(m);
-  PybindFeatureFbank(m);
-
-  PybindOnlineFeature(m);
-}
-
-}  // namespace knf

+ 0 - 27
ggml/examples/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h

@@ -1,27 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
-
-#include "pybind11/numpy.h"
-#include "pybind11/pybind11.h"
-#include "pybind11/stl.h"
-namespace py = pybind11;
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_

+ 0 - 58
ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.cc

@@ -1,58 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/mel-computations.h"
-
-#include <string>
-
-#include "mel-computations.h"
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-namespace knf {
-
-static void PybindMelBanksOptions(py::module &m) {  // NOLINT
-  using PyClass = MelBanksOptions;
-  py::class_<PyClass>(m, "MelBanksOptions")
-      .def(py::init<>())
-      .def_readwrite("num_bins", &PyClass::num_bins)
-      .def_readwrite("low_freq", &PyClass::low_freq)
-      .def_readwrite("high_freq", &PyClass::high_freq)
-      .def_readwrite("vtln_low", &PyClass::vtln_low)
-      .def_readwrite("vtln_high", &PyClass::vtln_high)
-      .def_readwrite("debug_mel", &PyClass::debug_mel)
-      .def_readwrite("htk_mode", &PyClass::htk_mode)
-      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
-      .def("as_dict",
-           [](const PyClass &self) -> py::dict { return AsDict(self); })
-      .def_static("from_dict",
-                  [](py::dict dict) -> PyClass {
-                    return MelBanksOptionsFromDict(dict);
-                  })
-      .def(py::pickle(
-          [](const PyClass &self) -> py::dict { return AsDict(self); },
-          [](py::dict dict) -> PyClass {
-            return MelBanksOptionsFromDict(dict);
-          }));
-}
-
-void PybindMelComputations(py::module &m) {  // NOLINT
-  PybindMelBanksOptions(m);
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/mel-computations.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindMelComputations(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_

+ 0 - 68
ggml/examples/kaldi-native-fbank/python/csrc/online-feature.cc

@@ -1,68 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/online-feature.h"
-
-#include <string>
-#include <vector>
-
-#include "online-feature.h"
-namespace knf {
-
-template <typename C>
-void PybindOnlineFeatureTpl(py::module &m,  // NOLINT
-                            const std::string &class_name,
-                            const std::string &class_help_doc = "") {
-  using PyClass = OnlineGenericBaseFeature<C>;
-  using Options = typename C::Options;
-  py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
-      .def(py::init<const Options &>(), py::arg("opts"))
-      .def_property_readonly("dim", &PyClass::Dim)
-      .def_property_readonly("frame_shift_in_seconds",
-                             &PyClass::FrameShiftInSeconds)
-      .def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
-      .def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
-      .def(
-          "get_frame",
-          [](py::object obj, int32_t frame) {
-            auto *self = obj.cast<PyClass *>();
-            const float *f = self->GetFrame(frame);
-            return py::array_t<float>({self->Dim()},    // shape
-                                      {sizeof(float)},  // stride in bytes
-                                      f,                // ptr
-                                      obj);  // it will increase the reference
-                                             // count of **this** vector
-          },
-          py::arg("frame"))
-      .def(
-          "accept_waveform",
-          [](PyClass &self, float sampling_rate,
-             const std::vector<float> &waveform) {
-            self.AcceptWaveform(sampling_rate, waveform.data(),
-                                waveform.size());
-          },
-          py::arg("sampling_rate"), py::arg("waveform"),
-          py::call_guard<py::gil_scoped_release>())
-      .def("input_finished", &PyClass::InputFinished);
-}
-
-void PybindOnlineFeature(py::module &m) {  // NOLINT
-  PybindOnlineFeatureTpl<FbankComputer>(m, "OnlineFbank");
-}
-
-}  // namespace knf

+ 0 - 30
ggml/examples/kaldi-native-fbank/python/csrc/online-feature.h

@@ -1,30 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
-
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-namespace knf {
-
-void PybindOnlineFeature(py::module &m);  // NOLINT
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_

+ 0 - 134
ggml/examples/kaldi-native-fbank/python/csrc/utils.cc

@@ -1,134 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kaldi-native-fbank/python/csrc/utils.h"
-
-#include <string>
-
-#include "feature-window.h"
-
-#define FROM_DICT(type, key)         \
-  if (dict.contains(#key)) {         \
-    opts.key = py::type(dict[#key]); \
-  }
-
-#define AS_DICT(key) dict[#key] = opts.key
-
-namespace knf {
-
-FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
-  FrameExtractionOptions opts;
-
-  FROM_DICT(float_, samp_freq);
-  FROM_DICT(float_, frame_shift_ms);
-  FROM_DICT(float_, frame_length_ms);
-  FROM_DICT(float_, dither);
-  FROM_DICT(float_, preemph_coeff);
-  FROM_DICT(bool_, remove_dc_offset);
-  FROM_DICT(str, window_type);
-  FROM_DICT(bool_, round_to_power_of_two);
-  FROM_DICT(float_, blackman_coeff);
-  FROM_DICT(bool_, snip_edges);
-
-  return opts;
-}
-
-py::dict AsDict(const FrameExtractionOptions &opts) {
-  py::dict dict;
-
-  AS_DICT(samp_freq);
-  AS_DICT(frame_shift_ms);
-  AS_DICT(frame_length_ms);
-  AS_DICT(dither);
-  AS_DICT(preemph_coeff);
-  AS_DICT(remove_dc_offset);
-  AS_DICT(window_type);
-  AS_DICT(round_to_power_of_two);
-  AS_DICT(blackman_coeff);
-  AS_DICT(snip_edges);
-
-  return dict;
-}
-
-MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
-  MelBanksOptions opts;
-
-  FROM_DICT(int_, num_bins);
-  FROM_DICT(float_, low_freq);
-  FROM_DICT(float_, high_freq);
-  FROM_DICT(float_, vtln_low);
-  FROM_DICT(float_, vtln_high);
-  FROM_DICT(bool_, debug_mel);
-  FROM_DICT(bool_, htk_mode);
-
-  return opts;
-}
-py::dict AsDict(const MelBanksOptions &opts) {
-  py::dict dict;
-
-  AS_DICT(num_bins);
-  AS_DICT(low_freq);
-  AS_DICT(high_freq);
-  AS_DICT(vtln_low);
-  AS_DICT(vtln_high);
-  AS_DICT(debug_mel);
-  AS_DICT(htk_mode);
-
-  return dict;
-}
-
-FbankOptions FbankOptionsFromDict(py::dict dict) {
-  FbankOptions opts;
-
-  if (dict.contains("frame_opts")) {
-    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
-  }
-
-  if (dict.contains("mel_opts")) {
-    opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
-  }
-
-  FROM_DICT(bool_, use_energy);
-  FROM_DICT(float_, energy_floor);
-  FROM_DICT(bool_, raw_energy);
-  FROM_DICT(bool_, htk_compat);
-  FROM_DICT(bool_, use_log_fbank);
-  FROM_DICT(bool_, use_power);
-
-  return opts;
-}
-
-py::dict AsDict(const FbankOptions &opts) {
-  py::dict dict;
-
-  dict["frame_opts"] = AsDict(opts.frame_opts);
-  dict["mel_opts"] = AsDict(opts.mel_opts);
-  AS_DICT(use_energy);
-  AS_DICT(energy_floor);
-  AS_DICT(raw_energy);
-  AS_DICT(htk_compat);
-  AS_DICT(use_log_fbank);
-  AS_DICT(use_power);
-
-  return dict;
-}
-
-#undef FROM_DICT
-#undef AS_DICT
-
-}  // namespace knf

+ 0 - 52
ggml/examples/kaldi-native-fbank/python/csrc/utils.h

@@ -1,52 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
-#define KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
-
-#include "feature-fbank.h"
-#include "feature-window.h"
-#include "mel-computations.h"
-#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
-
-/*
- * This file contains code about `from_dict` and
- * `as_dict` for various options in kaldi-native-fbank.
- *
- * Regarding `from_dict`, users don't need to provide
- * all the fields in the options. If some fields
- * are not provided, it just uses the default one.
- *
- * If the provided dict in `from_dict` is empty,
- * all fields use their default values.
- */
-
-namespace knf {
-
-FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
-py::dict AsDict(const FrameExtractionOptions &opts);
-
-MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
-py::dict AsDict(const MelBanksOptions &opts);
-
-FbankOptions FbankOptionsFromDict(py::dict dict);
-py::dict AsDict(const FbankOptions &opts);
-
-}  // namespace knf
-
-#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_

+ 0 - 6
ggml/examples/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py

@@ -1,6 +0,0 @@
-from _kaldi_native_fbank import (
-    FrameExtractionOptions,
-    MelBanksOptions,
-    OnlineFbank,
-    FbankOptions,
-)

+ 0 - 31
ggml/examples/kaldi-native-fbank/python/tests/CMakeLists.txt

@@ -1,31 +0,0 @@
-function(kaldi_native_fbank_add_py_test source)
-  get_filename_component(name ${source} NAME_WE)
-  set(name "${name}_py")
-
-    message(STATUS "source: ${source}")
-
-  add_test(NAME ${name}
-    COMMAND
-      "${PYTHON_EXECUTABLE}"
-      "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
-  )
-
-  get_filename_component(kaldi_native_fbank_path ${CMAKE_CURRENT_LIST_DIR} DIRECTORY)
-
-  set_property(TEST ${name}
-    PROPERTY ENVIRONMENT "PYTHONPATH=${kaldi_native_fbank_path}:$<TARGET_FILE_DIR:_kaldi_native_fbank>:$ENV{PYTHONPATH}"
-  )
-endfunction()
-
-# please sort the files in alphabetic order
-set(py_test_files
-  test_frame_extraction_options.py
-  test_mel_bank_options.py
-  test_fbank_options.py
-)
-
-if(KALDI_NATIVE_FBANK_BUILD_TESTS)
-  foreach(source IN LISTS py_test_files)
-    kaldi_native_fbank_add_py_test(${source})
-  endforeach()
-endif()

+ 0 - 198
ggml/examples/kaldi-native-fbank/python/tests/test_fbank_options.py

@@ -1,198 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
-
-
-import pickle
-
-import kaldi_native_fbank as knf
-
-
-def test_default():
-    opts = knf.FbankOptions()
-    assert opts.frame_opts.samp_freq == 16000
-    assert opts.frame_opts.frame_shift_ms == 10.0
-    assert opts.frame_opts.frame_length_ms == 25.0
-    assert opts.frame_opts.dither == 1.0
-    assert abs(opts.frame_opts.preemph_coeff - 0.97) < 1e-6
-    assert opts.frame_opts.remove_dc_offset is True
-    assert opts.frame_opts.window_type == "povey"
-    assert opts.frame_opts.round_to_power_of_two is True
-    assert abs(opts.frame_opts.blackman_coeff - 0.42) < 1e-6
-    assert opts.frame_opts.snip_edges is True
-
-    assert opts.mel_opts.num_bins == 23
-    assert opts.mel_opts.low_freq == 20
-    assert opts.mel_opts.high_freq == 0
-    assert opts.mel_opts.vtln_low == 100
-    assert opts.mel_opts.vtln_high == -500
-    assert opts.mel_opts.debug_mel is False
-    assert opts.mel_opts.htk_mode is False
-
-    assert opts.use_energy is False
-    assert opts.energy_floor == 0.0
-    assert opts.raw_energy is True
-    assert opts.htk_compat is False
-    assert opts.use_log_fbank is True
-    assert opts.use_power is True
-
-
-def test_set_get():
-    opts = knf.FbankOptions()
-    opts.use_energy = True
-    assert opts.use_energy is True
-
-    opts.energy_floor = 1
-    assert opts.energy_floor == 1
-
-    opts.raw_energy = False
-    assert opts.raw_energy is False
-
-    opts.htk_compat = True
-    assert opts.htk_compat is True
-
-    opts.use_log_fbank = False
-    assert opts.use_log_fbank is False
-
-    opts.use_power = False
-    assert opts.use_power is False
-
-
-def test_set_get_frame_opts():
-    opts = knf.FbankOptions()
-
-    opts.frame_opts.samp_freq = 44100
-    assert opts.frame_opts.samp_freq == 44100
-
-    opts.frame_opts.frame_shift_ms = 20.5
-    assert opts.frame_opts.frame_shift_ms == 20.5
-
-    opts.frame_opts.frame_length_ms = 1
-    assert opts.frame_opts.frame_length_ms == 1
-
-    opts.frame_opts.dither = 0.5
-    assert opts.frame_opts.dither == 0.5
-
-    opts.frame_opts.preemph_coeff = 0.25
-    assert opts.frame_opts.preemph_coeff == 0.25
-
-    opts.frame_opts.remove_dc_offset = False
-    assert opts.frame_opts.remove_dc_offset is False
-
-    opts.frame_opts.window_type = "hanning"
-    assert opts.frame_opts.window_type == "hanning"
-
-    opts.frame_opts.round_to_power_of_two = False
-    assert opts.frame_opts.round_to_power_of_two is False
-
-    opts.frame_opts.blackman_coeff = 0.25
-    assert opts.frame_opts.blackman_coeff == 0.25
-
-    opts.frame_opts.snip_edges = False
-    assert opts.frame_opts.snip_edges is False
-
-
-def test_set_get_mel_opts():
-    opts = knf.FbankOptions()
-
-    opts.mel_opts.num_bins = 100
-    assert opts.mel_opts.num_bins == 100
-
-    opts.mel_opts.low_freq = 22
-    assert opts.mel_opts.low_freq == 22
-
-    opts.mel_opts.high_freq = 1
-    assert opts.mel_opts.high_freq == 1
-
-    opts.mel_opts.vtln_low = 101
-    assert opts.mel_opts.vtln_low == 101
-
-    opts.mel_opts.vtln_high = -100
-    assert opts.mel_opts.vtln_high == -100
-
-    opts.mel_opts.debug_mel = True
-    assert opts.mel_opts.debug_mel is True
-
-    opts.mel_opts.htk_mode = True
-    assert opts.mel_opts.htk_mode is True
-
-
-def test_from_empty_dict():
-    opts = knf.FbankOptions.from_dict({})
-    opts2 = knf.FbankOptions()
-
-    assert str(opts) == str(opts2)
-
-
-def test_from_dict_partial():
-    d = {
-        "energy_floor": 10.5,
-        "htk_compat": True,
-        "mel_opts": {"num_bins": 80, "vtln_low": 1},
-        "frame_opts": {"window_type": "hanning"},
-    }
-    opts = knf.FbankOptions.from_dict(d)
-    assert opts.energy_floor == 10.5
-    assert opts.htk_compat is True
-    assert opts.mel_opts.num_bins == 80
-    assert opts.mel_opts.vtln_low == 1
-    assert opts.frame_opts.window_type == "hanning"
-
-    mel_opts = knf.MelBanksOptions.from_dict(d["mel_opts"])
-    assert str(opts.mel_opts) == str(mel_opts)
-
-
-def test_from_dict_full_and_as_dict():
-    opts = knf.FbankOptions()
-    opts.htk_compat = True
-    opts.mel_opts.num_bins = 80
-    opts.frame_opts.samp_freq = 10
-
-    d = opts.as_dict()
-    assert d["htk_compat"] is True
-    assert d["mel_opts"]["num_bins"] == 80
-    assert d["frame_opts"]["samp_freq"] == 10
-
-    mel_opts = knf.MelBanksOptions()
-    mel_opts.num_bins = 80
-    assert d["mel_opts"] == mel_opts.as_dict()
-
-    frame_opts = knf.FrameExtractionOptions()
-    frame_opts.samp_freq = 10
-    assert d["frame_opts"] == frame_opts.as_dict()
-
-    opts2 = knf.FbankOptions.from_dict(d)
-    assert str(opts2) == str(opts)
-
-    d["htk_compat"] = False
-    opts3 = knf.FbankOptions.from_dict(d)
-    assert opts3.htk_compat is False
-
-
-def test_pickle():
-    opts = knf.FbankOptions()
-    opts.use_energy = True
-    opts.use_power = False
-
-    opts.frame_opts.samp_freq = 44100
-    opts.mel_opts.num_bins = 100
-
-    data = pickle.dumps(opts)
-
-    opts2 = pickle.loads(data)
-    assert str(opts) == str(opts2)
-
-
-def main():
-    test_default()
-    test_set_get()
-    test_set_get_frame_opts()
-    test_set_get_mel_opts()
-    test_from_empty_dict()
-    test_from_dict_partial()
-    test_from_dict_full_and_as_dict()
-    test_pickle()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 119
ggml/examples/kaldi-native-fbank/python/tests/test_frame_extraction_options.py

@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
-
-import pickle
-
-import kaldi_native_fbank as knf
-
-
-def test_default():
-    opts = knf.FrameExtractionOptions()
-    assert opts.samp_freq == 16000
-    assert opts.frame_shift_ms == 10.0
-    assert opts.frame_length_ms == 25.0
-    assert opts.dither == 1.0
-    assert abs(opts.preemph_coeff - 0.97) < 1e-6
-    assert opts.remove_dc_offset is True
-    assert opts.window_type == "povey"
-    assert opts.round_to_power_of_two is True
-    assert abs(opts.blackman_coeff - 0.42) < 1e-6
-    assert opts.snip_edges is True
-
-
-def test_set_get():
-    opts = knf.FrameExtractionOptions()
-    opts.samp_freq = 44100
-    assert opts.samp_freq == 44100
-
-    opts.frame_shift_ms = 20.5
-    assert opts.frame_shift_ms == 20.5
-
-    opts.frame_length_ms = 1
-    assert opts.frame_length_ms == 1
-
-    opts.dither = 0.5
-    assert opts.dither == 0.5
-
-    opts.preemph_coeff = 0.25
-    assert opts.preemph_coeff == 0.25
-
-    opts.remove_dc_offset = False
-    assert opts.remove_dc_offset is False
-
-    opts.window_type = "hanning"
-    assert opts.window_type == "hanning"
-
-    opts.round_to_power_of_two = False
-    assert opts.round_to_power_of_two is False
-
-    opts.blackman_coeff = 0.25
-    assert opts.blackman_coeff == 0.25
-
-    opts.snip_edges = False
-    assert opts.snip_edges is False
-
-
-def test_from_empty_dict():
-    opts = knf.FrameExtractionOptions.from_dict({})
-    opts2 = knf.FrameExtractionOptions()
-
-    assert str(opts) == str(opts2)
-
-
-def test_from_dict_partial():
-    d = {"samp_freq": 10, "frame_shift_ms": 2}
-
-    opts = knf.FrameExtractionOptions.from_dict(d)
-
-    opts2 = knf.FrameExtractionOptions()
-    assert str(opts) != str(opts2)
-
-    opts2.samp_freq = 10
-    assert str(opts) != str(opts2)
-
-    opts2.frame_shift_ms = 2
-    assert str(opts) == str(opts2)
-
-    opts2.frame_shift_ms = 3
-    assert str(opts) != str(opts2)
-
-
-def test_from_dict_full_and_as_dict():
-    opts = knf.FrameExtractionOptions()
-    opts.samp_freq = 20
-    opts.frame_length_ms = 100
-
-    d = opts.as_dict()
-    for key, value in d.items():
-        assert value == getattr(opts, key)
-
-    opts2 = knf.FrameExtractionOptions.from_dict(d)
-    assert str(opts2) == str(opts)
-
-    d["window_type"] = "hanning"
-    opts3 = knf.FrameExtractionOptions.from_dict(d)
-    assert opts3.window_type == "hanning"
-
-
-def test_pickle():
-    opts = knf.FrameExtractionOptions()
-    opts.samp_freq = 44100
-    opts.dither = 5.5
-    data = pickle.dumps(opts)
-
-    opts2 = pickle.loads(data)
-    assert str(opts) == str(opts2)
-
-
-def main():
-    test_default()
-    test_set_get()
-    test_from_empty_dict()
-    test_from_dict_partial()
-    test_from_dict_full_and_as_dict()
-    test_pickle()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 107
ggml/examples/kaldi-native-fbank/python/tests/test_mel_bank_options.py

@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
-
-import pickle
-
-import kaldi_native_fbank as knf
-
-
-def test_default():
-    opts = knf.MelBanksOptions()
-    assert opts.num_bins == 25
-    assert opts.low_freq == 20
-    assert opts.high_freq == 0
-    assert opts.vtln_low == 100
-    assert opts.vtln_high == -500
-    assert opts.debug_mel is False
-    assert opts.htk_mode is False
-
-
-def test_set_get():
-    opts = knf.MelBanksOptions()
-    opts.num_bins = 100
-    assert opts.num_bins == 100
-
-    opts.low_freq = 22
-    assert opts.low_freq == 22
-
-    opts.high_freq = 1
-    assert opts.high_freq == 1
-
-    opts.vtln_low = 101
-    assert opts.vtln_low == 101
-
-    opts.vtln_high = -100
-    assert opts.vtln_high == -100
-
-    opts.debug_mel = True
-    assert opts.debug_mel is True
-
-    opts.htk_mode = True
-    assert opts.htk_mode is True
-
-
-def test_from_empty_dict():
-    opts = knf.MelBanksOptions.from_dict({})
-    opts2 = knf.MelBanksOptions()
-
-    assert str(opts) == str(opts2)
-
-
-def test_from_dict_partial():
-    d = {"num_bins": 10, "debug_mel": True}
-
-    opts = knf.MelBanksOptions.from_dict(d)
-
-    opts2 = knf.MelBanksOptions()
-    assert str(opts) != str(opts2)
-
-    opts2.num_bins = 10
-    assert str(opts) != str(opts2)
-
-    opts2.debug_mel = True
-    assert str(opts) == str(opts2)
-
-    opts2.debug_mel = False
-    assert str(opts) != str(opts2)
-
-
-def test_from_dict_full_and_as_dict():
-    opts = knf.MelBanksOptions()
-    opts.num_bins = 80
-    opts.vtln_high = 2
-
-    d = opts.as_dict()
-    for key, value in d.items():
-        assert value == getattr(opts, key)
-
-    opts2 = knf.MelBanksOptions.from_dict(d)
-    assert str(opts2) == str(opts)
-
-    d["htk_mode"] = True
-    opts3 = knf.MelBanksOptions.from_dict(d)
-    assert opts3.htk_mode is True
-
-
-def test_pickle():
-    opts = knf.MelBanksOptions()
-    opts.num_bins = 100
-    opts.low_freq = 22
-    data = pickle.dumps(opts)
-
-    opts2 = pickle.loads(data)
-    assert str(opts) == str(opts2)
-
-
-def main():
-    test_default()
-    test_set_get()
-    test_from_empty_dict()
-    test_from_dict_partial()
-    test_from_dict_full_and_as_dict()
-    test_pickle()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 48
ggml/examples/kaldi-native-fbank/python/tests/test_online_fbank.py

@@ -1,48 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-
-try:
-    import kaldifeat
-except:
-    print("Please install kaldifeat first")
-    sys.exit(0)
-
-import kaldi_native_fbank as knf
-import torch
-
-
-def main():
-    sampling_rate = 16000
-    samples = torch.randn(16000 * 10)
-
-    opts = kaldifeat.FbankOptions()
-    opts.frame_opts.dither = 0
-    opts.mel_opts.num_bins = 80
-    opts.frame_opts.snip_edges = False
-    opts.mel_opts.debug_mel = False
-
-    online_fbank = kaldifeat.OnlineFbank(opts)
-
-    online_fbank.accept_waveform(sampling_rate, samples)
-
-    opts = knf.FbankOptions()
-    opts.frame_opts.dither = 0
-    opts.mel_opts.num_bins = 80
-    opts.frame_opts.snip_edges = False
-    opts.mel_opts.debug_mel = False
-
-    fbank = knf.OnlineFbank(opts)
-    fbank.accept_waveform(sampling_rate, samples.tolist())
-
-    assert online_fbank.num_frames_ready == fbank.num_frames_ready
-    for i in range(fbank.num_frames_ready):
-        f1 = online_fbank.get_frame(i)
-        f2 = torch.from_numpy(fbank.get_frame(i))
-        assert torch.allclose(f1, f2, atol=1e-3), (i, (f1 - f2).abs().max())
-
-
-if __name__ == "__main__":
-    torch.manual_seed(20220825)
-    main()
-    print("success")

BIN
ggml/test_data/test.wav


+ 50 - 178
ggml/test_unity_cpp.py

@@ -1,20 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
 import ctypes
 import ctypes
 import functools
 import functools
-import logging
-import sys
 from ctypes import c_void_p
 from ctypes import c_void_p
 from pathlib import Path
 from pathlib import Path
 from typing import Any, Iterator, List, Tuple
 from typing import Any, Iterator, List, Tuple
 
 
 import fairseq2.nn
 import fairseq2.nn
 import fairseq2.nn.transformer
 import fairseq2.nn.transformer
-from fairseq2.nn.padding import PaddingMask
 import numpy as np
 import numpy as np
 import pytest
 import pytest
 import torch
 import torch
 import torchaudio
 import torchaudio
 from fairseq2.data.audio import WaveformToFbankConverter
 from fairseq2.data.audio import WaveformToFbankConverter
-from fairseq2.generation import SequenceGeneratorOptions
+from seamless_communication.inference import SequenceGeneratorOptions
 from fairseq2.models.wav2vec2.feature_extractor import Wav2Vec2FbankFeatureExtractor
 from fairseq2.models.wav2vec2.feature_extractor import Wav2Vec2FbankFeatureExtractor
 from seamless_communication.inference.translator import Modality, Translator
 from seamless_communication.inference.translator import Modality, Translator
 
 
@@ -22,6 +25,7 @@ import ggml
 from ctypes_utils import NULLPTR, Ptr
 from ctypes_utils import NULLPTR, Ptr
 from ggml import NativeObj
 from ggml import NativeObj
 from ggml_convert import convert_model, read_layer_config
 from ggml_convert import convert_model, read_layer_config
+import requests
 
 
 Ctx = ggml.ggml_context_p
 Ctx = ggml.ggml_context_p
 
 
@@ -32,11 +36,10 @@ FAIRSEQ2_CPP = Path(__file__).parent / "examples/unity/fairseq2.cpp"
 UNITY_FLASH_ATTN = "\n# define UNITY_FLASH_ATTN 0\n" not in FAIRSEQ2_CPP.read_text()
 UNITY_FLASH_ATTN = "\n# define UNITY_FLASH_ATTN 0\n" not in FAIRSEQ2_CPP.read_text()
 
 
 DATA = Path(__file__).parent / "test_data"
 DATA = Path(__file__).parent / "test_data"
-DATA_DEV = DATA / "dev"
-if not DATA_DEV.exists():
-    DATA_DEV = Path(
-        "/private/home/dnn/internal_sc/seamless_communication/ggml/examples/unity/dev"
-    )
+LOCAL_AUDIO_SAMPLE_PATH = DATA / "LJ037-0171_sr16k.wav"
+TEST_AUDIO_SAMPLE_URL = (
+    "https://dl.fbaipublicfiles.com/seamless/tests/LJ037-0171_sr16k.wav"
+)
 
 
 
 
 @pytest.fixture(name="ctx")
 @pytest.fixture(name="ctx")
@@ -74,6 +77,14 @@ def load_pt_model() -> Any:
     return load_translator().model
     return load_translator().model
 
 
 
 
+def download_sample_audio() -> Any:
+    response = requests.get(TEST_AUDIO_SAMPLE_URL, stream=True)
+    with open(DATA / "LJ037-0171_sr16k.wav", "wb") as file:
+        for chunk in response.iter_content(chunk_size=1024):
+            if chunk:
+                file.write(chunk)
+
+
 def test_convert_linear(tmp_path: Path) -> None:
 def test_convert_linear(tmp_path: Path) -> None:
     module = fairseq2.nn.Linear(16, 24, True)
     module = fairseq2.nn.Linear(16, 24, True)
 
 
@@ -352,9 +363,6 @@ def test_StandardTransformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) ->
 
 
     gx = ggml.from_numpy(ctx, x)
     gx = ggml.from_numpy(ctx, x)
     ggml.ggml_set_name(gx, b"x")
     ggml.ggml_set_name(gx, b"x")
-    padding_mask = fairseq2.nn.padding.PaddingMask(torch.tensor([21, 21]), 21)
-    gpad = ggml.from_numpy(ctx, padding_mask.materialize())
-    ggml.ggml_set_name(gpad, b"padding_mask")
     gy = ggml.forward(
     gy = ggml.forward(
         "StandardTransformerEncoderLayer",
         "StandardTransformerEncoderLayer",
         g_model,
         g_model,
@@ -376,17 +384,11 @@ def test_StandardTransformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) ->
 
 
 def test_StandardConformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) -> None:
 def test_StandardConformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) -> None:
     pt_model = load_pt_model()
     pt_model = load_pt_model()
-    if not DATA_DEV.exists():
-        pytest.skip(reason=f"Folder {DATA_DEV} not found !")
-
-    x = torch.load(DATA_DEV / "seqs_before_conformer_block.pt")
-    padding_mask = PaddingMask(torch.ones(1, x.shape[1]), x.shape[1])
+    x = torch.rand(1, 137, 1024)
 
 
     layer = pt_model.speech_encoder.inner.layers[0]
     layer = pt_model.speech_encoder.inner.layers[0]
     gx = ggml.from_numpy(ctx, x[0])
     gx = ggml.from_numpy(ctx, x[0])
     ggml.ggml_set_name(gx, b"x")
     ggml.ggml_set_name(gx, b"x")
-    gpad = ggml.from_numpy(ctx, padding_mask[0])
-    ggml.ggml_set_name(gpad, b"padding_mask")
     gy = ggml.forward(
     gy = ggml.forward(
         "StandardConformerEncoderLayer",
         "StandardConformerEncoderLayer",
         g_model,
         g_model,
@@ -399,8 +401,8 @@ def test_StandardConformerEncoderLayer_forward(ctx: Ctx, g_model: c_void_p) -> N
 
 
     y = ggml.to_numpy(gy)
     y = ggml.to_numpy(gy)
 
 
-    y_exp, _ = layer(x, padding_mask)
-    y_exp = y_exp.numpy()
+    y_exp, _ = layer(x, padding_mask=None)
+    y_exp = y_exp.squeeze(0).numpy()
     assert y.shape == y_exp.shape
     assert y.shape == y_exp.shape
     assert np.allclose(y_exp, y, atol=2e-3)
     assert np.allclose(y_exp, y, atol=2e-3)
 
 
@@ -409,10 +411,8 @@ def test_StandardConformerEncoderAdaptorLayer_forward(
     ctx: Ctx, g_model: c_void_p
     ctx: Ctx, g_model: c_void_p
 ) -> None:
 ) -> None:
     pt_model = load_pt_model()
     pt_model = load_pt_model()
-    if not DATA_DEV.exists():
-        pytest.skip(reason=f"Folder {DATA_DEV} not found !")
-
-    x = torch.load(DATA_DEV / "seqs_before_adaptor.pt")
+    torch.random.manual_seed(0)
+    x = torch.rand(1, 137, 1024)
     layer = pt_model.speech_encoder.adaptor_layers[0]
     layer = pt_model.speech_encoder.adaptor_layers[0]
     gx = ggml.from_numpy(ctx, x[0])
     gx = ggml.from_numpy(ctx, x[0])
     ggml.ggml_set_name(gx, b"x")
     ggml.ggml_set_name(gx, b"x")
@@ -467,7 +467,9 @@ def test_StandardTransformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None
 
 
 def test_StandardConformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None:
 def test_StandardConformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None:
     pt_model = load_pt_model()
     pt_model = load_pt_model()
-    wav, _ = torchaudio.load(DATA / "test.wav")
+    if not LOCAL_AUDIO_SAMPLE_PATH.exists():
+        download_sample_audio()
+    wav, _ = torchaudio.load(LOCAL_AUDIO_SAMPLE_PATH)
     gx = ggml.from_numpy(ctx, wav * 2**15)  # Apply scale before sending into ggml!
     gx = ggml.from_numpy(ctx, wav * 2**15)  # Apply scale before sending into ggml!
     ggml.ggml_set_name(gx, b"x")
     ggml.ggml_set_name(gx, b"x")
     gy = ggml.forward(
     gy = ggml.forward(
@@ -508,13 +510,10 @@ def test_StandardConformerEncoder_forward(ctx: Ctx, g_model: c_void_p) -> None:
         y_exp = np.load(cache)
         y_exp = np.load(cache)
 
 
     assert y.shape == y_exp.shape
     assert y.shape == y_exp.shape
-    assert np.allclose(
-        y_exp, y, atol=1e-2
-    )  # There are 10 elements in a 137*1024 tensor with error >1e-2
+    assert np.allclose(y_exp, y, atol=1e-2)
 
 
 
 
 def test_WaveformToFbank_forward(ctx: Ctx, g_model: c_void_p) -> None:
 def test_WaveformToFbank_forward(ctx: Ctx, g_model: c_void_p) -> None:
-    pt_model = load_pt_model()
     converter = WaveformToFbankConverter(
     converter = WaveformToFbankConverter(
         num_mel_bins=80,
         num_mel_bins=80,
         waveform_scale=2**15,
         waveform_scale=2**15,
@@ -522,7 +521,9 @@ def test_WaveformToFbank_forward(ctx: Ctx, g_model: c_void_p) -> None:
         standardize=True,
         standardize=True,
     )
     )
     extractor = Wav2Vec2FbankFeatureExtractor(80, stride=2, sample_every_k=1)
     extractor = Wav2Vec2FbankFeatureExtractor(80, stride=2, sample_every_k=1)
-    wav, _ = torchaudio.load(DATA / "LJ037-0171_sr16k_test.wav")
+    if not LOCAL_AUDIO_SAMPLE_PATH.exists():
+        download_sample_audio()
+    wav, _ = torchaudio.load(LOCAL_AUDIO_SAMPLE_PATH)
     gx = ggml.from_numpy(ctx, wav * 2**15)  # Apply scale before sending into ggml!
     gx = ggml.from_numpy(ctx, wav * 2**15)  # Apply scale before sending into ggml!
     ggml.ggml_set_name(gx, b"x")
     ggml.ggml_set_name(gx, b"x")
 
 
@@ -642,117 +643,13 @@ def test_StandardTransformerDecoder_forward(ctx: Ctx, g_model: c_void_p) -> None
     assert np.allclose(y_exp, y, atol=1e-4 if UNITY_FLASH_ATTN else 1e-3)
     assert np.allclose(y_exp, y, atol=1e-4 if UNITY_FLASH_ATTN else 1e-3)
 
 
 
 
-def test_tokenizer(ctx: Ctx) -> None:
-    tokenizer = unity.load_unity_text_tokenizer("seamlessM4T_medium")
-    enc = tokenizer.create_encoder(task="translation", lang="eng", mode="source")
-
-    spm_path = DATA / "seamlessM4T_medium.spm.ggml"
-    # if not spm_path.exists():
-    if True:
-        vocab = ggml_convert.read_vocab(tokenizer)
-        ggml_convert.write_ggml_file(spm_path, {"spm_vocab_only": True}, {}, vocab, {})
-
-    g_model = ggml.load_fairseq2_ggml_file(spm_path)
-    ggml.lib.fairseq2_model_set_inference_ctx(g_model.ptr, ctx)
-
-    expected = enc("We are all in a yellow submarine.").tolist()[1:]
-    tokens = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_I32, 256)
-    ggml.fairseq2_spm_tokenize(
-        g_model.ptr, b"We are all in a yellow submarine.", tokens
-    )
-    res = ggml.to_numpy(tokens).tolist()
-    assert expected == res
-
-    out = ctypes.create_string_buffer(144)
-    ggml.fairseq2_spm_detokenize(g_model.ptr, tokens, out)
-    assert ctypes.string_at(out) == b"We are all in a yellow submarine."
-
-
-def test_t2tt(ctx: Ctx, g_model: c_void_p) -> None:
-    src_lang = "eng"
-    src_text = "We are all in a yellow submarine."
-    tgt_lang = "fra"
-    sample_file = DATA / "sample_input.npz"
-    beam_size = 2
-
-    if not sample_file.exists():
-        translator = load_translator()
-        device = translator.device
-        token_encoder = translator.text_tokenizer.create_encoder(
-            task="translation", lang=src_lang, mode="source", device=device
-        )
-        src = translator.collate(token_encoder(src_text))
-
-        text_out, _ = translator.get_prediction(
-            translator.model,
-            translator.text_tokenizer,
-            translator.unit_tokenizer,
-            src["seqs"],
-            None,
-            input_modality=Modality.TEXT,
-            output_modality=Modality.TEXT,
-            tgt_lang=tgt_lang,
-            text_generation_opts=SequenceGeneratorOptions(beam_size=beam_size),
-            unit_generation_opts=None,
-        )
-
-        tgt_text = str(text_out.sentences[0])
-        assert tgt_text == "Nous sommes tous dans un sous-marin jaune."
-        hypotheses = [
-            {
-                "seq": h.seq.tolist(),
-                "score": h.score.item(),
-                "step_scores": h.step_scores.numpy(),
-            }
-            for h in text_out.generator_output.results[0]
-        ]
-        np.savez(
-            sample_file,
-            encoder_output=text_out.encoder_output.numpy(),
-            hypotheses=hypotheses,
-        )
-
-    # allow_pickle to load the hyp dicts
-    text_out = np.load(sample_file, allow_pickle=True)
-    encoder_out = ggml.from_numpy(ctx, text_out["encoder_output"])
-    prefix_seq = np.array(text_out["hypotheses"][0]["seq"][:2]).astype(np.int32)
-    max_seq_len = max(len(h["seq"]) for h in text_out["hypotheses"])
-
-    opts = ggml.SequenceGeneratorOptions(
-        beam_size=beam_size,
-        min_seq_len=1,
-        soft_max_seq_len_a=1,
-        soft_max_seq_len_b=200,
-        hard_max_seq_len=int(max_seq_len * 1.5),
-        len_penalty=1.0,
-        unk_penalty=0.0,
-        normalize_scores=True,
-    )
-    job = ggml.SequenceGeneratorJob(
-        opts=opts,
-        prefix_seq=ggml.from_numpy(ctx, prefix_seq),
-        pad_idx=0,
-        unk_idx=1,
-        bos_idx=2,
-        eos_idx=3,
-        num_threads=16,
-    )
-
-    result_ptr = ggml.generate_sequence(g_model, job, encoder_out, NULLPTR, ctx)
-    results = [result_ptr[i] for i in range(beam_size) if result_ptr[i].seq != None]
-
-    # The step score error is big, this may negatively impact the beam search.
-    assert_hypotheses(
-        text_out["hypotheses"], results, score_rtol=1e-2, step_scores_rtol=0.1
-    )
-
-
 def test_s2tt(ctx: Ctx, g_model: c_void_p):
 def test_s2tt(ctx: Ctx, g_model: c_void_p):
-    src_audio_wav, _ = torchaudio.load(DATA / "test.wav")
-    sample_file = DATA / "test.wav.npz"
+    if not LOCAL_AUDIO_SAMPLE_PATH.exists():
+        download_sample_audio()
+    src_audio_wav, _ = torchaudio.load(LOCAL_AUDIO_SAMPLE_PATH)
+    sample_file = DATA / "LJ037-0171_sr16k.wav.trans"
+    translator = load_translator()
     if not sample_file.exists():
     if not sample_file.exists():
-        translator = load_translator()
-        token_encoder = translator.text_tokenizer.create_encoder(task="translation")
         decoded_audio = {
         decoded_audio = {
             "waveform": src_audio_wav.t(),
             "waveform": src_audio_wav.t(),
             "sample_rate": 16000.0,
             "sample_rate": 16000.0,
@@ -773,27 +670,13 @@ def test_s2tt(ctx: Ctx, g_model: c_void_p):
             unit_generation_opts=None,
             unit_generation_opts=None,
         )
         )
 
 
-        tgt_text = str(text_out.sentences[0])
-        assert tgt_text == "大家好 , 世界无主题。"
-        hypotheses = [
-            {
-                "seq": h.seq.tolist(),
-                "score": h.score.item(),
-                "step_scores": h.step_scores.numpy(),
-            }
-            for h in text_out.generator_output.results[0]
-        ]
-        np.savez(
-            sample_file,
-            encoder_output=text_out.encoder_output.numpy(),
-            hypotheses=hypotheses,
-        )
+        tgt_text = str(text_out[0])
+        assert tgt_text == "专家的检查和证据使该委员会得出了结论,可能有五次枪击."
+        with open(sample_file, "w") as f:
+            f.write(tgt_text)
 
 
-    exp = np.load(sample_file, allow_pickle=True)
-    encoder_out = ggml.from_numpy(ctx, exp["encoder_output"])
-    tgt_tokens = exp["hypotheses"][0]["seq"]
-    max_seq_len = max(len(h["seq"]) for h in exp["hypotheses"])
-    max_seq_len = int(max_seq_len * 1.5)
+    with open(sample_file, "r") as exp:
+        exp_tgt_text = exp.readlines()[0].strip()
 
 
     # Apply scale before sending into ggml!
     # Apply scale before sending into ggml!
     gx = ggml.from_numpy(ctx, src_audio_wav * 2**15)
     gx = ggml.from_numpy(ctx, src_audio_wav * 2**15)
@@ -813,7 +696,7 @@ def test_s2tt(ctx: Ctx, g_model: c_void_p):
         beam_size=beam_size,
         beam_size=beam_size,
         soft_max_seq_len_a=1,
         soft_max_seq_len_a=1,
         soft_max_seq_len_b=200,
         soft_max_seq_len_b=200,
-        hard_max_seq_len=max_seq_len,
+        hard_max_seq_len=500,
     )
     )
     job = ggml.SequenceGeneratorJob(
     job = ggml.SequenceGeneratorJob(
         opts=opts,
         opts=opts,
@@ -825,20 +708,9 @@ def test_s2tt(ctx: Ctx, g_model: c_void_p):
     )
     )
     result_ptr = ggml.generate_sequence(g_model, Ptr(job), encoder_out, NULLPTR, ctx)
     result_ptr = ggml.generate_sequence(g_model, Ptr(job), encoder_out, NULLPTR, ctx)
     results = [result_ptr[i] for i in range(beam_size) if result_ptr[i].seq != None]
     results = [result_ptr[i] for i in range(beam_size) if result_ptr[i].seq != None]
-    assert_hypotheses(exp["hypotheses"], results, score_rtol=1e-2, step_scores_rtol=0.1)
-
-
-def assert_hypotheses(
-    expected: List[Any],
-    results: List[Any],
-    *,
-    score_rtol: float,
-    step_scores_rtol: float,
-) -> None:
-    assert len(results) == len(expected)
-    for g_hyp, exp in zip(results, expected):
-        g_tokens = list(ggml.to_numpy(g_hyp.seq))
-        g_step_scores = ggml.to_numpy(g_hyp.step_scores)
-        assert g_tokens == exp["seq"]
-        assert g_hyp.score == pytest.approx(exp["score"], rel=score_rtol)
-        assert np.allclose(g_step_scores, exp["step_scores"], rtol=step_scores_rtol)
+    tokens = [
+        translator.text_tokenizer.model.index_to_token(id)
+        for id in ggml.to_numpy(results[0].seq).tolist()
+    ][2:-1]
+    tokens = "".join(tokens).replace("▁", " ")[1:]
+    assert tokens == exp_tgt_text