mel-computations.h 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. /**
  2. * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
  3. *
  4. * See LICENSE for clarification regarding multiple authors
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. // This file is copied/modified from kaldi/src/feat/mel-computations.h
  19. #ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
  20. #define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
#include <cmath>
#include <cstdint>
#include <ostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "feature-window.h"
  26. namespace knf {
  27. struct MelBanksOptions {
  28. int32_t num_bins = 25; // e.g. 25; number of triangular bins
  29. float low_freq = 20; // e.g. 20; lower frequency cutoff
  30. // an upper frequency cutoff; 0 -> no cutoff, negative
  31. // ->added to the Nyquist frequency to get the cutoff.
  32. float high_freq = 0;
  33. float vtln_low = 100; // vtln lower cutoff of warping function.
  34. // vtln upper cutoff of warping function: if negative, added
  35. // to the Nyquist frequency to get the cutoff.
  36. float vtln_high = -500;
  37. bool debug_mel = false;
  38. // htk_mode is a "hidden" config, it does not show up on command line.
  39. // Enables more exact compatibility with HTK, for testing purposes. Affects
  40. // mel-energy flooring and reproduces a bug in HTK.
  41. bool htk_mode = false;
  42. std::string ToString() const {
  43. std::ostringstream os;
  44. os << "num_bins: " << num_bins << "\n";
  45. os << "low_freq: " << low_freq << "\n";
  46. os << "high_freq: " << high_freq << "\n";
  47. os << "vtln_low: " << vtln_low << "\n";
  48. os << "vtln_high: " << vtln_high << "\n";
  49. os << "debug_mel: " << debug_mel << "\n";
  50. os << "htk_mode: " << htk_mode << "\n";
  51. return os.str();
  52. }
  53. };
  54. std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);
  55. class MelBanks {
  56. public:
  57. static inline float InverseMelScale(float mel_freq) {
  58. return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
  59. }
  60. static inline float MelScale(float freq) {
  61. return 1127.0f * logf(1.0f + freq / 700.0f);
  62. }
  63. static float VtlnWarpFreq(
  64. float vtln_low_cutoff,
  65. float vtln_high_cutoff, // discontinuities in warp func
  66. float low_freq,
  67. float high_freq, // upper+lower frequency cutoffs in
  68. // the mel computation
  69. float vtln_warp_factor, float freq);
  70. static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
  71. float low_freq, float high_freq,
  72. float vtln_warp_factor, float mel_freq);
  73. // TODO(fangjun): Remove vtln_warp_factor
  74. MelBanks(const MelBanksOptions &opts,
  75. const FrameExtractionOptions &frame_opts, float vtln_warp_factor);
  76. /// Compute Mel energies (note: not log energies).
  77. /// At input, "fft_energies" contains the FFT energies (not log).
  78. ///
  79. /// @param fft_energies 1-D array of size num_fft_bins/2+1
  80. /// @param mel_energies_out 1-D array of size num_mel_bins
  81. void Compute(const float *fft_energies, float *mel_energies_out) const;
  82. int32_t NumBins() const { return bins_.size(); }
  83. private:
  84. // center frequencies of bins, numbered from 0 ... num_bins-1.
  85. // Needed by GetCenterFreqs().
  86. std::vector<float> center_freqs_;
  87. // the "bins_" vector is a vector, one for each bin, of a pair:
  88. // (the first nonzero fft-bin), (the vector of weights).
  89. std::vector<std::pair<int32_t, std::vector<float>>> bins_;
  90. // TODO(fangjun): Remove debug_ and htk_mode_
  91. bool debug_;
  92. bool htk_mode_;
  93. };
  94. } // namespace knf
  95. #endif // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_