feature-fbank.h 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. /**
  2. * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
  3. *
  4. * See LICENSE for clarification regarding multiple authors
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. // This file is copied/modified from kaldi/src/feat/feature-fbank.h
  19. #ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
  20. #define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#include <cstdint>
#include <map>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>

#include "feature-window.h"
#include "mel-computations.h"
#include "rfft.h"
  27. namespace knf {
  28. struct FbankOptions {
  29. FrameExtractionOptions frame_opts;
  30. MelBanksOptions mel_opts;
  31. // append an extra dimension with energy to the filter banks
  32. bool use_energy = false;
  33. float energy_floor = 0.0f; // active iff use_energy==true
  34. // If true, compute log_energy before preemphasis and windowing
  35. // If false, compute log_energy after preemphasis ans windowing
  36. bool raw_energy = true; // active iff use_energy==true
  37. // If true, put energy last (if using energy)
  38. // If false, put energy first
  39. bool htk_compat = false; // active iff use_energy==true
  40. // if true (default), produce log-filterbank, else linear
  41. bool use_log_fbank = true;
  42. // if true (default), use power in filterbank
  43. // analysis, else magnitude.
  44. bool use_power = true;
  45. FbankOptions() { mel_opts.num_bins = 23; }
  46. std::string ToString() const {
  47. std::ostringstream os;
  48. os << "frame_opts: \n";
  49. os << frame_opts << "\n";
  50. os << "\n";
  51. os << "mel_opts: \n";
  52. os << mel_opts << "\n";
  53. os << "use_energy: " << use_energy << "\n";
  54. os << "energy_floor: " << energy_floor << "\n";
  55. os << "raw_energy: " << raw_energy << "\n";
  56. os << "htk_compat: " << htk_compat << "\n";
  57. os << "use_log_fbank: " << use_log_fbank << "\n";
  58. os << "use_power: " << use_power << "\n";
  59. return os.str();
  60. }
  61. };
// Stream-insertion operator for FbankOptions. Only declared here; the
// definition lives elsewhere.
// NOTE(review): presumably writes opts.ToString() to os -- confirm against
// the definition.
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
  63. class FbankComputer {
  64. public:
  65. using Options = FbankOptions;
  66. explicit FbankComputer(const FbankOptions &opts);
  67. ~FbankComputer();
  68. int32_t Dim() const {
  69. return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
  70. }
  71. // if true, compute log_energy_pre_window but after dithering and dc removal
  72. bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
  73. const FrameExtractionOptions &GetFrameOptions() const {
  74. return opts_.frame_opts;
  75. }
  76. const FbankOptions &GetOptions() const { return opts_; }
  77. /**
  78. Function that computes one frame of features from
  79. one frame of signal.
  80. @param [in] signal_raw_log_energy The log-energy of the frame of the signal
  81. prior to windowing and pre-emphasis, or
  82. log(numeric_limits<float>::min()), whichever is greater. Must be
  83. ignored by this function if this class returns false from
  84. this->NeedsRawLogEnergy().
  85. @param [in] vtln_warp The VTLN warping factor that the user wants
  86. to be applied when computing features for this utterance. Will
  87. normally be 1.0, meaning no warping is to be done. The value will
  88. be ignored for feature types that don't support VLTN, such as
  89. spectrogram features.
  90. @param [in] signal_frame One frame of the signal,
  91. as extracted using the function ExtractWindow() using the options
  92. returned by this->GetFrameOptions(). The function will use the
  93. vector as a workspace, which is why it's a non-const pointer.
  94. @param [out] feature Pointer to a vector of size this->Dim(), to which
  95. the computed feature will be written. It should be pre-allocated.
  96. */
  97. void Compute(float signal_raw_log_energy, float vtln_warp,
  98. std::vector<float> *signal_frame, float *feature);
  99. private:
  100. const MelBanks *GetMelBanks(float vtln_warp);
  101. FbankOptions opts_;
  102. float log_energy_floor_;
  103. std::map<float, MelBanks *> mel_banks_; // float is VTLN coefficient.
  104. Rfft rfft_;
  105. };
  106. } // namespace knf
  107. #endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_