feature-fbank.cc 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. /**
  2. * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
  3. *
  4. * See LICENSE for clarification regarding multiple authors
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. // This file is copied/modified from kaldi/src/feat/feature-fbank.cc
  19. //
  20. #include "feature-fbank.h"
  21. #include <algorithm>
  22. #include <cmath>
  23. #include <limits>
  24. #include <vector>
  25. #include "feature-functions.h"
  26. namespace knf {
  // Replaces each of the first `n` floats starting at `in_out` with its
  // square root. The buffer is updated in place.
  static void Sqrt(float *in_out, int32_t n) {
    float *const last = in_out + n;
    for (float *cur = in_out; cur != last; ++cur) {
      *cur = std::sqrt(*cur);
    }
  }
  32. std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
  33. os << opts.ToString();
  34. return os;
  35. }
  36. FbankComputer::FbankComputer(const FbankOptions &opts)
  37. : opts_(opts), rfft_(opts.frame_opts.PaddedWindowSize()) {
  38. if (opts.energy_floor > 0.0f) {
  39. log_energy_floor_ = logf(opts.energy_floor);
  40. }
  41. // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  42. // [note: this call caches it.]
  43. GetMelBanks(1.0f);
  44. }
  45. FbankComputer::~FbankComputer() {
  46. for (auto iter = mel_banks_.begin(); iter != mel_banks_.end(); ++iter)
  47. delete iter->second;
  48. }
  49. const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
  50. MelBanks *this_mel_banks = nullptr;
  51. // std::map<float, MelBanks *>::iterator iter = mel_banks_.find(vtln_warp);
  52. auto iter = mel_banks_.find(vtln_warp);
  53. if (iter == mel_banks_.end()) {
  54. this_mel_banks = new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
  55. mel_banks_[vtln_warp] = this_mel_banks;
  56. } else {
  57. this_mel_banks = iter->second;
  58. }
  59. return this_mel_banks;
  60. }
  61. void FbankComputer::Compute(float signal_raw_log_energy, float vtln_warp,
  62. std::vector<float> *signal_frame, float *feature) {
  63. const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
  64. KNF_CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());
  65. // Compute energy after window function (not the raw one).
  66. if (opts_.use_energy && !opts_.raw_energy) {
  67. signal_raw_log_energy = std::log(
  68. std::max<float>(InnerProduct(signal_frame->data(), signal_frame->data(),
  69. signal_frame->size()),
  70. std::numeric_limits<float>::epsilon()));
  71. }
  72. rfft_.Compute(signal_frame->data()); // signal_frame is modified in-place
  73. ComputePowerSpectrum(signal_frame);
  74. // Use magnitude instead of power if requested.
  75. if (!opts_.use_power) {
  76. Sqrt(signal_frame->data(), signal_frame->size() / 2 + 1);
  77. }
  78. int32_t mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
  79. // Its length is opts_.mel_opts.num_bins
  80. float *mel_energies = feature + mel_offset;
  81. // Sum with mel filter banks over the power spectrum
  82. mel_banks.Compute(signal_frame->data(), mel_energies);
  83. if (opts_.use_log_fbank) {
  84. // Avoid log of zero (which should be prevented anyway by dithering).
  85. for (int32_t i = 0; i != opts_.mel_opts.num_bins; ++i) {
  86. auto t = std::max(mel_energies[i], std::numeric_limits<float>::epsilon());
  87. mel_energies[i] = std::log(t);
  88. }
  89. }
  90. // Copy energy as first value (or the last, if htk_compat == true).
  91. if (opts_.use_energy) {
  92. if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
  93. signal_raw_log_energy = log_energy_floor_;
  94. }
  95. int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
  96. feature[energy_index] = signal_raw_log_energy;
  97. }
  98. }
  99. } // namespace knf