// online-feature.h
  1. /**
  2. * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
  3. *
  4. * See LICENSE for clarification regarding multiple authors
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
// The content of this file is copied/modified from
// kaldi/src/feat/online-feature.h
  20. #ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
  21. #define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
  22. #include <cstdint>
  23. #include <deque>
  24. #include <vector>
  25. #include "feature-fbank.h"
  26. namespace knf {
  27. /// This class serves as a storage for feature vectors with an option to limit
  28. /// the memory usage by removing old elements. The deleted frames indices are
  29. /// "remembered" so that regardless of the MAX_ITEMS setting, the user always
  30. /// provides the indices as if no deletion was being performed.
  31. /// This is useful when processing very long recordings which would otherwise
  32. /// cause the memory to eventually blow up when the features are not being
  33. /// removed.
  34. class RecyclingVector {
  35. public:
  36. /// By default it does not remove any elements.
  37. explicit RecyclingVector(int32_t items_to_hold = -1);
  38. ~RecyclingVector() = default;
  39. RecyclingVector(const RecyclingVector &) = delete;
  40. RecyclingVector &operator=(const RecyclingVector &) = delete;
  41. // The pointer is owned by RecyclingVector
  42. // Users should not free it
  43. const float *At(int32_t index) const;
  44. void PushBack(std::vector<float> item);
  45. /// This method returns the size as if no "recycling" had happened,
  46. /// i.e. equivalent to the number of times the PushBack method has been
  47. /// called.
  48. int32_t Size() const;
  49. // discard the first n frames
  50. void Pop(int32_t n);
  51. private:
  52. std::deque<std::vector<float>> items_;
  53. int32_t items_to_hold_;
  54. int32_t first_available_index_;
  55. };
  56. /// This is a templated class for online feature extraction;
  57. /// it's templated on a class like MfccComputer or PlpComputer
  58. /// that does the basic feature extraction.
  59. template <class C>
  60. class OnlineGenericBaseFeature {
  61. public:
  62. // Constructor from options class
  63. explicit OnlineGenericBaseFeature(const typename C::Options &opts);
  64. int32_t Dim() const { return computer_.Dim(); }
  65. float FrameShiftInSeconds() const {
  66. return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
  67. }
  68. int32_t NumFramesReady() const { return features_.Size(); }
  69. // Note: IsLastFrame() will only ever return true if you have called
  70. // InputFinished() (and this frame is the last frame).
  71. bool IsLastFrame(int32_t frame) const {
  72. return input_finished_ && frame == NumFramesReady() - 1;
  73. }
  74. const float *GetFrame(int32_t frame) const { return features_.At(frame); }
  75. // This would be called from the application, when you get
  76. // more wave data. Note: the sampling_rate is only provided so
  77. // the code can assert that it matches the sampling rate
  78. // expected in the options.
  79. //
  80. // @param sampling_rate The sampling_rate of the input waveform
  81. // @param waveform Pointer to a 1-D array of size n
  82. // @param n Number of entries in waveform
  83. void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
  84. // InputFinished() tells the class you won't be providing any
  85. // more waveform. This will help flush out the last frame or two
  86. // of features, in the case where snip-edges == false; it also
  87. // affects the return value of IsLastFrame().
  88. void InputFinished();
  89. // discard the first n frames
  90. void Pop(int32_t n) { features_.Pop(n); }
  91. private:
  92. // This function computes any additional feature frames that it is possible to
  93. // compute from 'waveform_remainder_', which at this point may contain more
  94. // than just a remainder-sized quantity (because AcceptWaveform() appends to
  95. // waveform_remainder_ before calling this function). It adds these feature
  96. // frames to features_, and shifts off any now-unneeded samples of input from
  97. // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
  98. void ComputeFeatures();
  99. C computer_; // class that does the MFCC or PLP or filterbank computation
  100. FeatureWindowFunction window_function_;
  101. // features_ is the Mfcc or Plp or Fbank features that we have already
  102. // computed.
  103. RecyclingVector features_;
  104. // True if the user has called "InputFinished()"
  105. bool input_finished_;
  106. // waveform_offset_ is the number of samples of waveform that we have
  107. // already discarded, i.e. that were prior to 'waveform_remainder_'.
  108. int64_t waveform_offset_;
  109. // waveform_remainder_ is a short piece of waveform that we may need to keep
  110. // after extracting all the whole frames we can (whatever length of feature
  111. // will be required for the next phase of computation).
  112. // It is a 1-D tensor
  113. std::vector<float> waveform_remainder_;
  114. };
  115. using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
  116. } // namespace knf
  117. #endif // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_