// common.h
  // Various helper functions and utilities

  #pragma once

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <map>
  #include <random>
  #include <string>
  #include <thread>
  #include <vector>
  // Audio sample rate that read_wav requires its input file to have.
  #define COMMON_SAMPLE_RATE 16000
  //
  // GPT CLI argument parsing
  //
  // Parameters for the GPT command-line interface. Each member's initializer
  // is its default value.
  struct gpt_params {
      int32_t seed = -1; // RNG seed

      // Thread count: the hardware concurrency, capped at 4. The result is
      // clamped to at least 1 because std::thread::hardware_concurrency()
      // returns 0 when the value is not computable.
      int32_t n_threads = std::max<int32_t>(
          1, std::min<int32_t>(4, static_cast<int32_t>(std::thread::hardware_concurrency())));

      int32_t n_predict = 200; // new tokens to predict
      int32_t n_batch   = 8;   // batch size for prompt processing

      // sampling parameters
      int32_t top_k          = 40;
      float   top_p          = 0.9f;
      float   temp           = 0.9f;
      int32_t repeat_last_n  = 64;
      float   repeat_penalty = 1.00f;

      std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
      std::string prompt     = "";
      std::string token_test = ""; // NOTE(review): presumably a tokenizer test file path - confirm

      bool    interactive      = false;
      int32_t interactive_port = -1;
      int32_t n_gpu_layers     = 0;
  };
  30. bool unity_params_parse(int argc, char ** argv, unity_params & params);
  31. bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
  32. void unity_print_usage(int /*argc*/, char ** argv, const unity_params & params);
  33. void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
  // Returns a prompt string; takes the caller's std::mt19937 by mutable
  // reference.
  // NOTE(review): presumably picks among built-in prompts using rng - confirm
  // against the definition.
  std::string gpt_random_prompt(std::mt19937 & rng);
  //
  // Vocab utils
  //
  // Returns a new string derived from s; s itself is not modified.
  // NOTE(review): presumably strips leading and trailing whitespace - confirm
  // against the definition.
  std::string trim(const std::string & s);
  // Returns a new string built from s, from and to; the inputs are not
  // modified.
  // NOTE(review): presumably substitutes occurrences of `from` in s with
  // `to` - confirm whether all occurrences or only the first are replaced.
  std::string replace(
      const std::string & s,
      const std::string & from,
      const std::string & to);
  // Vocabulary container: maps between token strings and integer ids in both
  // directions and keeps a list of special tokens.
  struct gpt_vocab {
      using id    = int32_t;
      using token = std::string;

      std::map<token, id> token_to_id; // token string -> id
      std::map<id, token> id_to_token; // id -> token string

      std::vector<std::string> special_tokens;

      // NOTE(review): presumably appends `token` to special_tokens - confirm
      // against the definition.
      void add_special_token(const std::string & token);
  };
  // poor-man's JSON parsing
  //
  // Takes a file name and returns a map from string keys to int32_t values.
  // NOTE(review): the accepted subset of JSON and the behavior on a missing
  // or malformed file are not visible here - confirm against the definition.
  std::map<std::string, int32_t> json_parse(const std::string & fname);
  // Converts a wide string to a narrow std::string (UTF-8, per the function
  // name). The input is not modified.
  std::string convert_to_utf8(const std::wstring & input);
  // Converts a narrow std::string to a std::wstring. The input is not
  // modified.
  // NOTE(review): presumably expects UTF-8 input, mirroring convert_to_utf8 -
  // confirm against the definition.
  std::wstring convert_to_wstring(const std::string & input);
  // Splits str into words and reports them through the `words` out-parameter.
  // str is taken by value, so the caller's string is not modified.
  // NOTE(review): whether `words` is cleared first or appended to is not
  // visible here - confirm against the definition.
  void gpt_split_words(std::string str, std::vector<std::string>& words);
  56. // split text into tokens
  57. //
  58. // ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
  59. //
  60. // Regex (Python):
  61. // r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
  62. //
  63. // Regex (C++):
  64. // R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
  65. //
  66. std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
  67. // test outputs of gpt_tokenize
  68. //
  69. // - compare with tokens generated by the huggingface tokenizer
  70. // - test cases are chosen based on the model's main language (under 'prompt' directory)
  71. // - if all sentences are tokenized identically, print 'All tests passed.'
  72. // - otherwise, print sentence, huggingface tokens, ggml tokens
  73. //
  74. void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test);
  75. // load the tokens from encoder.json
  76. bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
  77. // sample next token given probabilities for each embedding
  78. //
  79. // - consider only the top K tokens
  80. // - from them, consider only the top tokens with cumulative probability > P
  81. //
  82. // TODO: not sure if this implementation is correct
  83. // TODO: temperature is not implemented
  84. //
  85. gpt_vocab::id gpt_sample_top_k_top_p(
  86. const gpt_vocab & vocab,
  87. const float * logits,
  88. int top_k,
  89. double top_p,
  90. double temp,
  91. std::mt19937 & rng);
  92. gpt_vocab::id gpt_sample_top_k_top_p_repeat(
  93. const gpt_vocab & vocab,
  94. const float * logits,
  95. const int32_t * last_n_tokens_data,
  96. size_t last_n_tokens_data_size,
  97. int top_k,
  98. double top_p,
  99. double temp,
  100. int repeat_last_n,
  101. float repeat_penalty,
  102. std::mt19937 & rng);
  //
  // Audio utils
  //
  // Read WAV audio file and store the PCM data into pcmf32
  // The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
  // If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
  //
  // fname   - file to read
  // pcmf32  - out-parameter receiving the PCM samples as floats
  // pcmf32s - out-parameter receiving per-channel PCM (see the stereo note above)
  // stereo  - the stereo flag described above
  // NOTE(review): the bool result presumably signals success - confirm
  // against the definition.
  bool read_wav(
      const std::string & fname,
      std::vector<float> & pcmf32,
      std::vector<std::vector<float>> & pcmf32s,
      bool stereo);
  // Apply a high-pass frequency filter to PCM audio
  // Suppresses frequencies below cutoff Hz
  //
  // data is taken by mutable reference.
  // NOTE(review): presumably filtered in place - confirm against the
  // definition.
  void high_pass_filter(
      std::vector<float> & data,
      float cutoff,
      float sample_rate);
  // Basic voice activity detection (VAD) using audio energy adaptive threshold
  //
  // pcmf32 is taken by mutable reference.
  // NOTE(review): the meaning of the bool result, whether pcmf32 is modified
  // (e.g. filtered using freq_thold), and the exact roles of last_ms and
  // vad_thold are not visible here - confirm against the definition.
  bool vad_simple(
      std::vector<float> & pcmf32,
      int sample_rate,
      int last_ms,
      float vad_thold,
      float freq_thold,
      bool verbose);
  // compute similarity between two strings using Levenshtein distance
  //
  // Both inputs are read-only; the score is returned as a float.
  // NOTE(review): the range and normalization of the score are not visible
  // here - confirm against the definition.
  float similarity(const std::string & s0, const std::string & s1);
  //
  // SAM argument parsing
  //
  // Parameters for the SAM command-line interface. Each member's initializer
  // is its default value.
  struct sam_params {
      int32_t seed = -1; // RNG seed

      // Thread count: the hardware concurrency, capped at 4. The result is
      // clamped to at least 1 because std::thread::hardware_concurrency()
      // returns 0 when the value is not computable.
      int32_t n_threads = std::max<int32_t>(
          1, std::min<int32_t>(4, static_cast<int32_t>(std::thread::hardware_concurrency())));

      std::string model     = "models/sam-vit-b/ggml-model-f16.bin"; // model path
      std::string fname_inp = "img.jpg";
      std::string fname_out = "img.out";
  };
  140. bool sam_params_parse(int argc, char ** argv, sam_params & params);
  141. void sam_print_usage(int argc, char ** argv, const sam_params & params);