// demo.cpp

  #include <algorithm>
  #include <cmath>
  #include <cstddef>
  #include <exception>
  #include <iostream>
  #include <stdexcept>
  #include <string>
  #include <utility>
  #include <vector>

  #include <sndfile.h>
  #include <nlohmann/json.hpp>

  #include "signalstats_wrapper.h"

  using json = nlohmann::json;
  // Splits a sample buffer into consecutive segments of a fixed duration.
  //
  // Parameters:
  //   audio            - the samples to split (treated as one contiguous stream).
  //   sample_rate      - number of samples per second.
  //   segment_duration - length of each segment, in seconds.
  //
  // Returns the segments in order. Each segment holds
  // floor(sample_rate * segment_duration) samples, except the last one, which
  // holds whatever remains. An empty `audio` yields an empty result.
  //
  // Throws std::invalid_argument when sample_rate * segment_duration is less
  // than one sample (including zero, negative and NaN), because no progress
  // through the buffer would be possible.
  std::vector<std::vector<double>> split_audio(const std::vector<double>& audio, int sample_rate, double segment_duration) {
    const double raw_samples_per_segment = sample_rate * segment_duration;
    // Written as !(x >= 1.0) so that NaN is rejected as well.
    if (!(raw_samples_per_segment >= 1.0)) {
      throw std::invalid_argument("split_audio: segment must span at least one sample");
    }

    std::vector<std::vector<double>> segments;
    const std::size_t total = audio.size();
    if (total == 0) {
      return segments;
    }

    // Clamp to the buffer length before converting to an integer so that a very
    // long duration cannot overflow the integer type; a segment longer than the
    // buffer simply produces one segment containing everything.
    const std::size_t samples_per_segment =
        raw_samples_per_segment >= static_cast<double>(total)
            ? total
            : static_cast<std::size_t>(raw_samples_per_segment);

    segments.reserve((total + samples_per_segment - 1) / samples_per_segment);
    for (std::size_t start = 0; start < total; start += samples_per_segment) {
      const std::size_t length = std::min(samples_per_segment, total - start);
      const auto first = audio.begin() + static_cast<std::ptrdiff_t>(start);
      segments.emplace_back(first, first + static_cast<std::ptrdiff_t>(length));
    }
    return segments;
  }
  21. int main(int argc, char* argv[]) {
  22. if (argc != 2) {
  23. std::cerr << "Usage: " << argv[0] << " <audio_file>" << std::endl;
  24. return 1;
  25. }
  26. // 打开音频文件
  27. SF_INFO sf_info;
  28. SNDFILE* file = sf_open(argv[1], SFM_READ, &sf_info);
  29. if (!file) {
  30. std::cerr << "Error: Could not open audio file: " << argv[1] << std::endl;
  31. std::cerr << "Error message: " << sf_strerror(nullptr) << std::endl;
  32. return 1;
  33. }
  34. // 读取音频数据
  35. std::vector<double> buffer(sf_info.frames * sf_info.channels);
  36. sf_count_t count = sf_readf_double(file, buffer.data(), sf_info.frames);
  37. sf_close(file);
  38. if (count != sf_info.frames) {
  39. std::cerr << "Error: Could not read all frames from audio file" << std::endl;
  40. return 1;
  41. }
  42. // 分离声道
  43. std::vector<std::vector<double>> channels(sf_info.channels);
  44. for (int ch = 0; ch < sf_info.channels; ++ch) {
  45. channels[ch].resize(sf_info.frames);
  46. for (sf_count_t i = 0; i < sf_info.frames; ++i) {
  47. channels[ch][i] = buffer[i * sf_info.channels + ch];
  48. }
  49. }
  50. // 初始化Python解释器
  51. signalstats_wrapper::initialize();
  52. // 按0.5秒分段检测每个声道
  53. const double segment_duration = 0.5; // 分段时长(秒)
  54. json result;
  55. result["channels"] = json::array();
  56. for (int ch = 0; ch < sf_info.channels; ++ch) {
  57. json channel_result;
  58. channel_result["channel"] = ch;
  59. channel_result["segments"] = json::array();
  60. // 将声道数据分段
  61. auto segments = split_audio(channels[ch], sf_info.samplerate, segment_duration);
  62. // 检测每个分段
  63. for (size_t i = 0; i < segments.size(); ++i) {
  64. json segment_result;
  65. json output;
  66. signalstats_wrapper::detect_signal_wrapper(
  67. output,
  68. segments[i],
  69. sf_info.samplerate,
  70. 3e-3, // silence_threshold
  71. -70.0, // db_threshold
  72. -70.0, // cv_threshold
  73. {"tukey", "0.25"}, // window_params
  74. 256, // nperseg
  75. 32, // noverlap
  76. 256, // nfft
  77. false // debug
  78. );
  79. segment_result["start_time"] = i * segment_duration;
  80. segment_result["end_time"] = std::min((i + 1) * segment_duration,
  81. static_cast<double>(sf_info.frames) / sf_info.samplerate);
  82. segment_result["detection_result"] = output;
  83. channel_result["segments"].push_back(segment_result);
  84. }
  85. result["channels"].push_back(channel_result);
  86. }
  87. // 输出检测结果
  88. std::cout << result.dump(2) << std::endl;
  89. signalstats_wrapper::finalize();
  90. return 0;
  91. }