#include #include #include #include #include #include #include "signalstats_wrapper.h" using json = nlohmann::json; // 将音频数据按指定时长分段 std::vector> split_audio(const std::vector& audio, int sample_rate, double segment_duration) { int samples_per_segment = static_cast(sample_rate * segment_duration); int num_segments = static_cast(std::ceil(static_cast(audio.size()) / samples_per_segment)); std::vector> segments; for (int i = 0; i < num_segments; ++i) { int start = i * samples_per_segment; int end = std::min(start + samples_per_segment, static_cast(audio.size())); segments.push_back(std::vector(audio.begin() + start, audio.begin() + end)); } return segments; } int main(int argc, char* argv[]) { if (argc != 2) { std::cerr << "Usage: " << argv[0] << " " << std::endl; return 1; } // 打开音频文件 SF_INFO sf_info; SNDFILE* file = sf_open(argv[1], SFM_READ, &sf_info); if (!file) { std::cerr << "Error: Could not open audio file: " << argv[1] << std::endl; std::cerr << "Error message: " << sf_strerror(nullptr) << std::endl; return 1; } // 读取音频数据 std::vector buffer(sf_info.frames * sf_info.channels); sf_count_t count = sf_readf_double(file, buffer.data(), sf_info.frames); sf_close(file); if (count != sf_info.frames) { std::cerr << "Error: Could not read all frames from audio file" << std::endl; return 1; } // 分离声道 std::vector> channels(sf_info.channels); for (int ch = 0; ch < sf_info.channels; ++ch) { channels[ch].resize(sf_info.frames); for (sf_count_t i = 0; i < sf_info.frames; ++i) { channels[ch][i] = buffer[i * sf_info.channels + ch]; } } // 初始化Python解释器 signalstats_wrapper::initialize(); // 按0.5秒分段检测每个声道 const double segment_duration = 0.5; // 分段时长(秒) json result; result["channels"] = json::array(); for (int ch = 0; ch < sf_info.channels; ++ch) { json channel_result; channel_result["channel"] = ch; channel_result["segments"] = json::array(); // 将声道数据分段 auto segments = split_audio(channels[ch], sf_info.samplerate, segment_duration); // 检测每个分段 for (size_t i = 0; i < segments.size(); ++i) { json segment_result; json output; signalstats::detect_signal( output, segments[i], sf_info.samplerate, 3e-3, // silence_threshold -70.0, // db_threshold -70.0, // cv_threshold {"tukey", "0.25"}, // window_params 256, // nperseg 32, // noverlap 256, // nfft false // debug ); segment_result["start_time"] = i * segment_duration; segment_result["end_time"] = std::min((i + 1) * segment_duration, static_cast(sf_info.frames) / sf_info.samplerate); segment_result["detection_result"] = output; channel_result["segments"].push_back(segment_result); } result["channels"].push_back(channel_result); } // 输出检测结果 std::cout << result.dump(2) << std::endl; signalstats_wrapper::finalize(); return 0; }