```cpp
#include <iostream>
#include <vector>
#include <string>
#include <cmath>      // std::ceil
#include <algorithm>  // std::min

#include <sndfile.h>
#include <nlohmann/json.hpp>

#include "signalstats_wrapper.h"

using json = nlohmann::json;
// Split audio data into segments of the given duration (in seconds).
// The last segment may be shorter if the audio length is not an exact
// multiple of the segment size.
std::vector<std::vector<double>> split_audio(const std::vector<double>& audio,
                                             int sample_rate,
                                             double segment_duration) {
    int samples_per_segment = static_cast<int>(sample_rate * segment_duration);
    int num_segments = static_cast<int>(
        std::ceil(static_cast<double>(audio.size()) / samples_per_segment));

    std::vector<std::vector<double>> segments;
    for (int i = 0; i < num_segments; ++i) {
        int start = i * samples_per_segment;
        int end = std::min(start + samples_per_segment, static_cast<int>(audio.size()));
        segments.emplace_back(audio.begin() + start, audio.begin() + end);
    }
    return segments;
}
int main(int argc, char* argv[]) {
    if (argc != 2) {
        std::cerr << "Usage: " << argv[0] << " <audio_file>" << std::endl;
        return 1;
    }

    // Open the audio file. SF_INFO must be zero-initialized (in particular
    // its format field) before opening a file for reading.
    SF_INFO sf_info{};
    SNDFILE* file = sf_open(argv[1], SFM_READ, &sf_info);
    if (!file) {
        std::cerr << "Error: Could not open audio file: " << argv[1] << std::endl;
        std::cerr << "Error message: " << sf_strerror(nullptr) << std::endl;
        return 1;
    }

    // Read all frames as doubles into a single interleaved buffer.
    std::vector<double> buffer(sf_info.frames * sf_info.channels);
    sf_count_t count = sf_readf_double(file, buffer.data(), sf_info.frames);
    sf_close(file);
    if (count != sf_info.frames) {
        std::cerr << "Error: Could not read all frames from audio file" << std::endl;
        return 1;
    }
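    // libsndfile stores frames interleaved: sample i of channel ch sits at
    // buffer[i * sf_info.channels + ch], so the loop below de-interleaves
    // the buffer into one contiguous vector per channel.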
    // Separate the channels.
    std::vector<std::vector<double>> channels(sf_info.channels);
    for (int ch = 0; ch < sf_info.channels; ++ch) {
        channels[ch].resize(sf_info.frames);
        for (sf_count_t i = 0; i < sf_info.frames; ++i) {
            channels[ch][i] = buffer[i * sf_info.channels + ch];
        }
    }
    // Initialize the embedded Python interpreter used by the wrapper.
    signalstats_wrapper::initialize();

    // Run detection on every channel in 0.5-second segments.
    const double segment_duration = 0.5;  // segment length in seconds
    json result;
    result["channels"] = json::array();

    for (int ch = 0; ch < sf_info.channels; ++ch) {
        json channel_result;
        channel_result["channel"] = ch;
        channel_result["segments"] = json::array();

        // Split the channel data into segments.
        auto segments = split_audio(channels[ch], sf_info.samplerate, segment_duration);

        // Detect each segment.
        for (size_t i = 0; i < segments.size(); ++i) {
            json segment_result;
            json output;
            signalstats_wrapper::detect_signal_wrapper(
                output,
                segments[i],
                sf_info.samplerate,
                3e-3,               // silence_threshold
                -70.0,              // db_threshold
                -70.0,              // cv_threshold
                {"tukey", "0.25"},  // window_params
                256,                // nperseg
                32,                 // noverlap
                256,                // nfft
                false               // debug
            );

            segment_result["start_time"] = i * segment_duration;
            segment_result["end_time"] = std::min((i + 1) * segment_duration,
                static_cast<double>(sf_info.frames) / sf_info.samplerate);
            segment_result["detection_result"] = output;
            channel_result["segments"].push_back(segment_result);
        }
        result["channels"].push_back(channel_result);
    }
    // Print the detection results as formatted JSON.
    std::cout << result.dump(2) << std::endl;

    signalstats_wrapper::finalize();
    return 0;
}
```
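
To sanity-check the segmentation logic in isolation, the sketch below repeats the `split_audio` definition from the listing and feeds it 1.25 s of a synthetic sine at 8 kHz; with 0.5-second segments it should report two full 4000-sample segments plus a trailing 2000-sample segment. It uses only the standard library, so no libsndfile or wrapper calls are involved.

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// Same definition as in the listing above.
std::vector<std::vector<double>> split_audio(const std::vector<double>& audio,
                                             int sample_rate,
                                             double segment_duration) {
    int samples_per_segment = static_cast<int>(sample_rate * segment_duration);
    int num_segments = static_cast<int>(
        std::ceil(static_cast<double>(audio.size()) / samples_per_segment));
    std::vector<std::vector<double>> segments;
    for (int i = 0; i < num_segments; ++i) {
        int start = i * samples_per_segment;
        int end = std::min(start + samples_per_segment, static_cast<int>(audio.size()));
        segments.emplace_back(audio.begin() + start, audio.begin() + end);
    }
    return segments;
}

int main() {
    const int sample_rate = 8000;
    const double pi = std::acos(-1.0);

    // 1.25 s of a 440 Hz sine: 10000 samples in total.
    std::vector<double> audio(10000);
    for (std::size_t i = 0; i < audio.size(); ++i) {
        audio[i] = std::sin(2.0 * pi * 440.0 * static_cast<double>(i) / sample_rate);
    }

    auto segments = split_audio(audio, sample_rate, 0.5);
    std::cout << "segments: " << segments.size() << '\n';  // expected: 3
    for (std::size_t i = 0; i < segments.size(); ++i) {
        std::cout << "  segment " << i << ": " << segments[i].size() << " samples\n";
    }
    return 0;
}
```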