/* C++ program that uses the FFTW library (and its MFFM C++ wrapper) to analyse an AIFF audio file. It produces a Csound orchestra file (for resynthesis). It uses the short-time Fourier transform (STFT) with a 4-term Blackman-Harris time window (but you can change that). Only opens AIFF files, but any samplerate and any number of channels. Analyses only the FIRST channel. Phase information from the STFT analysis is thrown away, only amplitudes are used. Creates a Csound file, with starttimes, durations and linear amplitudes. Before compilation, make sure following packages are installed: libfftw3 libfftw3-dev (libfftw3-doc Just handy to have the documentaion on your own machine) mffm-fftw-dev mffm-fftw1 Type 'make' on the commandline to compile (or see the Makefile). Examples to run: ./fftw2csound (prints usage) ./fftw2csound 2000 500 4096 sound13.aiff COPYLEFT 2020 2021 Schreck Ensemble */ #include // For writing a text outputfile. #include #include // For std::setprecision and such. #include // For cos and atan and such. #include // For atoi / to parse numerical arguments. #include // For strlen and strcpy. using namespace std; #include // FFTW C++ wrapper (system-wide installed lib). #include "rdaiff.hpp" // To read aiff audiofiles (included in source dir). //------------------------------------------------------------------------------ // Appends a single line, 'i1 bla bla etcetera', to the Csound output score. // Hack this function to apply time-stretching, pitch-shifting, or a combi- // nation of both (with perhaps other tricks) during resynthesis in Csound. static void csoundline(ofstream& of, // Reference to output file. double starttime, double duration, double frequency, double amplitude) { // Skip all resynthesis-wavelets below tresholds: if ((amplitude > 30.0) && (frequency > 30.0)) of << "i1 " // p1 << starttime << " " // p2 Multiplying times and durations << duration << " " // p3 gives time-stretching/compression. << frequency << " " // p4 Multiplying frq gives pitch-shift. << amplitude << '\n'; // p5 } //------------------------------------------------------------------------------ int main(int argc, // Number of arguments (first is name of program). char** argv) // Array of C-strings. { const char* usage = "Usage: fftw2csound \n\ where is the input time window size in samples, not bound to a power of 2.\n\ is the hop size, should be about 1/5th (or less) than the window size\n\ when using a Blackman-Harris shaped window.\n\ Ratio WINS : HOPS determines the overlap.\n\ is the FFT input size. It may be much larger than the window size\n\ (to increase resolution), but not smaller. Not bound to powers of 2.\n\ is the pathname of the input AIFF file to be anaysed.\n\ Only the first channel of a multichannel file is read.\n\n\ Version 0.03 (november 17, 2021) - Pieter Suurmond.\n"; int e = 0; // Error return code to the operating system. int wins; // Window size. int hops; int ffts; if (argc != 5) { cout << usage; return 1; } wins = atoi(argv[1]); if (wins < 4) { cout << "First argument should at least be 4!\n" << usage; return 1; } hops = atoi(argv[2]); if (hops < 1) { cout << "Second argument should at least be 1!\n" << usage; return 1; } ffts = atoi(argv[3]); if (ffts < wins) { cout << "Third argument may not be less than !\n" << usage; return 1; } if ((4 * hops) > wins) cout << "\ WARNING: should be about one fourth of , or less. Otherwise,\n\ input information may get skipped over. Decrease or increase to\n\ prevent this warning message.\n"; // But continue, analyze this way. try { // Try to open the specified audiofile and setup reading buffers: RDAIFF af(argv[4]); // 4th arg is pathname of audiofile. float chanbuf[af.channels()]; // Single sampleframe, all channels. float circbuf[wins]; // Many samples, but monophonic. for (int i = 0; i < wins; i++) // Clear buffer before using it. circbuf[i] = 0.0; // Pre-calculate a Blackman-Harris window with the specified size: float window[wins]; double f = 8.0*atan(1.0) / (double)(wins-1); // -1 for center/symmetry. for (int i = 0; i < wins; i++) { double fi = f * (double)i; window[i] = 0.35875 - 0.48829 * cos(fi) + 0.14128 * cos(fi * 2.0) - 0.01168 * cos(fi * 3.0); //cout << i << ' ' << window[i] << '\n'; Seems OK for odd and even. } // Setup FFT buffers. (See perhaps /usr/share/doc/mffm-fftw-dev/) realFFTData fftData(ffts); // Allocate FFT-memory internally. realFFT rfft(&fftData); // Create output (CSound) textfile: int len = strlen(af.name()); char fname[len + 5]; // 4 extra chars + a trailing 0. strcpy(fname, af.name()); strcpy(fname + len, ".sco"); // Append suffix: Csound score. ofstream of(fname); of << setprecision(6) << fixed << // Fixed floating point notation "; Created by fftw2csound.\n\n"; // with 6 decimals. // Constants used during output-mapping to Csound: double duration = (double)wins / af.samplerate(); // In seconds. double basefreq = af.samplerate() / (double)ffts; // In hertz. double amplific = 400000.0 / (double)ffts; // Two counters (1 would be better: todo?): long long samplecount = 0; int idx = 0; // Index within circular buffer array. while (af.frames_togo() >= hops) // Skips last input chunk when incomplete! { // Also skips the final X sanalysis-frames! // Feed the circular buffer with incoming new samples: for (int i = 0; i < hops; i++) { af.read(chanbuf, 1); // Only 1 frame per call for simplicity. circbuf[(idx + i) % wins] = chanbuf[0]; // Select first channel! } idx = (idx + hops) % wins; // Because both HOPS and WINS are not necessarily // powers of 2, we have to reduce idx before it // wraps by some power of 2^8 (16, 32... bits)! // Copy whole circular buffer, windowed!, to the FFT input buffer: for (int i = 0; i < wins; i++) fftData.in[i] = window[i] * circbuf[(idx + i) % wins]; for (int i = wins; i < ffts; i++) // Append zeros if necessary. fftData.in[i] = 0.0; // Forward Fourier transform using the FFTW library, via MFFM: // See /usr/share/doc/mffm-fftw-dev/examples/realFFTExample.cc. rfft.fwdTransform(); // Knows its own in- and output-data! fftData.compPowerSpec(); // Compute squared values! fftData.sqrtPowerSpec(); // Convert to linear values. // Write to output file: only use magnitudes, throw away phases. // First and last bin will be accessed via i-1 and i+1. double start = samplecount / af.samplerate(); for (int i = 1; i < (ffts+1)/2; i++) { // Find spectral peaks: if ((fftData.power_spectrum[i-1] <= fftData.power_spectrum[i]) && (fftData.power_spectrum[i+1] <= fftData.power_spectrum[i])) { // Use parabolic interpolation to guess the maximum: double a2 = fftData.power_spectrum[i-1] -2.0*fftData.power_spectrum[i] + fftData.power_spectrum[i+1]; double b2 = fftData.power_spectrum[i-1] - fftData.power_spectrum[i+1]; double x = 0.0; // When all 3 bins equal: take the middle one. if (a2 != 0.0) x = 0.5 * b2 / a2; double y = fftData.power_spectrum[i] -0.25 * b2 * x; x += (double)i; csoundline(of, start, duration, // Secs. basefreq * x, // Freq. amplific * y); // Ampl. } } of << '\n'; samplecount += hops; } of.close(); // Close Csound output file. af.close(); // One may forget: gets called automatically by destructor. cout << "Done. Written file " << fname << '\n'; } catch (const char* err_txt) // Catch and display RDAIFF-errors. { cout << err_txt << '\n'; e = 1; } return e; }