portaudio, fftw, libsndfile
(編集中)
昔作ったHRTFヘッドホンにバグが発見されたようなので、デバッグがてら公開予定。久しぶりにPCオーディオの世界を調べてみると、進歩していないようで意外と進んでいる。32bit-floatオーディオフォーマットが標準で使われるようになるといいなぁ。
portaudio
http://www.portaudio.com/download.html pa_stable_v19_20111121.tgz
インストール方法 http://terurou.hateblo.jp/entry/2013/07/29/080512
openFrameworksで標準のRtAudioを使わない理由は、WindowsでWASAPIが使えないからです。
libsndfile
http://www.mega-nerd.com/libsndfile/
libsndfile-1.0.25-w64-setup.exe
使い方 http://r9y9.hatenablog.com/entry/2012/05/13/192453
fftw
http://www.fftw.org/install/windows.html fftw-3.3-libs-visual-studio-2010.zip
tar.gzのソースを解凍してその中にfftw-3.3-libsを移動。slnファイルを開き、各プロジェクトのプロパティを編集し、”すべての構成” でWindows7.1SDKの代わりにVisual Studio 2012を指定する。
ソリューションエクスプローラで、benchとbenchfのlibbench2のaligned-main.cを削除して、ビルド。benchのとり方は、2^n乗のnを引数にして、benchf.exe 16など。
# 使ってみたい。 1 Stockham FFT 入門
[参考文献]
松下耕二郎, "信号処理のためのプログラミング入門 ~オーディオファイルの読み込み・書き込みからリアルタイム信号処理まで~"
ただし、この本はあまり出来が良くないので、無理に買う必要はありません。
ASIO4ALL
WASAPI
HRTFデータベース
MIT KEMAR
http://sound.media.mit.edu/resources/KEMAR.html
9 Degrees of Freedom - Razor IMU
https://www.sparkfun.com/products/10736
ハニング窓
音信号を不連続につなぐと、高い周波数成分を含むことになりますので非常に耳障りなノイズが発生します。それを防ぐために、滑らかにつなぐ工夫が必要です。
void HanningWindow(float* window, int size) {
for(int i=0; i<size; i++){
window[i] = 0.5f - 0.5f*cosf(2.0f * PI * static_cast<float>(i) / size);
}
}
畳み込み演算
もしかしたら便利なライブラリで計算できるようになっているかも知れませんが、お勉強のために実装例を載せます。
/*! 音源とインパルス応答の畳み込みの処理クラス
* @file ConvolutionProcess.cpp
* @author Yoshihiro Sato
* @date 2011.06.08
* @brief Signal Processing Class for a source and an impluse convolutioin
*/
#include "define.h"
#include "ConvolutionProcess.h"
#include <windows.h>
#include <malloc.h>
#include <memory.h>
#include <iostream>
using namespace std;
#include <xmmintrin.h> // SSE
#include <pmmintrin.h> // SSE3
//#include <omp.h>//!
extern voidHanningWindow(float* window, int size);
/*! コンストラクタ */
ConvolutionProcess::ConvolutionProcess(const int ch) : nChannels(ch) {
outputBuffer = NULL;
window_function = NULL;
init();
}
/*! デストラクタ */
ConvolutionProcess::~ConvolutionProcess() {
_aligned_free(outputBuffer);
for(int i=LASTx3; i<NEXT; i++){
for(int ch=0; ch<nChannels; ch++){
_aligned_free(delayline[i][ch]);
}
free(delayline[i]);
}
free(delayline);
}
/*! メモリの初期化 */
void ConvolutionProcess::init() {
// 出力用バッファ
outputBuffer = (float*)_aligned_malloc(nFrameLength*nChannels*sizeof(float), 16);
if(!outputBuffer){
getchar();
exit(3);
}
memset(outputBuffer, 0, nFrameLength*nChannels*sizeof(float));
// ディレイライン計算用バッファ
delayline = (float***)malloc(5*sizeof(float**));
for(int i=LASTx3; i<=NEXT; i++){
delayline[i] = (float**)malloc(nChannels*sizeof(float*));
if(!delayline[i]){
getchar();
exit(3);
}
for(int ch=0; ch<nChannels; ch++){
delayline[i][ch] = (float*)_aligned_malloc(2*nFrameLength*sizeof(float), 16);
if(!delayline[i][ch]){
getchar();
exit(3);
}
memset(delayline[i][ch], 0, 2*nFrameLength*sizeof(float));
}
}
// 音源保存用バッファ
source_buffer = (float*)_aligned_malloc(nFrameLength*sizeof(float), 16);
if(!source_buffer){
getchar();
exit(3);
}
memset(source_buffer, 0, nFrameLength*sizeof(float));
// FFT
fft_source_current = new FFT*[nChannels];
fft_source_next = new FFT*[nChannels];
for(int ch=0; ch<nChannels; ch++){
fft_source_current[ch] = new FFT();
fft_source_current[ch]->setScaling(1);
fft_source_next[ch] = new FFT();
fft_source_next[ch]->setScaling(1);
spectrum_current[ch] = fft_source_current[ch]->Spectrum();
spectrum_next[ch] = fft_source_next[ch]->Spectrum();
timesignal_current[ch] = fft_source_current[ch]->TimeSignal();
timesignal_next[ch] = fft_source_next[ch]->TimeSignal();
}
// 畳み込み計算結果保存用バッファ
convolution_current = (fftwf_complex**)malloc(nChannels*sizeof(fftwf_complex*));
for(int ch=0; ch<nChannels; ch++){
convolution_current[ch] = (fftwf_complex*)fftwf_malloc(2*nFrameLength*sizeof(fftwf_complex));
if(!convolution_current[ch]){
getchar();
exit(3);
}
memset(convolution_current[ch], 0, 2*nFrameLength*sizeof(fftwf_complex));
}
convolution_next = (fftwf_complex**)malloc(nChannels*sizeof(fftwf_complex*));
for(int ch=0; ch<nChannels; ch++){
convolution_next[ch] = (fftwf_complex*)fftwf_malloc(2*nFrameLength*sizeof(fftwf_complex));
if(!convolution_next[ch]){
getchar();
exit(3);
}
memset(convolution_next[ch], 0, 2*nFrameLength*sizeof(fftwf_complex));
}
// 窓関数
window_function = (float*)_aligned_malloc(2*nFrameLength*sizeof(float),16);
if(!window_function){
getchar();
exit(3);
}
memset(window_function, 0, 2*nFrameLength*sizeof(float));
HanningWindow(window_function, nFrameLength);//KBDWindow(window_function, nFrameLength, 4.0);
}
void ConvolutionProcess::flush() {
memset(outputBuffer, 0, 2*nFrameLength*sizeof(float));
memset(source_buffer, 0, nFrameLength*sizeof(float));
for(int i=LASTx3; i<=NEXT; i++){
for(int ch=0; ch<nChannels; ch++){
memset(delayline[i][ch], 0, 2*nFrameLength*sizeof(float));
}
}
}
void ConvolutionProcess::execute() {
// #pragma omp parallel for // 無い方が速い
for(int ch=0; ch<nChannels; ch++){
// ディレイラインに音源を流し込む
#ifndef _USE_SSE // 0.00002ぐらい
//current bufferは前半分が前回読み込んだデータの残り、後ろ半分が次フレームの前半を格納
for(int i=0; i<nFrameLength/2; i++){
delayline[NEXT][ch][i] = source[i]*window_function[i];
delayline[CURRENT][ch][i] = source_buffer[i+nFrameLength/2]*window_function[i];//前半部
}
for(int i=nFrameLength/2; i<nFrameqLength; i++){
delayline[NEXT][ch][i] = source[i]*window_function[i];
delayline[CURRENT][ch][i] = source[i-nFrameLength/2]*window_function[i];//後半部
}
#else
//current bufferは前半分が前回読み込んだデータの残り、後ろ半分が次フレームの前半を格納
__m128 a;
__m128 b;
for(int i=0; i<(nFrameLength>>1); i+=16){
// NEXT前半
a = _mm_load_ps(&source[i]);
b = _mm_load_ps(&window_function[i]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i], a);
a = _mm_load_ps(&source[i+4]);
b = _mm_load_ps(&window_function[i+4]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i+4], a);
a = _mm_load_ps(&source[i+8]);
b = _mm_load_ps(&window_function[i+8]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i+8], a);
a = _mm_load_ps(&source[i+12]);
b = _mm_load_ps(&window_function[i+12]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i+12], a);
// CURRENT前半
a = _mm_load_ps(&source_buffer[i+(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i], a);
a = _mm_load_ps(&source_buffer[i+4+(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i+4]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i+4], a);
a = _mm_load_ps(&source_buffer[i+8+(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i+8]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i+8], a);
a = _mm_load_ps(&source_buffer[i+12+(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i+12]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i+12], a);
}
for(int i=(nFrameLength>>1); i<nFrameLength; i+=16){
// NEXT後半
a = _mm_load_ps(&source[i]);
b = _mm_load_ps(&window_function[i]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i], a);
a = _mm_load_ps(&source[i+4]);
b = _mm_load_ps(&window_function[i+4]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i+4], a);
a = _mm_load_ps(&source[i+8]);
b = _mm_load_ps(&window_function[i+8]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i+8], a);
a = _mm_load_ps(&source[i+12]);
b = _mm_load_ps(&window_function[i+12]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[NEXT][ch][i+12], a);
// CURRENT後半
a = _mm_load_ps(&source[i-(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i], a);
a = _mm_load_ps(&source[i+4-(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i+4]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i+4], a);
a = _mm_load_ps(&source[i+8-(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i+8]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i+8], a);
a = _mm_load_ps(&source[i+12-(nFrameLength>>1)]);
b = _mm_load_ps(&window_function[i+12]);
a = _mm_mul_ps(a, b);
_mm_store_ps(&delayline[CURRENT][ch][i+12], a);
}
#endif
#ifdef ___USE_TIMER
LARGE_INTEGER freq;
LARGE_INTEGER begin;
LARGE_INTEGER end;
QueryPerformanceFrequency( &freq );
QueryPerformanceCounter( &begin );
#endif
// 畳み込み
switch(*p_spectrum_type) {
case 0://方向に応じたHRTFデータの畳み込み
// フーリエ変換
fft_source_current[ch]->forward(delayline[CURRENT][ch], nFrameLength);
fft_source_next[ch]->forward(delayline[NEXT][ch], nFrameLength);
//畳み込み
convolution(convolution_current[ch], spectrum_current[ch], *p_spectrum_impulse[ch], nFrameLength);
convolution(convolution_next[ch], spectrum_next[ch], *p_spectrum_impulse[ch], nFrameLength);
// フーリエ逆変換
fft_source_current[ch]->inverse(convolution_current[ch], nFrameLength);
fft_source_next[ch]->inverse(convolution_next[ch], nFrameLength);
break;
case 1: //残響成分の畳み込み
// フーリエ変換
fft_source_current[ch]->forward(delayline[CURRENT][ch], nFrameLength);
fft_source_next[ch]->forward(delayline[NEXT][ch], nFrameLength);
//畳み込み(残響成分スペクトラムは外部から更新)
convolution(convolution_current[ch], spectrum_current[ch], spectrum_reverberation, nFrameLength);
convolution(convolution_next[ch], spectrum_next[ch], spectrum_reverberation, nFrameLength);
// フーリエ逆変換
fft_source_current[ch]->inverse(convolution_current[ch], nFrameLength);
fft_source_next[ch]->inverse(convolution_next[ch], nFrameLength);
break;
case 2: //畳み込み処理をしない
//time_signalにdelayLineからそのままコピー
memcpy( (float*)timesignal_current[ch], delayline[CURRENT][ch], sizeof(float)*nFrameLength);
memcpy( (float*)timesignal_next[ch], delayline[CURRENT][ch], sizeof(float)*nFrameLength);
break;
}
#ifdef ___USE_TIMER // 0.0008ぐらい -> 0.0002ぐらい
QueryPerformanceCounter( &end );
printf( "%f\n", ( double )( end.QuadPart - begin.QuadPart ) / freq.QuadPart );
#endif
}
#ifndef _USE_SSE // 0.00005ぐらい
for(int ch=0; ch<nChannels; ch++){
//残響成分
for(int i=0; i<nFrameLength/2; i++){
outputBuffer[i*nChannels+ch] = timesignal_current[ch][i]
+delayline[LASTx3][ch][i+nFrameLength*3/2]
+delayline[LASTx2][ch][i+nFrameLength]
+delayline[LAST][ch][i+nFrameLength/2];
}
for(int i=nFrameLength/2; i<nFrameLength; i++){
outputBuffer[i*nChannels+ch] = timesignal_current[ch][i]
+delayline[LASTx2][ch][i+nFrameLength]
+delayline[LAST][ch][i+nFrameLength/2]
+timesignal_next[ch][i-nFrameLength/2];
}
}
#else // 0.00003ぐらい
if(nChannels == 1){
__m128 a;
__m128 b;
__m128 c;
__m128 d;
for(int i=0; i<(nFrameLength>>1); i+=16){
a = _mm_load_ps(×ignal_current[0][i]);
b = _mm_load_ps(&delayline[LASTx3][0][i+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i], a);
a = _mm_load_ps(×ignal_current[0][i+4]);
b = _mm_load_ps(&delayline[LASTx3][0][i+4+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+4+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+4+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i+4], a);
a = _mm_load_ps(×ignal_current[0][i+8]);
b = _mm_load_ps(&delayline[LASTx3][0][i+8+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+8+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+8+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i+8], a);
a = _mm_load_ps(×ignal_current[0][i+12]);
b = _mm_load_ps(&delayline[LASTx3][0][i+12+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+12+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+12+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i+12], a);
}
for(int i=nFrameLength/2; i<nFrameLength; i+=16){
a = _mm_load_ps(×ignal_current[0][i]);
b = _mm_load_ps(&delayline[LASTx2][0][i+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i], a);
a = _mm_load_ps(×ignal_current[0][i+4]);
b = _mm_load_ps(&delayline[LASTx2][0][i+4+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+4+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i+4-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i+4], a);
a = _mm_load_ps(×ignal_current[0][i+8]);
b = _mm_load_ps(&delayline[LASTx2][0][i+8+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+8+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i+8-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i+8], a);
a = _mm_load_ps(×ignal_current[0][i+12]);
b = _mm_load_ps(&delayline[LASTx2][0][i+12+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+12+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i+12-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
_mm_store_ps(&outputBuffer[i+12], a);
}
}
if(nChannels == 2){// SSE: 2channelに限定すれば書ける
__m128 a;
__m128 b;
__m128 c;
__m128 d;
__m128 e;
__m128 f;
__m128 g;
__m128 h;
__m128 l;
__m128 m;
for(int i=0; i<(nFrameLength>>1); i+=16){
// 0
a = _mm_load_ps(×ignal_current[0][i]);
b = _mm_load_ps(&delayline[LASTx3][0][i+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i]);
f = _mm_load_ps(&delayline[LASTx3][1][i+(nFrameLength*3>>1)]);
g = _mm_load_ps(&delayline[LASTx2][1][i+nFrameLength]);
h = _mm_load_ps(&delayline[LAST][1][i+(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[(i<<1)+0], l);
_mm_store_ps(&outputBuffer[(i<<1)+4], m);
// 1
a = _mm_load_ps(×ignal_current[0][i+4]);
b = _mm_load_ps(&delayline[LASTx3][0][i+4+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+4+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+4+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i+4]);
f = _mm_load_ps(&delayline[LASTx3][1][i+4+(nFrameLength*3>>1)]);
g = _mm_load_ps(&delayline[LASTx2][1][i+4+nFrameLength]);
h = _mm_load_ps(&delayline[LAST][1][i+4+(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[( (i+4)<<1)+0], l);
_mm_store_ps(&outputBuffer[( (i+4)<<1)+4], m);
// 2
a = _mm_load_ps(×ignal_current[0][i+8]);
b = _mm_load_ps(&delayline[LASTx3][0][i+8+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+8+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+8+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i+8]);
f = _mm_load_ps(&delayline[LASTx3][1][i+8+(nFrameLength*3>>1)]);
g = _mm_load_ps(&delayline[LASTx2][1][i+8+nFrameLength]);
h = _mm_load_ps(&delayline[LAST][1][i+8+(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[( (i+8)<<1)+0], l);
_mm_store_ps(&outputBuffer[( (i+8)<<1)+4], m);
// 3
a = _mm_load_ps(×ignal_current[0][i+12]);
b = _mm_load_ps(&delayline[LASTx3][0][i+12+(nFrameLength*3>>1)]);
c = _mm_load_ps(&delayline[LASTx2][0][i+12+nFrameLength]);
d = _mm_load_ps(&delayline[LAST][0][i+12+(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i+12]);
f = _mm_load_ps(&delayline[LASTx3][1][i+12+(nFrameLength*3>>1)]);
g = _mm_load_ps(&delayline[LASTx2][1][i+12+nFrameLength]);
h = _mm_load_ps(&delayline[LAST][1][i+12+(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[( (i+12)<<1)+0], l);
_mm_store_ps(&outputBuffer[( (i+12)<<1)+4], m);
}
for(int i=(nFrameLength>>1); i<nFrameLength; i+=16){
// 0
a = _mm_load_ps(×ignal_current[0][i]);
b = _mm_load_ps(&delayline[LASTx2][0][i+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i]);
f = _mm_load_ps(&delayline[LASTx2][1][i+nFrameLength]);
g = _mm_load_ps(&delayline[LAST][1][i+(nFrameLength>>1)]);
h = _mm_load_ps(×ignal_next[1][i-(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[(i<<1)+0], l);
_mm_store_ps(&outputBuffer[(i<<1)+4], m);
// 1
a = _mm_load_ps(×ignal_current[0][i+4]);
b = _mm_load_ps(&delayline[LASTx2][0][i+4+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+4+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i+4-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i+4]);
f = _mm_load_ps(&delayline[LASTx2][1][i+4+nFrameLength]);
g = _mm_load_ps(&delayline[LAST][1][i+4+(nFrameLength>>1)]);
h = _mm_load_ps(×ignal_next[1][i+4-(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[( (i+4)<<1)+0], l);
_mm_store_ps(&outputBuffer[( (i+4)<<1)+4], m);
// 2
a = _mm_load_ps(×ignal_current[0][i+8]);
b = _mm_load_ps(&delayline[LASTx2][0][i+8+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+8+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i+8-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i+8]);
f = _mm_load_ps(&delayline[LASTx2][1][i+8+nFrameLength]);
g = _mm_load_ps(&delayline[LAST][1][i+8+(nFrameLength>>1)]);
h = _mm_load_ps(×ignal_next[1][i+8-(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[( (i+8)<<1)+0], l);
_mm_store_ps(&outputBuffer[( (i+8)<<1)+4], m);
// 3
a = _mm_load_ps(×ignal_current[0][i+12]);
b = _mm_load_ps(&delayline[LASTx2][0][i+12+nFrameLength]);
c = _mm_load_ps(&delayline[LAST][0][i+12+(nFrameLength>>1)]);
d = _mm_load_ps(×ignal_next[0][i+12-(nFrameLength>>1)]);
a = _mm_add_ps(a,b);
c = _mm_add_ps(c,d);
a = _mm_add_ps(a,c);
e = _mm_load_ps(×ignal_current[1][i+12]);
f = _mm_load_ps(&delayline[LASTx2][1][i+12+nFrameLength]);
g = _mm_load_ps(&delayline[LAST][1][i+12+(nFrameLength>>1)]);
h = _mm_load_ps(×ignal_next[1][i+12-(nFrameLength>>1)]);
e = _mm_add_ps(e,f);
g = _mm_add_ps(g,h);
e = _mm_add_ps(e,h);
l = _mm_unpacklo_ps(a,e);
m = _mm_unpackhi_ps(a,e);
_mm_store_ps(&outputBuffer[( (i+12)<<1)+0], l);
_mm_store_ps(&outputBuffer[( (i+12)<<1)+4], m);
}
}
else {
for(int ch=0; ch<nChannels; ch++){
//残響成分
for(int i=0; i<(nFrameLength>>1); i++){
outputBuffer[i*nChannels+ch] = timesignal_current[ch][i]
+delayline[LASTx3][ch][i+(nFrameLength*3>>1)]
+delayline[LASTx2][ch][i+nFrameLength]
+delayline[LAST][ch][i+(nFrameLength>>1)];
}
for(int i=(nFrameLength>>1); i<nFrameLength; i++){
outputBuffer[i*nChannels+ch] = timesignal_current[ch][i]
+delayline[LASTx2][ch][i+nFrameLength]
+delayline[LAST][ch][i+(nFrameLength>>1)]
+timesignal_next[ch][i-(nFrameLength>>1)];
}
}
}
#endif
for(int ch=0; ch<nChannels; ch++){
// 残響成分の保存
memcpy(delayline[LASTx3][ch], delayline[LAST][ch], (nFrameLength<<1)*sizeof(float));
memcpy(delayline[LASTx2][ch], timesignal_current[ch], (nFrameLength<<1)*sizeof(float));
memcpy(delayline[LAST][ch], timesignal_next[ch], (nFrameLength<<1)*sizeof(float));
}
// 音源の保存
memcpy(source_buffer, source, nFrameLength*sizeof(float));
}
/*! 入力データのポインタを設定する */
void ConvolutionProcess::in(const float *input) {
source = input;
}
/*! 出力データのポインタを返す */
const float *ConvolutionProcess::out() {
return outputBuffer;
}
/*! インパルス応答のスペクトラムのポインタのポインタを設定する */
void ConvolutionProcess::in_impulse(const fftwf_complex** input, int ch) {
p_spectrum_impulse[ch] = input;
}
/*! 残響成分のスペクトラムのポインタを設定(データの更新は外部クラスで行われる) */
void ConvolutionProcess::in_reverberation(const fftwf_complex * input) {
spectrum_reverberation = input;
}
convolution_process.h
#ifndef __CONVOLUTION_PROCESS_H__
#define __CONVOLUTION_PROCESS_H__
#include "SoundProcess.h"
#include "FFT.h"
#include <xmmintrin.h> // SSE
#include <pmmintrin.h> // SSE3
#define LASTx30
#define LASTx21
#define LAST2
#define CURRENT3
#define NEXT4
/*! 畳み込み計算 */
inline void convolution(fftwf_complex *out, const fftwf_complex *x, const fftwf_complex *y, int n) {
#ifndef _USE_SSE
for(int i=0; i<n; i++){
float a = x[i][0];
float b = x[i][1];
float c = y[i][0];
float d = y[i][1];
out[i][0] = a*c-b*d;
out[i][1] = a*d+b*c;
}
#else
__m128 a;
__m128 b;
__m128 c;
__m128 d;
__m128 e;
__m128 f;
__m128 g;
__m128 h;
__m128 m;
__m128 j;
for(int i=0; i<n; i+=16){
// 0
a = _mm_load_ps(&x[i][0]);
b = _mm_load_ps(&y[i][0]);
c = _mm_load_ps(&x[i+2][0]);
d = _mm_load_ps(&y[i+2][0]);
e = _mm_mul_ps(a,b);
f = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1));
g = _mm_mul_ps(a,f);
h = _mm_mul_ps(c,d);
m = _mm_shuffle_ps(d,d,_MM_SHUFFLE(2,3,0,1));
j = _mm_mul_ps(c,m);
a = _mm_hsub_ps(e,h);
b = _mm_hadd_ps(g,j);
c = _mm_unpacklo_ps(a,b);
d = _mm_unpackhi_ps(a,b);
_mm_store_ps(&out[i][0],c);
_mm_store_ps(&out[i+2][0],d);
// 1
a = _mm_load_ps(&x[i+4][0]);
b = _mm_load_ps(&y[i+4][0]);
c = _mm_load_ps(&x[i+6][0]);
d = _mm_load_ps(&y[i+6][0]);
e = _mm_mul_ps(a,b);
f = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1));
g = _mm_mul_ps(a,f);
h = _mm_mul_ps(c,d);
m = _mm_shuffle_ps(d,d,_MM_SHUFFLE(2,3,0,1));
j = _mm_mul_ps(c,m);
a = _mm_hsub_ps(e,h);
b = _mm_hadd_ps(g,j);
c = _mm_unpacklo_ps(a,b);
d = _mm_unpackhi_ps(a,b);
_mm_store_ps(&out[i+4][0],c);
_mm_store_ps(&out[i+6][0],d);
// 2
a = _mm_load_ps(&x[i+8][0]);
b = _mm_load_ps(&y[i+8][0]);
c = _mm_load_ps(&x[i+10][0]);
d = _mm_load_ps(&y[i+10][0]);
e = _mm_mul_ps(a,b);
f = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1));
g = _mm_mul_ps(a,f);
h = _mm_mul_ps(c,d);
m = _mm_shuffle_ps(d,d,_MM_SHUFFLE(2,3,0,1));
j = _mm_mul_ps(c,m);
a = _mm_hsub_ps(e,h);
b = _mm_hadd_ps(g,j);
c = _mm_unpacklo_ps(a,b);
d = _mm_unpackhi_ps(a,b);
_mm_store_ps(&out[i+8][0],c);
_mm_store_ps(&out[i+10][0],d);
// 3
a = _mm_load_ps(&x[i+12][0]);
b = _mm_load_ps(&y[i+12][0]);
c = _mm_load_ps(&x[i+14][0]);
d = _mm_load_ps(&y[i+14][0]);
e = _mm_mul_ps(a,b);
f = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1));
g = _mm_mul_ps(a,f);
h = _mm_mul_ps(c,d);
m = _mm_shuffle_ps(d,d,_MM_SHUFFLE(2,3,0,1));
j = _mm_mul_ps(c,m);
a = _mm_hsub_ps(e,h);
b = _mm_hadd_ps(g,j);
c = _mm_unpacklo_ps(a,b);
d = _mm_unpackhi_ps(a,b);
_mm_store_ps(&out[i+12][0],c);
_mm_store_ps(&out[i+14][0],d);
}
#endif
}
class ConvolutionProcess : public SoundProcess {
public:
ConvolutionProcess(const int ch = OUTPUT_CHANNELS);
~ConvolutionProcess();
void execute();
void flush();
void in(const float *);
void in_impulse(const fftwf_complex**, int);
const float* out();
//追加
void in_reverberation(const fftwf_complex*);
void in_spectrum_type(int* p){p_spectrum_type = p; }
protected:
void init();
private:
const int nChannels;
const float *source;
float *source_buffer;
float ***delayline;
float *outputBuffer;
fftwf_complex **convolution_current;
fftwf_complex **convolution_next;
const fftwf_complex** p_spectrum_impulse[OUTPUT_CHANNELS];
const fftwf_complex* spectrum_current[OUTPUT_CHANNELS];
const fftwf_complex* spectrum_next[OUTPUT_CHANNELS];
const float* timesignal_current[OUTPUT_CHANNELS];
const float* timesignal_next[OUTPUT_CHANNELS];
float *window_function;
//追加
int *p_spectrum_type;
const fftwf_complex*spectrum_reverberation;
};
#endif // __CONVOLUTION_PROCESS_H__
汚いコード垂れ流し状態なので、丁寧に説明しないとなんだかよくわかりませんね。需要があったら、解説書きます。このソースだけでは動きません。