openFrameworksのshaderでGLSL4.2のatomic counter bufferを使って画素の数をカウントしてみる
openFrameworksでは新しいものは面倒みないと割り切っているのか、#version 120もしくは#version 150までの対応が普通のようです。
ここでは、どうしても超高速に画素の数を数えたい用事ができたので、比較的最近(#version 420)追加されたらしいatomic counter bufferというものを、動くかどうか分からないまま恐る恐るテストしてみたいと思います。
#include "ofMain.h"
#include "ofApp.h"

//========================================================================
// Program entry point: creates a 1280x720 window backed by an OpenGL 4.5
// context and then hands control to the ofApp instance.
int main() {
    // Alternative settings type that was tried and left disabled:
    //ofGLFWWindowSettings settings;
    ofGLWindowSettings windowSettings;

    // The requested OpenGL version has to match the GLSL version used by the
    // shaders of the application.
    windowSettings.setGLVersion(4, 5);
    windowSettings.width = 1280;
    windowSettings.height = 720;
    ofCreateWindow(windowSettings);

    // Start running the application.
    ofRunApp(new ofApp());
}
#pragma once #include "ofMain.h" class ofApp : public ofBaseApp{ public: void setup(); void update(); void draw(); void keyPressed(int key); void keyReleased(int key); void mouseMoved(int x, int y ); void mouseDragged(int x, int y, int button); void mousePressed(int x, int y, int button); void mouseReleased(int x, int y, int button); void mouseEntered(int x, int y); void mouseExited(int x, int y); void windowResized(int w, int h); void dragEvent(ofDragInfo dragInfo); void gotMessage(ofMessage msg); ofShader shader; ofPixels pix; ofTexture tex; ofFbo fbo; GLuint atomicsBuffer; };
#include "ofApp.h"

#include <cstring>
#include <iostream>
#include <string>
// #include "ofxTimeMeasurements.h"

// Number of GLuint counters stored in the atomic counter buffer
// (one each for red, green and blue) and the resulting size in bytes.
static const int kNumCounters = 3;
static const GLsizeiptr kCountersSize = sizeof(GLuint) * kNumCounters;

// ---------------------------------------------------------------------------
// Atomic counter buffer helpers.
// NOTE(review): the original "from here:" comment pointed at an external
// source whose link is not present in this file.
// ---------------------------------------------------------------------------

// Generates a buffer object, stores its name in `acb` and allocates storage
// for kNumCounters GLuint values (contents left undefined).
void create_a_buffer_for_atomic_counters(GLuint& acb) {
	glGenBuffers(1, &acb);
	// bind the buffer and define its initial storage capacity
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, acb);
	glBufferData(GL_ATOMIC_COUNTER_BUFFER, kCountersSize, NULL, GL_DYNAMIC_DRAW);
	// unbind the buffer
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
}

// Zeroes all counters by mapping the buffer for writing, then attaches the
// buffer to atomic counter binding point 0.
void reset_the_atomic_counter_buffers(GLuint& acb) {
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, acb);
	// map the buffer; `mapped` points at the buffer's data store
	GLuint *mapped = static_cast<GLuint*>(glMapBufferRange(
		GL_ATOMIC_COUNTER_BUFFER, 0, kCountersSize,
		GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
	if (mapped != NULL) {
		// set the memory to zeros, resetting the values in the buffer
		std::memset(mapped, 0, kCountersSize);
		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
	} else {
		// glMapBufferRange returns NULL on error; never write through it.
		ofLogError("ofApp") << "glMapBufferRange failed while resetting the atomic counters";
	}
	// Important: without this indexed binding the shader's counters declared
	// with layout(binding = 0) are not backed by this buffer.
	glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, acb);
}

// Zeroes all counters with glBufferSubData, then attaches the buffer to
// atomic counter binding point 0.
void simpler_reset_the_atomic_counter_buffers(GLuint& acb) {
	const GLuint zeros[kNumCounters] = {};
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, acb);
	glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, kCountersSize, zeros);
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
	// Important: see reset_the_atomic_counter_buffers().
	glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, acb);
}

// Reads the three counters back by mapping the buffer read-only.
// On a mapping failure the three outputs are set to 0.
void read_back_the_values_from_the_buffer(GLuint& acb, int& redPixels, int& greenPixels, int& bluePixels) {
	// Make counter values written by shaders visible to the buffer read below.
	glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, acb);
	// again we map the buffer, but this time for read-only access
	const GLuint *mapped = static_cast<const GLuint*>(glMapBufferRange(
		GL_ATOMIC_COUNTER_BUFFER, 0, kCountersSize, GL_MAP_READ_BIT));
	if (mapped != NULL) {
		// copy the values out now: as soon as the buffer is unmapped the
		// pointer becomes invalid
		redPixels = static_cast<int>(mapped[0]);
		greenPixels = static_cast<int>(mapped[1]);
		bluePixels = static_cast<int>(mapped[2]);
		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
	} else {
		ofLogError("ofApp") << "glMapBufferRange failed while reading the atomic counters";
		redPixels = 0;
		greenPixels = 0;
		bluePixels = 0;
	}
	// unbind, matching the other helpers
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
}

// Reads the three counters back with glGetBufferSubData (no mapping).
void alternative_read_back_the_values_from_the_buffer(GLuint& acb, int& redPixels, int& greenPixels, int& bluePixels) {
	// Make counter values written by shaders visible to the buffer read below.
	glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

	GLuint counters[kNumCounters] = {};
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, acb);
	glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, kCountersSize, counters);
	glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);

	redPixels = static_cast<int>(counters[0]);
	greenPixels = static_cast<int>(counters[1]);
	bluePixels = static_cast<int>(counters[2]);
}

//--------------------------------------------------------------
// Builds the counting fragment shader, loads the image into a texture,
// sizes the FBO and the window to the image, and creates and zeroes the
// atomic counter buffer.
void ofApp::setup() {
	// The shader source is a raw string literal: directive-like lines such as
	// "#version" must not appear inside the arguments of a function-like
	// macro, so a stringifying macro is not a safe way to embed GLSL.
	const std::string fragmentShaderProgram = R"GLSL(#version 450 compatibility
#extension GL_ARB_shader_atomic_counters : enable
#extension GL_EXT_gpu_shader4 : enable

layout(binding = 0, offset = 0) uniform atomic_uint atRed;
layout(binding = 0, offset = 4) uniform atomic_uint atGreen;
layout(binding = 0, offset = 8) uniform atomic_uint atBlue;

uniform sampler2DRect tex0;

in vec4 gl_FragCoord;
out vec4 colorOut;

void main() {
	// coordinates on the texture
	vec2 pos = vec2(gl_FragCoord.x, gl_FragCoord.y);
	vec4 texel = texture2DRect(tex0, pos);

	if (texel.r > 0.5) atomicCounterIncrement(atRed);
	if (texel.g > 0.5) atomicCounterIncrement(atGreen);
	if (texel.b > 0.5) atomicCounterIncrement(atBlue);

	colorOut = texel;
}
)GLSL";

	const bool compiled = shader.setupShaderFromSource(GL_FRAGMENT_SHADER, fragmentShaderProgram);
	const bool linked = shader.linkProgram();
	if (!compiled || !linked) {
		ofLogError("ofApp") << "failed to build the pixel-counting fragment shader";
	}

	// Load the image; ofPixels is used for generality.
	if (!ofLoadImage(pix, "Koala.jpg")) {
		ofLogError("ofApp") << "failed to load Koala.jpg";
	}
	// NOTE(review): the original author was unsure whether the `true`
	// argument is required; confirm against the ofTexture::allocate docs.
	tex.allocate(pix, true);
	tex.setTextureWrap(GL_CLAMP_TO_BORDER, GL_CLAMP_TO_BORDER);
	fbo.allocate(static_cast<int>(tex.getWidth()), static_cast<int>(tex.getHeight()));

	// Resize the main window to the image size.
	ofSetWindowShape(static_cast<int>(tex.getWidth()), static_cast<int>(tex.getHeight()));

	// Initialise the atomic counter buffer.
	create_a_buffer_for_atomic_counters(atomicsBuffer);
	//reset_the_atomic_counter_buffers(atomicsBuffer);
	simpler_reset_the_atomic_counter_buffers(atomicsBuffer);
}

//--------------------------------------------------------------
void ofApp::update() {
}

//--------------------------------------------------------------
// Renders the texture through the counting shader into the FBO, reads the
// counters back, resets them for the next frame, shows the FBO and prints
// the three counts to stdout.
void ofApp::draw() {
	int r = 0;
	int g = 0;
	int b = 0;

	ofBackground(0);

	//TS_START("measurement1");
	fbo.begin();
	shader.begin();
	shader.setUniformTexture("tex0", tex, 0);  // hand the texture to the shader
	// Render into the FBO with the origin shifted by half the image size.
	ofDrawRectangle(-tex.getWidth() / 2, -tex.getHeight() / 2, tex.getWidth(), tex.getHeight());
	shader.end();
	fbo.end();

	// Fetch the results from the atomic counter buffer, then reset it.
	//read_back_the_values_from_the_buffer(atomicsBuffer, r, g, b);
	alternative_read_back_the_values_from_the_buffer(atomicsBuffer, r, g, b);
	simpler_reset_the_atomic_counter_buffers(atomicsBuffer);
	//TS_STOP("measurement1");

	fbo.draw(0, 0);

	std::cout << "rgb count=" << r << ", " << g << ", " << b << std::endl;
}

//--------------------------------------------------------------
// The remaining event handlers are intentionally empty.
void ofApp::keyPressed(int key) {
}

//--------------------------------------------------------------
void ofApp::keyReleased(int key) {
}

//--------------------------------------------------------------
void ofApp::mouseMoved(int x, int y) {
}

//--------------------------------------------------------------
void ofApp::mouseDragged(int x, int y, int button) {
}

//--------------------------------------------------------------
void ofApp::mousePressed(int x, int y, int button) {
}

//--------------------------------------------------------------
void ofApp::mouseReleased(int x, int y, int button) {
}

//--------------------------------------------------------------
void ofApp::mouseEntered(int x, int y) {
}

//--------------------------------------------------------------
void ofApp::mouseExited(int x, int y) {
}

//--------------------------------------------------------------
void ofApp::windowResized(int w, int h) {
}

//--------------------------------------------------------------
void ofApp::gotMessage(ofMessage msg) {
}

//--------------------------------------------------------------
void ofApp::dragEvent(ofDragInfo dragInfo) {
}
atomic counterは、shaderの中でatomic_uint型という特殊な変数を使いますが、これを受け渡す方法はofShaderには(まだ)用意されていません。
// Attach the buffer `acb` to indexed binding point 0 of GL_ATOMIC_COUNTER_BUFFER,
// the index that the shader's layout(binding = 0) counters refer to.
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, acb);
rgb count=376021, 307367, 232583
rgb count=1, 0, 0
unsigned char *image = new unsigned char[1024 * 768 * 4]; TS_START("measurement2"); #ifdef _OPENMP #pragma omp parallel for #endif for (int i = 0; i < 1024; i++) { for (int j = 0; j < 768; j++) { image[(j * 1024 + i) * 4 + 0] = 255; image[(j * 1024 + i) * 4 + 1] = 255; image[(j * 1024 + i) * 4 + 2] = 255; image[(j * 1024 + i) * 4 + 3] = 255; } } TS_STOP("measurement2"); delete[] image;
CPUは、DUAL XEON E5-2620の24論理スレッド構成です。