00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <cstdio>
00023 #include <algorithm>
00024
00025 #include "luxrays/kernels/kernels.h"
00026 #include "luxrays/core/pixeldevice.h"
00027 #include "luxrays/core/context.h"
00028
00029 using namespace luxrays;
00030
00031
00032
00033
00034
00035 #if !defined(LUXRAYS_DISABLE_OPENCL)
00036
00037 OpenCLSampleBuffer::OpenCLSampleBuffer(OpenCLPixelDevice *dev, const size_t bufferSize) : SampleBuffer(bufferSize) {
00038 device = dev;
00039 oclBuffer = new cl::Buffer(device->deviceDesc->GetOCLContext(),
00040 CL_MEM_READ_ONLY,
00041 sizeof(SampleBufferElem) * OpenCLPixelDevice::SampleBufferSize);
00042 device->deviceDesc->usedMemory += oclBuffer->getInfo<CL_MEM_SIZE>();
00043 }
00044
00045 OpenCLSampleBuffer::~OpenCLSampleBuffer() {
00046 device->deviceDesc->usedMemory -= oclBuffer->getInfo<CL_MEM_SIZE>();
00047 delete oclBuffer;
00048 }
00049
00050 void OpenCLSampleBuffer::Write() const {
00051 assert (GetSampleCount() <= OpenCLPixelDevice::SampleBufferSize);
00052
00053
00054 device->oclQueue->enqueueWriteBuffer(
00055 *oclBuffer,
00056 CL_FALSE,
00057 0,
00058 sizeof(SampleBufferElem) * GetSampleCount(),
00059 GetSampleBuffer());
00060 }
00061
00062 void OpenCLSampleBuffer::Wait() const {
00063 if (oclEvent())
00064 oclEvent.wait();
00065 }
00066
00067 void OpenCLSampleBuffer::CollectStats() const {
00068 if (oclEvent()) {
00069 device->statsTotalSampleTime += (oclEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>() -
00070 oclEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>()) * 1e-9;
00071 device->statsTotalSamplesCount += GetSampleCount();
00072 }
00073 }
00074
00075
00076
00077 size_t OpenCLPixelDevice::SampleBufferSize = 65536;
00078
00079 OpenCLPixelDevice::OpenCLPixelDevice(const Context *context, OpenCLDeviceDescription *desc,
00080 const unsigned int index) :
00081 PixelDevice(context, DEVICE_TYPE_OPENCL, index) {
00082 deviceDesc = desc;
00083 deviceName = (desc->GetName() +"Pixel").c_str();
00084
00085 sampleFrameBuffer = NULL;
00086 frameBuffer = NULL;
00087 sampleFrameBuff = NULL;
00088 frameBuff = NULL;
00089
00090
00091 const float alpha = 2.f;
00092 const float expX = expf(-alpha * 2.f * 2.f);
00093 const float expY = expf(-alpha * 2.f * 2.f);
00094
00095 float *ftp2x2 = Gaussian2x2_filterTable;
00096 for (u_int y = 0; y < FilterTableSize; ++y) {
00097 const float fy = (static_cast<float>(y) + .5f) * 2.f / FilterTableSize;
00098 for (u_int x = 0; x < FilterTableSize; ++x) {
00099 const float fx = (static_cast<float>(x) + .5f) * 2.f / FilterTableSize;
00100 *ftp2x2++ = Max<float>(0.f, expf(-alpha * fx * fx) - expX) *
00101 Max<float>(0.f, expf(-alpha * fy * fy) - expY);
00102 }
00103 }
00104
00105
00106 cl::Context &oclContext = deviceDesc->GetOCLContext();
00107 cl::Device &oclDevice = deviceDesc->GetOCLDevice();
00108
00109
00110 oclQueue = new cl::CommandQueue(oclContext, oclDevice, CL_QUEUE_PROFILING_ENABLE);
00111
00112
00113
00114
00115
00116 CompileKernel(oclContext, oclDevice, KernelSource_Pixel_ClearFB, "PixelClearFB", &clearFBKernel);
00117 CompileKernel(oclContext, oclDevice, KernelSource_Pixel_ClearSampleFB, "PixelClearSampleFB", &clearSampleFBKernel);
00118 CompileKernel(oclContext, oclDevice, KernelSource_Pixel_AddSampleBuffer, "PixelAddSampleBuffer", &addSampleBufferKernel);
00119 CompileKernel(oclContext, oclDevice, KernelSource_Pixel_AddSampleBufferPreview, "PixelAddSampleBufferPreview", &addSampleBufferPreviewKernel);
00120 CompileKernel(oclContext, oclDevice, KernelSource_Pixel_AddSampleBufferGaussian2x2, "PixelAddSampleBufferGaussian2x2", &addSampleBufferGaussian2x2Kernel);
00121 CompileKernel(oclContext, oclDevice, KernelSource_Pixel_UpdateFrameBuffer, "PixelUpdateFrameBuffer", &updateFrameBufferKernel);
00122
00123
00124
00125
00126
00127 sampleBuffers.resize(0);
00128 freeSampleBuffers.resize(0);
00129
00130 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "]" << " GammaTable buffer size: " << (sizeof(float) * GammaTableSize / 1024) << "Kbytes");
00131 gammaTableBuff = new cl::Buffer(oclContext,
00132 CL_MEM_READ_ONLY,
00133 sizeof(float) * GammaTableSize);
00134 deviceDesc->usedMemory += gammaTableBuff->getInfo<CL_MEM_SIZE>();
00135 SetGamma();
00136
00137 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "]" << " FilterTable buffer size: " << (sizeof(float) * FilterTableSize * FilterTableSize / 1024) << "Kbytes");
00138 filterTableBuff = new cl::Buffer(oclContext,
00139 CL_MEM_READ_ONLY,
00140 sizeof(float) * FilterTableSize * FilterTableSize);
00141 deviceDesc->usedMemory += filterTableBuff->getInfo<CL_MEM_SIZE>();
00142 oclQueue->enqueueWriteBuffer(
00143 *filterTableBuff,
00144 CL_FALSE,
00145 0,
00146 sizeof(float) * FilterTableSize * FilterTableSize,
00147 Gaussian2x2_filterTable);
00148 }
00149
00150 OpenCLPixelDevice::~OpenCLPixelDevice() {
00151 if (started)
00152 PixelDevice::Stop();
00153
00154 delete sampleFrameBuffer;
00155 delete frameBuffer;
00156
00157 for (size_t i = 0; i < sampleBuffers.size(); ++i)
00158 delete sampleBuffers[i];
00159
00160 if (sampleFrameBuff) {
00161 deviceDesc->usedMemory -= sampleFrameBuff->getInfo<CL_MEM_SIZE>();
00162 delete sampleFrameBuff;
00163 }
00164
00165 if (frameBuff) {
00166 deviceDesc->usedMemory -= frameBuff->getInfo<CL_MEM_SIZE>();
00167 delete frameBuff;
00168 }
00169
00170 deviceDesc->usedMemory -= filterTableBuff->getInfo<CL_MEM_SIZE>();
00171 delete filterTableBuff;
00172
00173 deviceDesc->usedMemory -= gammaTableBuff->getInfo<CL_MEM_SIZE>();
00174 delete gammaTableBuff;
00175
00176 delete clearFBKernel;
00177 delete clearSampleFBKernel;
00178 delete addSampleBufferKernel;
00179 delete addSampleBufferPreviewKernel;
00180 delete addSampleBufferGaussian2x2Kernel;
00181 delete updateFrameBufferKernel;
00182 delete oclQueue;
00183 }
00184
00185 void OpenCLPixelDevice::CompileKernel(cl::Context &ctx, cl::Device &device, const std::string &src,
00186 const char *kernelName, cl::Kernel **kernel) {
00187
00188 cl::Program::Sources source(1, std::make_pair(src.c_str(), src.length()));
00189 cl::Program program = cl::Program(ctx, source);
00190 try {
00191 VECTOR_CLASS<cl::Device> buildDevice;
00192 buildDevice.push_back(device);
00193 program.build(buildDevice, "-I.");
00194 } catch (cl::Error err) {
00195 cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
00196 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] " << kernelName << " compilation error:\n" << strError.c_str());
00197
00198 throw err;
00199 }
00200
00201 *kernel = new cl::Kernel(program, kernelName);
00202 }
00203
00204 void OpenCLPixelDevice::Init(const unsigned int w, const unsigned int h) {
00205 PixelDevice::Init(w, h);
00206
00207
00208
00209
00210
00211 delete sampleFrameBuffer;
00212 delete frameBuffer;
00213
00214 if (sampleFrameBuff) {
00215 deviceDesc->usedMemory -= sampleFrameBuff->getInfo<CL_MEM_SIZE>();
00216 delete sampleFrameBuff;
00217 }
00218
00219 if (frameBuff) {
00220 deviceDesc->usedMemory -= frameBuff->getInfo<CL_MEM_SIZE>();
00221 delete frameBuff;
00222 }
00223
00224
00225
00226
00227
00228 sampleFrameBuffer = new SampleFrameBuffer(width, height);
00229 frameBuffer = new FrameBuffer(width, height);
00230
00231 cl::Context &oclContext = deviceDesc->GetOCLContext();
00232
00233
00234 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "]" << " SampleFrameBuffer buffer size: " << (sizeof(SamplePixel) * width * height / 1024) << "Kbytes");
00235 sampleFrameBuff = new cl::Buffer(oclContext,
00236 CL_MEM_READ_WRITE,
00237 sizeof(SamplePixel) * width * height);
00238 deviceDesc->usedMemory += sampleFrameBuff->getInfo<CL_MEM_SIZE>();
00239
00240 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "]" << " FrameBuffer buffer size: " << (sizeof(Pixel) * width * height / 1024) << "Kbytes");
00241 frameBuff = new cl::Buffer(oclContext,
00242 CL_MEM_READ_WRITE,
00243 sizeof(Pixel) * width * height);
00244 deviceDesc->usedMemory += frameBuff->getInfo<CL_MEM_SIZE>();
00245
00246 ClearSampleFrameBuffer();
00247 ClearFrameBuffer();
00248 }
00249
00250 void OpenCLPixelDevice::ClearSampleFrameBuffer() {
00251 clearSampleFBKernel->setArg(0, width);
00252 clearSampleFBKernel->setArg(1, height);
00253 clearSampleFBKernel->setArg(2, *sampleFrameBuff);
00254 oclQueue->enqueueNDRangeKernel(*clearSampleFBKernel, cl::NullRange,
00255 cl::NDRange(RoundUp<size_t>(width, 8), RoundUp<size_t>(height, 8)), cl::NDRange(8, 8));
00256 }
00257
00258 void OpenCLPixelDevice::ClearFrameBuffer() {
00259 clearFBKernel->setArg(0, width);
00260 clearFBKernel->setArg(1, height);
00261 clearFBKernel->setArg(2, *frameBuff);
00262 oclQueue->enqueueNDRangeKernel(*clearFBKernel, cl::NullRange,
00263 cl::NDRange(RoundUp<size_t>(width, 8), RoundUp<size_t>(height, 8)), cl::NDRange(8, 8));
00264 }
00265
00266 void OpenCLPixelDevice::SetGamma(const float gamma) {
00267 float x = 0.f;
00268 const float dx = 1.f / GammaTableSize;
00269 for (unsigned int i = 0; i < GammaTableSize; ++i, x += dx)
00270 gammaTable[i] = powf(Clamp(x, 0.f, 1.f), 1.f / gamma);
00271
00272 oclQueue->enqueueWriteBuffer(
00273 *gammaTableBuff,
00274 CL_FALSE,
00275 0,
00276 sizeof(float) * GammaTableSize,
00277 gammaTable);
00278 }
00279
00280 void OpenCLPixelDevice::Start() {
00281 boost::mutex::scoped_lock lock(splatMutex);
00282
00283 PixelDevice::Start();
00284 }
00285
00286 void OpenCLPixelDevice::Interrupt() {
00287 boost::mutex::scoped_lock lock(splatMutex);
00288 assert (started);
00289 }
00290
00291 void OpenCLPixelDevice::Stop() {
00292 boost::mutex::scoped_lock lock(splatMutex);
00293
00294 oclQueue->finish();
00295 PixelDevice::Stop();
00296 }
00297
00298 SampleBuffer *OpenCLPixelDevice::GetFreeSampleBuffer() {
00299 boost::mutex::scoped_lock lock(splatMutex);
00300
00301
00302 if (freeSampleBuffers.size() > 3) {
00303 OpenCLSampleBuffer *osb = freeSampleBuffers.front();
00304 freeSampleBuffers.pop_front();
00305
00306 osb->Wait();
00307 osb->CollectStats();
00308 osb->Reset();
00309 return osb;
00310 } else {
00311
00312 for (int i = 0; i < 5; ++i) {
00313 OpenCLSampleBuffer *osb = new OpenCLSampleBuffer(this, SampleBufferSize);
00314 sampleBuffers.push_back(osb);
00315 freeSampleBuffers.push_back(osb);
00316 }
00317
00318 OpenCLSampleBuffer *osb = new OpenCLSampleBuffer(this, SampleBufferSize);
00319 sampleBuffers.push_back(osb);
00320
00321 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "]" << " SampleBuffer buffer size: " << (sizeof(SampleBufferElem) * SampleBufferSize / 1024) << "Kbytes (*" << sampleBuffers.size() <<")");
00322
00323 return osb;
00324 }
00325 }
00326
00327 void OpenCLPixelDevice::FreeSampleBuffer(SampleBuffer *sampleBuffer) {
00328 boost::mutex::scoped_lock lock(splatMutex);
00329
00330 freeSampleBuffers.push_back((OpenCLSampleBuffer *)sampleBuffer);
00331 }
00332
00333 void OpenCLPixelDevice::AddSampleBuffer(const FilterType type, SampleBuffer *sampleBuffer) {
00334 boost::mutex::scoped_lock lock(splatMutex);
00335 assert (started);
00336
00337 OpenCLSampleBuffer *osb = (OpenCLSampleBuffer *)sampleBuffer;
00338
00339
00340 osb->Write();
00341
00342
00343 *(osb->GetOCLEvent()) = cl::Event();
00344 switch (type) {
00345 case FILTER_GAUSSIAN: {
00346 addSampleBufferGaussian2x2Kernel->setArg(0, width);
00347 addSampleBufferGaussian2x2Kernel->setArg(1, height);
00348 addSampleBufferGaussian2x2Kernel->setArg(2, *sampleFrameBuff);
00349 addSampleBufferGaussian2x2Kernel->setArg(3, (unsigned int)osb->GetSampleCount());
00350 addSampleBufferGaussian2x2Kernel->setArg(4, *(osb->GetOCLBuffer()));
00351 addSampleBufferGaussian2x2Kernel->setArg(5, *filterTableBuff);
00352
00353 oclQueue->enqueueNDRangeKernel(*addSampleBufferGaussian2x2Kernel, cl::NullRange,
00354 cl::NDRange(sampleBuffer->GetSize()), cl::NDRange(64),
00355 NULL, osb->GetOCLEvent());
00356 break;
00357 }
00358 case FILTER_PREVIEW: {
00359 addSampleBufferPreviewKernel->setArg(0, width);
00360 addSampleBufferPreviewKernel->setArg(1, height);
00361 addSampleBufferPreviewKernel->setArg(2, *sampleFrameBuff);
00362 addSampleBufferPreviewKernel->setArg(3, (unsigned int)osb->GetSampleCount());
00363 addSampleBufferPreviewKernel->setArg(4, *(osb->GetOCLBuffer()));
00364
00365 oclQueue->enqueueNDRangeKernel(*addSampleBufferPreviewKernel, cl::NullRange,
00366 cl::NDRange(sampleBuffer->GetSize()), cl::NDRange(64),
00367 NULL, osb->GetOCLEvent());
00368 break;
00369 }
00370 case FILTER_NONE: {
00371 addSampleBufferKernel->setArg(0, width);
00372 addSampleBufferKernel->setArg(1, height);
00373 addSampleBufferKernel->setArg(2, *sampleFrameBuff);
00374 addSampleBufferKernel->setArg(3, (unsigned int)osb->GetSampleCount());
00375 addSampleBufferKernel->setArg(4, *(osb->GetOCLBuffer()));
00376
00377 oclQueue->enqueueNDRangeKernel(*addSampleBufferKernel, cl::NullRange,
00378 cl::NDRange(osb->GetSize()), cl::NDRange(64),
00379 NULL, osb->GetOCLEvent());
00380 break;
00381 }
00382 default:
00383 assert (false);
00384 break;
00385 }
00386
00387 freeSampleBuffers.push_back(osb);
00388 }
00389
00390 void OpenCLPixelDevice::UpdateFrameBuffer(const ToneMapParams ¶ms) {
00391 cl::Event event;
00392
00393 {
00394 boost::mutex::scoped_lock lock(splatMutex);
00395
00396
00397 updateFrameBufferKernel->setArg(0, width);
00398 updateFrameBufferKernel->setArg(1, height);
00399 updateFrameBufferKernel->setArg(2, *sampleFrameBuff);
00400 updateFrameBufferKernel->setArg(3, *frameBuff);
00401 updateFrameBufferKernel->setArg(4, *gammaTableBuff);
00402
00403 oclQueue->enqueueNDRangeKernel(*updateFrameBufferKernel, cl::NullRange,
00404 cl::NDRange(RoundUp<size_t>(width, 8), RoundUp<size_t>(height, 8)), cl::NDRange(8, 8));
00405
00406 oclQueue->enqueueReadBuffer(
00407 *frameBuff,
00408 CL_FALSE,
00409 0,
00410 sizeof(Pixel) * width * height,
00411 frameBuffer->GetPixels(),
00412 NULL,
00413 &event);
00414 }
00415
00416 event.wait();
00417 }
00418
00419 void OpenCLPixelDevice::Merge(const SampleFrameBuffer *sfb) {
00420 throw std::runtime_error("Internal error: OpenCLPixelDevice::Merge() not yet implemented");
00421 }
00422
00423 const SampleFrameBuffer *OpenCLPixelDevice::GetSampleFrameBuffer() const {
00424 return sampleFrameBuffer;
00425 }
00426
00427 #endif