00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "luxrays/core/intersectiondevice.h"
00023 #include "luxrays/core/context.h"
00024 #include "luxrays/kernels/kernels.h"
00025 #include "luxrays/accelerators/bvhaccel.h"
00026 #include "luxrays/accelerators/qbvhaccel.h"
00027 #include "luxrays/accelerators/mqbvhaccel.h"
00028
00029 using namespace luxrays;
00030
00031
00032
00033
00034
00035 #if !defined(LUXRAYS_DISABLE_OPENCL)
00036
00037 size_t OpenCLIntersectionDevice::RayBufferSize = OPENCL_RAYBUFFER_SIZE;
00038
00039 OpenCLIntersectionDevice::OpenCLIntersectionDevice(
00040 const Context *context,
00041 OpenCLDeviceDescription *desc,
00042 const unsigned int index,
00043 const unsigned int forceWGSize) :
00044 HardwareIntersectionDevice(context, DEVICE_TYPE_OPENCL, index) {
00045 forceWorkGroupSize = forceWGSize;
00046 deviceDesc = desc;
00047 deviceName = (desc->GetName() +"Intersect").c_str();
00048 reportedPermissionError = false;
00049 qbvhUseImage = false;
00050 qbvhDisableImageStorage = false;
00051 intersectionThread = NULL;
00052
00053 bvhKernel = NULL;
00054 qbvhKernel = NULL;
00055 qbvhImageKernel = NULL;
00056 mqbvhKernel = NULL;
00057 oclQueue = NULL;
00058
00059 bvhBuff = NULL;
00060 vertsBuff = NULL;
00061 trisBuff = NULL;
00062 bvhBuff = NULL;
00063
00064 qbvhBuff = NULL;
00065 qbvhTrisBuff = NULL;
00066
00067 qbvhImageBuff = NULL;
00068 qbvhTrisImageBuff = NULL;
00069
00070 mqbvhBuff = NULL;
00071 mqbvhMemMapBuff = NULL;
00072 mqbvhLeafBuff = NULL;
00073 mqbvhLeafQuadTrisBuff = NULL;
00074 mqbvhInvTransBuff = NULL;
00075 mqbvhTrisOffsetBuff = NULL;
00076
00077 externalRayBufferQueue = NULL;
00078
00079
00080 cl::Context &oclContext = deviceDesc->GetOCLContext();
00081 cl::Device &oclDevice = deviceDesc->GetOCLDevice();
00082 oclQueue = new cl::CommandQueue(oclContext, oclDevice);
00083 }
00084
00085 OpenCLIntersectionDevice::~OpenCLIntersectionDevice() {
00086 if (started)
00087 Stop();
00088
00089 FreeDataSetBuffers();
00090
00091 delete bvhKernel;
00092 delete qbvhKernel;
00093 delete qbvhImageKernel;
00094 delete mqbvhKernel;
00095 delete oclQueue;
00096 }
00097
00098 void OpenCLIntersectionDevice::SetExternalRayBufferQueue(RayBufferQueue *queue) {
00099 assert (!started);
00100
00101 externalRayBufferQueue = queue;
00102 }
00103
00104 RayBuffer *OpenCLIntersectionDevice::NewRayBuffer() {
00105 return new RayBuffer(RayBufferSize);
00106 }
00107
00108 void OpenCLIntersectionDevice::PushRayBuffer(RayBuffer *rayBuffer) {
00109 assert (started);
00110 assert (!externalRayBufferQueue);
00111
00112 rayBufferQueue.PushToDo(rayBuffer, 0);
00113 }
00114
00115 RayBuffer *OpenCLIntersectionDevice::PopRayBuffer() {
00116 assert (started);
00117 assert (!externalRayBufferQueue);
00118
00119 return rayBufferQueue.PopDone(0);
00120 }
00121
00122 void OpenCLIntersectionDevice::FreeDataSetBuffers() {
00123
00124 if (dataSet) {
00125 deviceDesc->usedMemory -= raysBuff->getInfo<CL_MEM_SIZE>();
00126 delete raysBuff;
00127 deviceDesc->usedMemory -= hitsBuff->getInfo<CL_MEM_SIZE>();
00128 delete hitsBuff;
00129
00130 if (bvhBuff) {
00131 deviceDesc->usedMemory -= vertsBuff->getInfo<CL_MEM_SIZE>();
00132 delete vertsBuff;
00133 deviceDesc->usedMemory -= trisBuff->getInfo<CL_MEM_SIZE>();
00134 delete trisBuff;
00135 deviceDesc->usedMemory -= bvhBuff->getInfo<CL_MEM_SIZE>();
00136 delete bvhBuff;
00137 }
00138
00139 if (qbvhBuff) {
00140 if (qbvhUseImage) {
00141 deviceDesc->usedMemory -= qbvhImageBuff->getInfo<CL_MEM_SIZE>();
00142 delete qbvhImageBuff;
00143 deviceDesc->usedMemory -= qbvhTrisImageBuff->getInfo<CL_MEM_SIZE>();
00144 delete qbvhTrisImageBuff;
00145 } else {
00146 deviceDesc->usedMemory -= qbvhBuff->getInfo<CL_MEM_SIZE>();
00147 delete qbvhBuff;
00148 deviceDesc->usedMemory -= qbvhTrisBuff->getInfo<CL_MEM_SIZE>();
00149 delete qbvhTrisBuff;
00150 }
00151 }
00152
00153 if (mqbvhBuff) {
00154 deviceDesc->usedMemory -= mqbvhBuff->getInfo<CL_MEM_SIZE>();
00155 delete mqbvhBuff;
00156 deviceDesc->usedMemory -= mqbvhMemMapBuff->getInfo<CL_MEM_SIZE>();
00157 delete mqbvhMemMapBuff;
00158 deviceDesc->usedMemory -= mqbvhLeafBuff->getInfo<CL_MEM_SIZE>();
00159 delete mqbvhLeafBuff;
00160 deviceDesc->usedMemory -= mqbvhLeafQuadTrisBuff->getInfo<CL_MEM_SIZE>();
00161 delete mqbvhLeafQuadTrisBuff;
00162 deviceDesc->usedMemory -= mqbvhInvTransBuff->getInfo<CL_MEM_SIZE>();
00163 delete mqbvhInvTransBuff;
00164 deviceDesc->usedMemory -= mqbvhTrisOffsetBuff->getInfo<CL_MEM_SIZE>();
00165 delete mqbvhTrisOffsetBuff;
00166 }
00167 }
00168 }
00169
00170 void OpenCLIntersectionDevice::SetDataSet(const DataSet *newDataSet) {
00171 FreeDataSetBuffers();
00172
00173 IntersectionDevice::SetDataSet(newDataSet);
00174
00175 cl::Context &oclContext = deviceDesc->GetOCLContext();
00176
00177
00178 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Ray buffer size: " << (sizeof(Ray) * RayBufferSize / 1024) << "Kbytes");
00179 raysBuff = new cl::Buffer(oclContext,
00180 CL_MEM_READ_ONLY,
00181 sizeof(Ray) * RayBufferSize);
00182 deviceDesc->usedMemory += raysBuff->getInfo<CL_MEM_SIZE>();
00183
00184 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Ray hits buffer size: " << (sizeof(RayHit) * RayBufferSize / 1024) << "Kbytes");
00185 hitsBuff = new cl::Buffer(oclContext,
00186 CL_MEM_WRITE_ONLY,
00187 sizeof(RayHit) * RayBufferSize);
00188 deviceDesc->usedMemory += hitsBuff->getInfo<CL_MEM_SIZE>();
00189
00190 cl::Device &oclDevice = deviceDesc->GetOCLDevice();
00191 switch (dataSet->GetAcceleratorType()) {
00192 case ACCEL_BVH: {
00193
00194
00195
00196
00197 {
00198
00199 cl::Program::Sources source(1, std::make_pair(KernelSource_BVH.c_str(), KernelSource_BVH.length()));
00200 cl::Program program = cl::Program(oclContext, source);
00201 try {
00202 VECTOR_CLASS<cl::Device> buildDevice;
00203 buildDevice.push_back(oclDevice);
00204 program.build(buildDevice, "-I.");
00205 } catch (cl::Error err) {
00206 cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
00207 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH compilation error:\n" << strError.c_str());
00208
00209 throw err;
00210 }
00211
00212 bvhKernel = new cl::Kernel(program, "Intersect");
00213 bvhKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &bvhWorkGroupSize);
00214 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH kernel work group size: " << bvhWorkGroupSize);
00215 cl_ulong memSize;
00216 bvhKernel->getWorkGroupInfo<cl_ulong>(oclDevice, CL_KERNEL_LOCAL_MEM_SIZE, &memSize);
00217 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH kernel memory footprint: " << memSize);
00218
00219 bvhKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &bvhWorkGroupSize);
00220 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Suggested work group size: " << bvhWorkGroupSize);
00221
00222 if (forceWorkGroupSize > 0) {
00223 bvhWorkGroupSize = forceWorkGroupSize;
00224 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Forced work group size: " << bvhWorkGroupSize);
00225 }
00226 }
00227
00228 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Vertices buffer size: " << (sizeof(Point) * dataSet->GetTotalVertexCount() / 1024) << "Kbytes");
00229 const BVHAccel *bvh = (BVHAccel *)dataSet->accel;
00230 vertsBuff = new cl::Buffer(oclContext,
00231 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00232 sizeof(Point) * dataSet->GetTotalVertexCount(),
00233 bvh->preprocessedMesh->GetVertices());
00234 deviceDesc->usedMemory += vertsBuff->getInfo<CL_MEM_SIZE>();
00235
00236 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Triangle indices buffer size: " << (sizeof(Triangle) * dataSet->GetTotalTriangleCount() / 1024) << "Kbytes");
00237 trisBuff = new cl::Buffer(oclContext,
00238 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00239 sizeof(Triangle) * dataSet->GetTotalTriangleCount(),
00240 bvh->preprocessedMesh->GetTriangles());
00241 deviceDesc->usedMemory += trisBuff->getInfo<CL_MEM_SIZE>();
00242
00243 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] BVH buffer size: " << (sizeof(BVHAccelArrayNode) * bvh->nNodes / 1024) << "Kbytes");
00244 bvhBuff = new cl::Buffer(oclContext,
00245 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00246 sizeof(BVHAccelArrayNode) * bvh->nNodes,
00247 bvh->bvhTree);
00248 deviceDesc->usedMemory += bvhBuff->getInfo<CL_MEM_SIZE>();
00249
00250
00251 bvhKernel->setArg(2, *vertsBuff);
00252 bvhKernel->setArg(3, *trisBuff);
00253 bvhKernel->setArg(4, dataSet->GetTotalTriangleCount());
00254 bvhKernel->setArg(5, bvh->nNodes);
00255 bvhKernel->setArg(6, *bvhBuff);
00256 break;
00257 }
00258 case ACCEL_QBVH: {
00259
00260
00261
00262
00263 {
00264
00265 {
00266 cl::Program::Sources source(1, std::make_pair(KernelSource_QBVH.c_str(), KernelSource_QBVH.length()));
00267 cl::Program program = cl::Program(oclContext, source);
00268 try {
00269 VECTOR_CLASS<cl::Device> buildDevice;
00270 buildDevice.push_back(oclDevice);
00271 program.build(buildDevice, "-I.");
00272 } catch (cl::Error err) {
00273 cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
00274 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH compilation error:\n" << strError.c_str());
00275
00276 throw err;
00277 }
00278
00279 qbvhKernel = new cl::Kernel(program, "Intersect");
00280 qbvhKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &qbvhWorkGroupSize);
00281 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH kernel work group size: " << qbvhWorkGroupSize);
00282 cl_ulong memSize;
00283 qbvhKernel->getWorkGroupInfo<cl_ulong>(oclDevice, CL_KERNEL_LOCAL_MEM_SIZE, &memSize);
00284 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH kernel memory footprint: " << memSize);
00285
00286 qbvhKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &qbvhWorkGroupSize);
00287 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Suggested work group size: " << qbvhWorkGroupSize);
00288
00289 if (forceWorkGroupSize > 0) {
00290 qbvhWorkGroupSize = forceWorkGroupSize;
00291 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Forced work group size: " << qbvhWorkGroupSize);
00292 } else if (qbvhWorkGroupSize > 256) {
00293
00294 qbvhWorkGroupSize = 256;
00295 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Cap work group size to: " << qbvhWorkGroupSize);
00296 }
00297 }
00298
00299
00300 if (deviceDesc->HasImageSupport()) {
00301 cl::Program::Sources source(1, std::make_pair(KernelSource_QBVH.c_str(), KernelSource_QBVH.length()));
00302 cl::Program program = cl::Program(oclContext, source);
00303 try {
00304 VECTOR_CLASS<cl::Device> buildDevice;
00305 buildDevice.push_back(oclDevice);
00306 program.build(buildDevice, "-I. -DUSE_IMAGE_STORAGE");
00307 } catch (cl::Error err) {
00308 cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
00309 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH Image Storage compilation error:\n" << strError.c_str());
00310
00311 throw err;
00312 }
00313
00314 qbvhImageKernel = new cl::Kernel(program, "Intersect");
00315 qbvhImageKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &qbvhImageWorkGroupSize);
00316 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH Image Storage kernel work group size: " << qbvhImageWorkGroupSize);
00317 cl_ulong memSize;
00318 qbvhImageKernel->getWorkGroupInfo<cl_ulong>(oclDevice, CL_KERNEL_LOCAL_MEM_SIZE, &memSize);
00319 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH Image Storage kernel memory footprint: " << memSize);
00320
00321 qbvhImageKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &qbvhImageWorkGroupSize);
00322 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Suggested work group size: " << qbvhImageWorkGroupSize);
00323
00324 if (forceWorkGroupSize > 0) {
00325 qbvhImageWorkGroupSize = forceWorkGroupSize;
00326 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Forced work group size: " << qbvhImageWorkGroupSize);
00327 } else if (qbvhWorkGroupSize > 256) {
00328
00329 qbvhWorkGroupSize = 256;
00330 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Cap work group size to: " << qbvhWorkGroupSize);
00331 }
00332 }
00333 }
00334
00335 const QBVHAccel *qbvh = (QBVHAccel *)dataSet->accel;
00336
00337
00338
00339
00340 const size_t nodeImagePixelRequired = qbvh->nNodes * 7;
00341 const size_t nodeImageWidth = Min<size_t>(RoundUp<size_t>(sqrtf(nodeImagePixelRequired), 7), 0x7fff);
00342 const size_t nodeImageHeight = nodeImagePixelRequired / nodeImageWidth + (((nodeImagePixelRequired % nodeImageWidth) == 0) ? 0 : 1);
00343
00344
00345 const size_t leafPixelRequired = qbvh->nQuads * 10;
00346 const size_t leafImageWidth = Min<size_t>(RoundUp<size_t>(sqrtf(leafPixelRequired), 10), 32760);
00347 const size_t leafImageHeight = leafPixelRequired / leafImageWidth + (((leafPixelRequired % leafImageWidth) == 0) ? 0 : 1);
00348
00349
00350 if (qbvhDisableImageStorage) {
00351 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Disable forced for QBVH scene storage inside image");
00352 qbvhUseImage = false;
00353 } else {
00354 if (deviceDesc->HasImageSupport()) {
00355 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] OpenCL image support is available");
00356
00357
00358 const size_t maxWidth = deviceDesc->GetImage2DMaxWidth();
00359 const size_t maxHeight = deviceDesc->GetImage2DMaxHeight();
00360 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] OpenCL max. image buffer size: " << maxWidth << "x" << maxHeight);
00361
00362 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH node image buffer size: " << nodeImageWidth << "x" << nodeImageHeight);
00363 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH triangle image buffer size: " << leafImageWidth << "x" << leafImageHeight);
00364
00365 if ((nodeImageWidth > maxWidth) ||
00366 (nodeImageHeight > maxHeight) ||
00367 (leafImageWidth > maxWidth) ||
00368 (leafImageHeight > maxHeight)) {
00369 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] OpenCL image max. image size supported is too small");
00370 qbvhUseImage = false;
00371 } else {
00372 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Enabled QBVH scene storage inside image");
00373 qbvhUseImage = true;
00374 }
00375 } else {
00376 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] OpenCL image support is not available");
00377 qbvhUseImage = false;
00378 }
00379 }
00380
00381 if (qbvhUseImage) {
00382 assert (nodeImageWidth < 0x7fff * 7);
00383 assert (nodeImageHeight < 0xffff);
00384
00385 assert (leafImageWidth < 0x7ff * 10);
00386 assert (leafImageHeight < 0xffff);
00387
00388 {
00389
00390 unsigned int *inodes = new unsigned int[nodeImageWidth * nodeImageHeight * 7 * 4];
00391 for (size_t i = 0; i < qbvh->nNodes; ++i) {
00392 unsigned int *pnodes = (unsigned int *)&qbvh->nodes[i];
00393 const size_t offset = i * 7 * 4;
00394
00395 for (size_t j = 0; j < 6 * 4; ++j)
00396 inodes[offset + j] = pnodes[j];
00397
00398 for (size_t j = 0; j < 4; ++j) {
00399 int index = qbvh->nodes[i].children[j];
00400
00401 if (QBVHNode::IsEmpty(index)) {
00402 inodes[offset + 6 * 4 + j] = index;
00403 } else if (QBVHNode::IsLeaf(index)) {
00404 int32_t count = QBVHNode::FirstQuadIndex(index) * 10;
00405
00406 const unsigned short x = (count % leafImageWidth) / 10;
00407 const unsigned short y = count / leafImageWidth;
00408 ((int32_t *)inodes)[offset + 6 * 4 + j] = 0x80000000 |
00409 (((static_cast<int32_t>(QBVHNode::NbQuadPrimitives(index)) - 1) & 0xf) << 27) |
00410 (static_cast<int32_t>((x << 16) | y) & 0x07ffffff);
00411 } else {
00412 index *= 7;
00413
00414 const unsigned short x = (index % nodeImageWidth) / 7;
00415 const unsigned short y = index / nodeImageWidth;
00416 inodes[offset + 6 * 4 + j] = (x << 16) | y;
00417 }
00418 }
00419 }
00420
00421 qbvhImageBuff = new cl::Image2D(oclContext,
00422 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00423 cl::ImageFormat(CL_RGBA, CL_UNSIGNED_INT32), nodeImageWidth, nodeImageHeight, 0, inodes);
00424 deviceDesc->usedMemory += qbvhImageBuff->getInfo<CL_MEM_SIZE>();
00425
00426 delete[] inodes;
00427 }
00428
00429 {
00430 unsigned int *iprims = new unsigned int[leafImageWidth * leafImageHeight * 10 * 4];
00431 memcpy(iprims, qbvh->prims, sizeof(QuadTriangle) * qbvh->nQuads);
00432
00433 qbvhTrisImageBuff = new cl::Image2D(oclContext,
00434 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00435 cl::ImageFormat(CL_RGBA, CL_UNSIGNED_INT32), leafImageWidth, leafImageHeight, 0, iprims);
00436 deviceDesc->usedMemory += qbvhTrisImageBuff->getInfo<CL_MEM_SIZE>();
00437
00438 delete iprims;
00439 }
00440
00441
00442 qbvhImageKernel->setArg(2, *qbvhImageBuff);
00443 qbvhImageKernel->setArg(3, *qbvhTrisImageBuff);
00444 qbvhImageKernel->setArg(5, 24 * qbvhImageWorkGroupSize * sizeof(cl_int), NULL);
00445 } else {
00446 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QBVH buffer size: " << (sizeof(QBVHNode) * qbvh->nNodes / 1024) << "Kbytes");
00447 qbvhBuff = new cl::Buffer(oclContext,
00448 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00449 sizeof(QBVHNode) * qbvh->nNodes,
00450 qbvh->nodes);
00451 deviceDesc->usedMemory += qbvhBuff->getInfo<CL_MEM_SIZE>();
00452
00453 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] QuadTriangle buffer size: " << (sizeof(QuadTriangle) * qbvh->nQuads / 1024) << "Kbytes");
00454 qbvhTrisBuff = new cl::Buffer(oclContext,
00455 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00456 sizeof(QuadTriangle) * qbvh->nQuads,
00457 qbvh->prims);
00458 deviceDesc->usedMemory += qbvhTrisBuff->getInfo<CL_MEM_SIZE>();
00459
00460
00461 qbvhKernel->setArg(2, *qbvhBuff);
00462 qbvhKernel->setArg(3, *qbvhTrisBuff);
00463 qbvhKernel->setArg(5, 24 * qbvhWorkGroupSize * sizeof(cl_int), NULL);
00464 }
00465 break;
00466 }
00467 case ACCEL_MQBVH: {
00468
00469
00470
00471
00472
00473 {
00474 cl::Program::Sources source(1, std::make_pair(KernelSource_MQBVH.c_str(), KernelSource_MQBVH.length()));
00475 cl::Program program = cl::Program(oclContext, source);
00476 try {
00477 VECTOR_CLASS<cl::Device> buildDevice;
00478 buildDevice.push_back(oclDevice);
00479 program.build(buildDevice, "-I.");
00480 } catch (cl::Error err) {
00481 cl::STRING_CLASS strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(oclDevice);
00482 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH compilation error:\n" << strError.c_str());
00483
00484 throw err;
00485 }
00486
00487 mqbvhKernel = new cl::Kernel(program, "Intersect");
00488 mqbvhKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &mqbvhWorkGroupSize);
00489 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH kernel work group size: " << mqbvhWorkGroupSize);
00490 cl_ulong memSize;
00491 mqbvhKernel->getWorkGroupInfo<cl_ulong>(oclDevice, CL_KERNEL_LOCAL_MEM_SIZE, &memSize);
00492 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH kernel memory footprint: " << memSize);
00493
00494 mqbvhKernel->getWorkGroupInfo<size_t>(oclDevice, CL_KERNEL_WORK_GROUP_SIZE, &mqbvhWorkGroupSize);
00495 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Suggested work group size: " << mqbvhWorkGroupSize);
00496
00497 if (forceWorkGroupSize > 0) {
00498 mqbvhWorkGroupSize = forceWorkGroupSize;
00499 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Forced work group size: " << mqbvhWorkGroupSize);
00500 } else if (qbvhWorkGroupSize > 256) {
00501
00502 qbvhWorkGroupSize = 256;
00503 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Cap work group size to: " << qbvhWorkGroupSize);
00504 }
00505 }
00506
00507
00508
00509 const MQBVHAccel *mqbvh = (MQBVHAccel *)dataSet->accel;
00510
00511 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH buffer size: " << (sizeof(QBVHNode) * mqbvh->nNodes / 1024) << "Kbytes");
00512 mqbvhBuff = new cl::Buffer(oclContext,
00513 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00514 sizeof(QBVHNode) * mqbvh->nNodes,
00515 mqbvh->nodes);
00516 deviceDesc->usedMemory += mqbvhBuff->getInfo<CL_MEM_SIZE>();
00517
00518
00519 unsigned int totalNodesCount = 0;
00520 unsigned int totalQuadTrisCount = 0;
00521
00522 std::map<const QBVHAccel *, unsigned int> indexNodesMap;
00523 std::map<const QBVHAccel *, unsigned int> indexQuadTrisMap;
00524
00525 for (std::map<Mesh *, QBVHAccel *, bool (*)(Mesh *, Mesh *)>::const_iterator it = mqbvh->accels.begin(); it != mqbvh->accels.end(); it++) {
00526 const QBVHAccel *qbvh = it->second;
00527
00528 indexNodesMap[qbvh] = totalNodesCount;
00529 totalNodesCount += qbvh->nNodes;
00530 indexQuadTrisMap[qbvh] = totalQuadTrisCount;
00531 totalQuadTrisCount += qbvh->nQuads;
00532 }
00533
00534 unsigned int *memMap = new unsigned int[mqbvh->nLeafs * 2];
00535 for (unsigned int i = 0; i < mqbvh->nLeafs; ++i) {
00536 memMap[i * 2] = indexNodesMap[mqbvh->leafs[i]];
00537 memMap[i * 2 + 1] = indexQuadTrisMap[mqbvh->leafs[i]];
00538 }
00539
00540
00541 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH leaf Nodes buffer size: " << (totalNodesCount * sizeof(QBVHNode) / 1024) << "Kbytes");
00542 mqbvhLeafBuff = new cl::Buffer(oclContext,
00543 CL_MEM_READ_ONLY,
00544 totalNodesCount * sizeof(QBVHNode));
00545 deviceDesc->usedMemory += mqbvhLeafBuff->getInfo<CL_MEM_SIZE>();
00546
00547 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH QuadTriangle buffer size: " << (totalQuadTrisCount * sizeof(QuadTriangle) / 1024) << "Kbytes");
00548 mqbvhLeafQuadTrisBuff = new cl::Buffer(oclContext,
00549 CL_MEM_READ_ONLY,
00550 totalQuadTrisCount * sizeof(QuadTriangle));
00551 deviceDesc->usedMemory += mqbvhLeafQuadTrisBuff->getInfo<CL_MEM_SIZE>();
00552
00553 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH memory map buffer size: " << (mqbvh->nLeafs * sizeof(unsigned int) * 2 / 1024) << "Kbytes");
00554 mqbvhMemMapBuff = new cl::Buffer(oclContext,
00555 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00556 mqbvh->nLeafs * sizeof(unsigned int) * 2,
00557 memMap);
00558 deviceDesc->usedMemory += mqbvhMemMapBuff->getInfo<CL_MEM_SIZE>();
00559 delete memMap;
00560
00561
00562 size_t nodesMemOffset = 0;
00563 size_t quadTrisMemOffset = 0;
00564 for (std::map<Mesh *, QBVHAccel *, bool (*)(Mesh *, Mesh *)>::const_iterator it = mqbvh->accels.begin(); it != mqbvh->accels.end(); it++) {
00565 const QBVHAccel *qbvh = it->second;
00566
00567 const size_t nodesMemSize = sizeof(QBVHNode) * qbvh->nNodes;
00568 oclQueue->enqueueWriteBuffer(
00569 *mqbvhLeafBuff,
00570 CL_FALSE,
00571 nodesMemOffset,
00572 nodesMemSize,
00573 qbvh->nodes);
00574 nodesMemOffset += nodesMemSize;
00575
00576 const size_t quadTrisMemSize = sizeof(QuadTriangle) * qbvh->nQuads;
00577 oclQueue->enqueueWriteBuffer(
00578 *mqbvhLeafQuadTrisBuff,
00579 CL_FALSE,
00580 quadTrisMemOffset,
00581 quadTrisMemSize,
00582 qbvh->prims);
00583 quadTrisMemOffset += quadTrisMemSize;
00584 }
00585
00586
00587 Matrix4x4 *invTrans = new Matrix4x4[mqbvh->nLeafs];
00588 for (unsigned int i = 0; i < mqbvh->nLeafs; ++i) {
00589 if (mqbvh->leafsInvTransform[i])
00590 invTrans[i] = mqbvh->leafsInvTransform[i]->GetMatrix();
00591 else
00592 invTrans[i] = Matrix4x4();
00593 }
00594
00595 const size_t invTransMemSize = mqbvh->nLeafs * sizeof(Matrix4x4);
00596 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH inverse transformations buffer size: " << (invTransMemSize / 1024) << "Kbytes");
00597 mqbvhInvTransBuff = new cl::Buffer(oclContext,
00598 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00599 invTransMemSize,
00600 invTrans);
00601 deviceDesc->usedMemory += mqbvhInvTransBuff->getInfo<CL_MEM_SIZE>();
00602 delete invTrans;
00603
00604
00605 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] MQBVH primitive offsets buffer size: " << (sizeof(unsigned int) * mqbvh->nLeafs / 1024) << "Kbytes");
00606 mqbvhTrisOffsetBuff = new cl::Buffer(oclContext,
00607 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00608 sizeof(unsigned int) * mqbvh->nLeafs,
00609 mqbvh->leafsOffset);
00610 deviceDesc->usedMemory += mqbvhTrisOffsetBuff->getInfo<CL_MEM_SIZE>();
00611
00612
00613 mqbvhKernel->setArg(2, *mqbvhBuff);
00614 mqbvhKernel->setArg(4, *mqbvhMemMapBuff);
00615 mqbvhKernel->setArg(5, *mqbvhLeafBuff);
00616 mqbvhKernel->setArg(6, *mqbvhLeafQuadTrisBuff);
00617 mqbvhKernel->setArg(7, *mqbvhInvTransBuff);
00618 mqbvhKernel->setArg(8, *mqbvhTrisOffsetBuff);
00619 break;
00620 }
00621 default:
00622 assert (false);
00623 }
00624 }
00625
00626 void OpenCLIntersectionDevice::UpdateDataSet() {
00627 switch (dataSet->GetAcceleratorType()) {
00628 case ACCEL_MQBVH: {
00629 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Updating DataSet");
00630
00631 const MQBVHAccel *mqbvh = (MQBVHAccel *)dataSet->accel;
00632
00633
00634 Matrix4x4 *invTrans = new Matrix4x4[mqbvh->nLeafs];
00635 for (unsigned int i = 0; i < mqbvh->nLeafs; ++i) {
00636 if (mqbvh->leafsInvTransform[i])
00637 invTrans[i] = mqbvh->leafsInvTransform[i]->GetMatrix();
00638 else
00639 invTrans[i] = Matrix4x4();
00640 }
00641
00642 oclQueue->enqueueWriteBuffer(
00643 *mqbvhInvTransBuff,
00644 CL_TRUE,
00645 0,
00646 mqbvh->nLeafs * sizeof(Matrix4x4),
00647 invTrans);
00648 delete invTrans;
00649
00650
00651 deviceDesc->usedMemory -= mqbvhBuff->getInfo<CL_MEM_SIZE>();
00652 delete mqbvhBuff;
00653
00654 mqbvhBuff = new cl::Buffer(deviceDesc->GetOCLContext(),
00655 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
00656 sizeof(QBVHNode) * mqbvh->nNodes,
00657 mqbvh->nodes);
00658 deviceDesc->usedMemory += mqbvhBuff->getInfo<CL_MEM_SIZE>();
00659
00660 mqbvhKernel->setArg(2, *mqbvhBuff);
00661 break;
00662 }
00663 case ACCEL_BVH:
00664 case ACCEL_QBVH:
00665 default:
00666 assert (false);
00667 break;
00668 }
00669 }
00670
00671 void OpenCLIntersectionDevice::Start() {
00672 IntersectionDevice::Start();
00673
00674
00675 intersectionThread = new boost::thread(boost::bind(OpenCLIntersectionDevice::IntersectionThread, this));
00676
00677
00678 bool res = SetThreadRRPriority(intersectionThread);
00679 if (res && !reportedPermissionError) {
00680 LR_LOG(deviceContext, "[OpenCL device::" << deviceName << "] Failed to set ray intersection thread priority (you probably need root/administrator permission to set thread realtime priority)");
00681 reportedPermissionError = true;
00682 }
00683 }
00684
00685 void OpenCLIntersectionDevice::Interrupt() {
00686 assert (started);
00687 intersectionThread->interrupt();
00688 }
00689
00690 void OpenCLIntersectionDevice::Stop() {
00691 IntersectionDevice::Stop();
00692
00693 intersectionThread->interrupt();
00694 intersectionThread->join();
00695 delete intersectionThread;
00696 intersectionThread = NULL;
00697
00698 if (!externalRayBufferQueue)
00699 rayBufferQueue.Clear();
00700 }
00701
00702 void OpenCLIntersectionDevice::TraceRayBuffer(RayBuffer *rayBuffer, cl::Event *event) {
00703
00704 oclQueue->enqueueWriteBuffer(
00705 *raysBuff,
00706 CL_FALSE,
00707 0,
00708 sizeof(Ray) * rayBuffer->GetRayCount(),
00709 rayBuffer->GetRayBuffer());
00710
00711 switch (dataSet->GetAcceleratorType()) {
00712 case ACCEL_BVH: {
00713 bvhKernel->setArg(0, *raysBuff);
00714 bvhKernel->setArg(1, *hitsBuff);
00715 bvhKernel->setArg(7, (unsigned int)rayBuffer->GetRayCount());
00716 oclQueue->enqueueNDRangeKernel(*bvhKernel, cl::NullRange,
00717 cl::NDRange(rayBuffer->GetSize()), cl::NDRange(bvhWorkGroupSize));
00718 break;
00719 }
00720 case ACCEL_QBVH: {
00721 if (qbvhUseImage) {
00722 qbvhImageKernel->setArg(0, *raysBuff);
00723 qbvhImageKernel->setArg(1, *hitsBuff);
00724 qbvhImageKernel->setArg(4, (unsigned int)rayBuffer->GetRayCount());
00725 oclQueue->enqueueNDRangeKernel(*qbvhImageKernel, cl::NullRange,
00726 cl::NDRange(rayBuffer->GetSize()), cl::NDRange(qbvhImageWorkGroupSize));
00727 } else {
00728 qbvhKernel->setArg(0, *raysBuff);
00729 qbvhKernel->setArg(1, *hitsBuff);
00730 qbvhKernel->setArg(4, (unsigned int)rayBuffer->GetRayCount());
00731 oclQueue->enqueueNDRangeKernel(*qbvhKernel, cl::NullRange,
00732 cl::NDRange(rayBuffer->GetSize()), cl::NDRange(qbvhWorkGroupSize));
00733 }
00734 break;
00735 }
00736 case ACCEL_MQBVH: {
00737 mqbvhKernel->setArg(0, *raysBuff);
00738 mqbvhKernel->setArg(1, *hitsBuff);
00739 mqbvhKernel->setArg(3, (unsigned int)rayBuffer->GetRayCount());
00740 oclQueue->enqueueNDRangeKernel(*mqbvhKernel, cl::NullRange,
00741 cl::NDRange(rayBuffer->GetSize()), cl::NDRange(mqbvhWorkGroupSize));
00742 break;
00743 }
00744 default:
00745 assert (false);
00746 }
00747
00748
00749 oclQueue->enqueueReadBuffer(
00750 *hitsBuff,
00751 CL_FALSE,
00752 0,
00753 sizeof(RayHit) * rayBuffer->GetRayCount(),
00754 rayBuffer->GetHitBuffer(), NULL, event);
00755 }
00756
00757 void OpenCLIntersectionDevice::EnqueueTraceRayBuffer(cl::Buffer &rBuff, cl::Buffer &hBuff,
00758 const unsigned int rayCount) {
00759 switch (dataSet->GetAcceleratorType()) {
00760 case ACCEL_BVH: {
00761 bvhKernel->setArg(0, rBuff);
00762 bvhKernel->setArg(1, hBuff);
00763 bvhKernel->setArg(7, rayCount);
00764 oclQueue->enqueueNDRangeKernel(*bvhKernel, cl::NullRange,
00765 cl::NDRange(rayCount), cl::NDRange(bvhWorkGroupSize));
00766 break;
00767 }
00768 case ACCEL_QBVH: {
00769 if (qbvhUseImage) {
00770 qbvhImageKernel->setArg(0, rBuff);
00771 qbvhImageKernel->setArg(1, hBuff);
00772 qbvhImageKernel->setArg(4, rayCount);
00773 oclQueue->enqueueNDRangeKernel(*qbvhImageKernel, cl::NullRange,
00774 cl::NDRange(rayCount), cl::NDRange(qbvhImageWorkGroupSize));
00775 } else {
00776 qbvhKernel->setArg(0, rBuff);
00777 qbvhKernel->setArg(1, hBuff);
00778 qbvhKernel->setArg(4, rayCount);
00779 oclQueue->enqueueNDRangeKernel(*qbvhKernel, cl::NullRange,
00780 cl::NDRange(rayCount), cl::NDRange(qbvhWorkGroupSize));
00781 }
00782 break;
00783 }
00784 case ACCEL_MQBVH: {
00785 mqbvhKernel->setArg(0, rBuff);
00786 mqbvhKernel->setArg(1, hBuff);
00787 mqbvhKernel->setArg(3, rayCount);
00788 oclQueue->enqueueNDRangeKernel(*mqbvhKernel, cl::NullRange,
00789 cl::NDRange(rayCount), cl::NDRange(mqbvhWorkGroupSize));
00790 break;
00791 }
00792 default:
00793 assert (false);
00794 }
00795
00796 statsTotalRayCount += rayCount;
00797 }
00798
00799 void OpenCLIntersectionDevice::IntersectionThread(OpenCLIntersectionDevice *renderDevice) {
00800 LR_LOG(renderDevice->deviceContext, "[OpenCL device::" << renderDevice->deviceName << "] Rendering thread started");
00801
00802 try {
00803 RayBufferQueue *queue = renderDevice->externalRayBufferQueue ?
00804 renderDevice->externalRayBufferQueue : &(renderDevice->rayBufferQueue);
00805
00806 RayBuffer *rayBuffer0, *rayBuffer1, *rayBuffer2;
00807 const double startTime = WallClockTime();
00808 while (!boost::this_thread::interruption_requested()) {
00809 const double t1 = WallClockTime();
00810 queue->Pop3xToDo(&rayBuffer0, &rayBuffer1, &rayBuffer2);
00811 renderDevice->statsDeviceIdleTime += WallClockTime() - t1;
00812 const unsigned int count = (rayBuffer0 ? 1 : 0) + (rayBuffer1 ? 1 : 0) + (rayBuffer2 ? 1 : 0);
00813
00814 switch(count) {
00815 case 1: {
00816
00817 cl::Event event;
00818 renderDevice->TraceRayBuffer(rayBuffer0, &event);
00819
00820 event.wait();
00821 renderDevice->statsTotalRayCount += rayBuffer0->GetRayCount();
00822 queue->PushDone(rayBuffer0);
00823
00824 renderDevice->statsDeviceTotalTime = WallClockTime() - startTime;
00825 break;
00826 }
00827 case 2: {
00828
00829
00830
00831 cl::Event event0;
00832 renderDevice->TraceRayBuffer(rayBuffer0, &event0);
00833
00834
00835 cl::Event event1;
00836 renderDevice->TraceRayBuffer(rayBuffer1, &event1);
00837
00838
00839 event0.wait();
00840 renderDevice->statsTotalRayCount += rayBuffer0->GetRayCount();
00841 queue->PushDone(rayBuffer0);
00842
00843
00844 event1.wait();
00845 renderDevice->statsTotalRayCount += rayBuffer1->GetRayCount();
00846 queue->PushDone(rayBuffer1);
00847
00848 renderDevice->statsDeviceTotalTime = WallClockTime() - startTime;
00849 break;
00850 }
00851 case 3: {
00852
00853
00854
00855 cl::Event event0;
00856 renderDevice->TraceRayBuffer(rayBuffer0, &event0);
00857
00858
00859 cl::Event event1;
00860 renderDevice->TraceRayBuffer(rayBuffer1, &event1);
00861
00862
00863 cl::Event event2;
00864 renderDevice->TraceRayBuffer(rayBuffer2, &event2);
00865
00866
00867 event0.wait();
00868 renderDevice->statsTotalRayCount += rayBuffer0->GetRayCount();
00869 queue->PushDone(rayBuffer0);
00870
00871
00872 event1.wait();
00873 renderDevice->statsTotalRayCount += rayBuffer1->GetRayCount();
00874 queue->PushDone(rayBuffer1);
00875
00876
00877 event2.wait();
00878 renderDevice->statsTotalRayCount += rayBuffer2->GetRayCount();
00879 queue->PushDone(rayBuffer2);
00880
00881 renderDevice->statsDeviceTotalTime = WallClockTime() - startTime;
00882 break;
00883 }
00884 default:
00885 assert (false);
00886 }
00887 }
00888
00889 LR_LOG(renderDevice->deviceContext, "[OpenCL device::" << renderDevice->deviceName << "] Rendering thread halted");
00890 } catch (boost::thread_interrupted) {
00891 LR_LOG(renderDevice->deviceContext, "[OpenCL device::" << renderDevice->deviceName << "] Rendering thread halted");
00892 } catch (cl::Error err) {
00893 LR_LOG(renderDevice->deviceContext, "[OpenCL device::" << renderDevice->deviceName << "] Rendering thread ERROR: " << err.what() << "(" << err.err() << ")");
00894 }
00895 }
00896
00897 #endif