00001 #include "luxrays/kernels/kernels.h"
00002 std::string luxrays::KernelSource_MQBVH =
00003 "/***************************************************************************\n"
00004 " * Copyright (C) 1998-2010 by authors (see AUTHORS.txt ) *\n"
00005 " * *\n"
00006 " * This file is part of LuxRays. *\n"
00007 " * *\n"
00008 " * LuxRays is free software; you can redistribute it and/or modify *\n"
00009 " * it under the terms of the GNU General Public License as published by *\n"
00010 " * the Free Software Foundation; either version 3 of the License, or *\n"
00011 " * (at your option) any later version. *\n"
00012 " * *\n"
00013 " * LuxRays is distributed in the hope that it will be useful, *\n"
00014 " * but WITHOUT ANY WARRANTY; without even the implied warranty of *\n"
00015 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *\n"
00016 " * GNU General Public License for more details. *\n"
00017 " * *\n"
00018 " * You should have received a copy of the GNU General Public License *\n"
00019 " * along with this program. If not, see <http://www.gnu.org/licenses/>. *\n"
00020 " * *\n"
00021 " * LuxRays website: http://www.luxrender.net *\n"
00022 " ***************************************************************************/\n"
00023 "\n"
00024 "typedef struct {\n"
00025 " float x, y, z;\n"
00026 "} Point;\n"
00027 "\n"
00028 "typedef struct {\n"
00029 " float x, y, z;\n"
00030 "} Vector;\n"
00031 "\n"
00032 "typedef struct {\n"
00033 " Point o;\n"
00034 " Vector d;\n"
00035 " float mint, maxt;\n"
00036 "} Ray;\n"
00037 "\n"
00038 "typedef struct {\n"
00039 " float t;\n"
00040 " float b1, b2; // Barycentric coordinates of the hit point\n"
00041 " uint index;\n"
00042 "} RayHit;\n"
00043 "\n"
00044 "typedef struct {\n"
00045 " Point pMin, pMax;\n"
00046 "} BBox;\n"
00047 "\n"
00048 "typedef struct QuadRay {\n"
00049 " float4 ox, oy, oz;\n"
00050 " float4 dx, dy, dz;\n"
00051 " float4 mint, maxt;\n"
00052 "} QuadRay;\n"
00053 "\n"
00054 "typedef struct {\n"
00055 " float4 origx, origy, origz;\n"
00056 " float4 edge1x, edge1y, edge1z;\n"
00057 " float4 edge2x, edge2y, edge2z;\n"
00058 " uint4 primitives;\n"
00059 "} QuadTiangle;\n"
00060 "\n"
00061 "typedef struct {\n"
00062 " float4 bboxes[2][3];\n"
00063 " int4 children;\n"
00064 "} QBVHNode;\n"
00065 "\n"
00066 "typedef struct {\n"
00067 " float m[4][4];\n"
00068 "} Matrix4x4;\n"
00069 "\n"
00070 "void TransformP(Point *ptrans, Point *p, __global Matrix4x4 *m) {\n"
00071 " const float x = p->x;\n"
00072 " const float y = p->y;\n"
00073 " const float z = p->z;\n"
00074 "\n"
00075 " ptrans->x = m->m[0][0] * x + m->m[0][1] * y + m->m[0][2] * z + m->m[0][3];\n"
00076 " ptrans->y = m->m[1][0] * x + m->m[1][1] * y + m->m[1][2] * z + m->m[1][3];\n"
00077 " ptrans->z = m->m[2][0] * x + m->m[2][1] * y + m->m[2][2] * z + m->m[2][3];\n"
00078 " const float w = m->m[3][0] * x + m->m[3][1] * y + m->m[3][2] * z + m->m[3][3];\n"
00079 "\n"
00080 " ptrans->x /= w;\n"
00081 " ptrans->y /= w;\n"
00082 " ptrans->z /= w;\n"
00083 "}\n"
00084 "\n"
00085 "void TransformV(Vector *ptrans, Vector *p, __global Matrix4x4 *m) {\n"
00086 " const float x = p->x;\n"
00087 " const float y = p->y;\n"
00088 " const float z = p->z;\n"
00089 "\n"
00090 " ptrans->x = m->m[0][0] * x + m->m[0][1] * y + m->m[0][2] * z;\n"
00091 " ptrans->y = m->m[1][0] * x + m->m[1][1] * y + m->m[1][2] * z;\n"
00092 " ptrans->z = m->m[2][0] * x + m->m[2][1] * y + m->m[2][2] * z;\n"
00093 "}\n"
00094 "\n"
00095 "#define emptyLeafNode 0xffffffff\n"
00096 "\n"
00097 "#define QBVHNode_IsLeaf(index) (index < 0)\n"
00098 "#define QBVHNode_IsEmpty(index) (index == emptyLeafNode)\n"
00099 "#define QBVHNode_NbQuadPrimitives(index) ((uint)(((index >> 27) & 0xf) + 1))\n"
00100 "#define QBVHNode_FirstQuadIndex(index) (index & 0x07ffffff)\n"
00101 "\n"
00102 "// Using invDir0/invDir1/invDir2 and sign0/sign1/sign2 instead of an\n"
00103 "// array because I dont' trust OpenCL compiler =)\n"
00104 "int4 QBVHNode_BBoxIntersect(\n"
00105 " const float4 bboxes_minX, const float4 bboxes_maxX,\n"
00106 " const float4 bboxes_minY, const float4 bboxes_maxY,\n"
00107 " const float4 bboxes_minZ, const float4 bboxes_maxZ,\n"
00108 " const QuadRay *ray4,\n"
00109 " const float4 invDir0, const float4 invDir1, const float4 invDir2,\n"
00110 " const int signs0, const int signs1, const int signs2) {\n"
00111 " float4 tMin = ray4->mint;\n"
00112 " float4 tMax = ray4->maxt;\n"
00113 "\n"
00114 " // X coordinate\n"
00115 " tMin = max(tMin, (bboxes_minX - ray4->ox) * invDir0);\n"
00116 " tMax = min(tMax, (bboxes_maxX - ray4->ox) * invDir0);\n"
00117 "\n"
00118 " // Y coordinate\n"
00119 " tMin = max(tMin, (bboxes_minY - ray4->oy) * invDir1);\n"
00120 " tMax = min(tMax, (bboxes_maxY - ray4->oy) * invDir1);\n"
00121 "\n"
00122 " // Z coordinate\n"
00123 " tMin = max(tMin, (bboxes_minZ - ray4->oz) * invDir2);\n"
00124 " tMax = min(tMax, (bboxes_maxZ - ray4->oz) * invDir2);\n"
00125 "\n"
00126 " // Return the visit flags\n"
00127 " return (tMax >= tMin);\n"
00128 "}\n"
00129 "\n"
00130 "void QuadTriangle_Intersect(\n"
00131 " const float4 origx, const float4 origy, const float4 origz,\n"
00132 " const float4 edge1x, const float4 edge1y, const float4 edge1z,\n"
00133 " const float4 edge2x, const float4 edge2y, const float4 edge2z,\n"
00134 " const uint4 primitives,\n"
00135 " QuadRay *ray4, RayHit *rayHit) {\n"
00136 " //--------------------------------------------------------------------------\n"
00137 " // Calc. b1 coordinate\n"
00138 "\n"
00139 " const float4 s1x = (ray4->dy * edge2z) - (ray4->dz * edge2y);\n"
00140 " const float4 s1y = (ray4->dz * edge2x) - (ray4->dx * edge2z);\n"
00141 " const float4 s1z = (ray4->dx * edge2y) - (ray4->dy * edge2x);\n"
00142 "\n"
00143 " const float4 divisor = (s1x * edge1x) + (s1y * edge1y) + (s1z * edge1z);\n"
00144 "\n"
00145 " const float4 dx = ray4->ox - origx;\n"
00146 " const float4 dy = ray4->oy - origy;\n"
00147 " const float4 dz = ray4->oz - origz;\n"
00148 "\n"
00149 " const float4 b1 = ((dx * s1x) + (dy * s1y) + (dz * s1z)) / divisor;\n"
00150 "\n"
00151 " //--------------------------------------------------------------------------\n"
00152 " // Calc. b2 coordinate\n"
00153 "\n"
00154 " const float4 s2x = (dy * edge1z) - (dz * edge1y);\n"
00155 " const float4 s2y = (dz * edge1x) - (dx * edge1z);\n"
00156 " const float4 s2z = (dx * edge1y) - (dy * edge1x);\n"
00157 "\n"
00158 " const float4 b2 = ((ray4->dx * s2x) + (ray4->dy * s2y) + (ray4->dz * s2z)) / divisor;\n"
00159 "\n"
00160 " //--------------------------------------------------------------------------\n"
00161 " // Calc. b0 coordinate\n"
00162 "\n"
00163 " const float4 b0 = ((float4)1.f) - b1 - b2;\n"
00164 "\n"
00165 " //--------------------------------------------------------------------------\n"
00166 "\n"
00167 " const float4 t = ((edge2x * s2x) + (edge2y * s2y) + (edge2z * s2z)) / divisor;\n"
00168 "\n"
00169 " float _b1, _b2;\n"
00170 " float maxt = ray4->maxt.s0;\n"
00171 " uint index;\n"
00172 "\n"
00173 " int4 cond = isnotequal(divisor, (float4)0.f) & isgreaterequal(b0, (float4)0.f) &\n"
00174 " isgreaterequal(b1, (float4)0.f) & isgreaterequal(b2, (float4)0.f) &\n"
00175 " isgreater(t, ray4->mint);\n"
00176 "\n"
00177 " const int cond0 = cond.s0 && (t.s0 < maxt);\n"
00178 " maxt = select(maxt, t.s0, cond0);\n"
00179 " _b1 = select(0.f, b1.s0, cond0);\n"
00180 " _b2 = select(0.f, b2.s0, cond0);\n"
00181 " index = select(0xffffffffu, primitives.s0, cond0);\n"
00182 "\n"
00183 " const int cond1 = cond.s1 && (t.s1 < maxt);\n"
00184 " maxt = select(maxt, t.s1, cond1);\n"
00185 " _b1 = select(_b1, b1.s1, cond1);\n"
00186 " _b2 = select(_b2, b2.s1, cond1);\n"
00187 " index = select(index, primitives.s1, cond1);\n"
00188 "\n"
00189 " const int cond2 = cond.s2 && (t.s2 < maxt);\n"
00190 " maxt = select(maxt, t.s2, cond2);\n"
00191 " _b1 = select(_b1, b1.s2, cond2);\n"
00192 " _b2 = select(_b2, b2.s2, cond2);\n"
00193 " index = select(index, primitives.s2, cond2);\n"
00194 "\n"
00195 " const int cond3 = cond.s3 && (t.s3 < maxt);\n"
00196 " maxt = select(maxt, t.s3, cond3);\n"
00197 " _b1 = select(_b1, b1.s3, cond3);\n"
00198 " _b2 = select(_b2, b2.s3, cond3);\n"
00199 " index = select(index, primitives.s3, cond3);\n"
00200 "\n"
00201 " if (index == 0xffffffffu)\n"
00202 " return;\n"
00203 "\n"
00204 " ray4->maxt = (float4)maxt;\n"
00205 "\n"
00206 " rayHit->t = maxt;\n"
00207 " rayHit->b1 = _b1;\n"
00208 " rayHit->b2 = _b2;\n"
00209 " rayHit->index = index;\n"
00210 "}\n"
00211 "\n"
00212 "void LeafIntersect(\n"
00213 " const Ray *ray,\n"
00214 " RayHit *rayHit,\n"
00215 " __global QBVHNode *nodes,\n"
00216 " __global QuadTiangle *quadTris) {\n"
00217 " // Prepare the ray for intersection\n"
00218 " QuadRay ray4;\n"
00219 " ray4.ox = (float4)ray->o.x;\n"
00220 " ray4.oy = (float4)ray->o.y;\n"
00221 " ray4.oz = (float4)ray->o.z;\n"
00222 "\n"
00223 " ray4.dx = (float4)ray->d.x;\n"
00224 " ray4.dy = (float4)ray->d.y;\n"
00225 " ray4.dz = (float4)ray->d.z;\n"
00226 "\n"
00227 " ray4.mint = (float4)ray->mint;\n"
00228 " ray4.maxt = (float4)ray->maxt;\n"
00229 "\n"
00230 " const float4 invDir0 = (float4)(1.f / ray4.dx.s0);\n"
00231 " const float4 invDir1 = (float4)(1.f / ray4.dy.s0);\n"
00232 " const float4 invDir2 = (float4)(1.f / ray4.dz.s0);\n"
00233 "\n"
00234 " const int signs0 = (ray4.dx.s0 < 0.f);\n"
00235 " const int signs1 = (ray4.dy.s0 < 0.f);\n"
00236 " const int signs2 = (ray4.dz.s0 < 0.f);\n"
00237 "\n"
00238 " rayHit->index = 0xffffffffu;\n"
00239 "\n"
00240 " //------------------------------\n"
00241 " // Main loop\n"
00242 " int todoNode = 0; // the index in the stack\n"
00243 " int nodeStack[24];\n"
00244 " nodeStack[0] = 0; // first node to handle: root node\n"
00245 "\n"
00246 " while (todoNode >= 0) {\n"
00247 " const int nodeData = nodeStack[todoNode];\n"
00248 " --todoNode;\n"
00249 "\n"
00250 " // Leaves are identified by a negative index\n"
00251 " if (!QBVHNode_IsLeaf(nodeData)) {\n"
00252 " __global QBVHNode *node = &nodes[nodeData];\n"
00253 " const int4 visit = QBVHNode_BBoxIntersect(\n"
00254 " node->bboxes[signs0][0], node->bboxes[1 - signs0][0],\n"
00255 " node->bboxes[signs1][1], node->bboxes[1 - signs1][1],\n"
00256 " node->bboxes[signs2][2], node->bboxes[1 - signs2][2],\n"
00257 " &ray4,\n"
00258 " invDir0, invDir1, invDir2,\n"
00259 " signs0, signs1, signs2);\n"
00260 "\n"
00261 " const int4 children = node->children;\n"
00262 "\n"
00263 " // For some reason doing logic operations with int4 is very slow\n"
00264 " nodeStack[todoNode + 1] = children.s3;\n"
00265 " todoNode += (visit.s3 && !QBVHNode_IsEmpty(children.s3)) ? 1 : 0;\n"
00266 " nodeStack[todoNode + 1] = children.s2;\n"
00267 " todoNode += (visit.s2 && !QBVHNode_IsEmpty(children.s2)) ? 1 : 0;\n"
00268 " nodeStack[todoNode + 1] = children.s1;\n"
00269 " todoNode += (visit.s1 && !QBVHNode_IsEmpty(children.s1)) ? 1 : 0;\n"
00270 " nodeStack[todoNode + 1] = children.s0;\n"
00271 " todoNode += (visit.s0 && !QBVHNode_IsEmpty(children.s0)) ? 1 : 0;\n"
00272 " } else {\n"
00273 " // Perform intersection\n"
00274 " const uint nbQuadPrimitives = QBVHNode_NbQuadPrimitives(nodeData);\n"
00275 " const uint offset = QBVHNode_FirstQuadIndex(nodeData);\n"
00276 "\n"
00277 " for (uint primNumber = offset; primNumber < (offset + nbQuadPrimitives); ++primNumber) {\n"
00278 " __global QuadTiangle *quadTri = &quadTris[primNumber];\n"
00279 " const float4 origx = quadTri->origx;\n"
00280 " const float4 origy = quadTri->origy;\n"
00281 " const float4 origz = quadTri->origz;\n"
00282 " const float4 edge1x = quadTri->edge1x;\n"
00283 " const float4 edge1y = quadTri->edge1y;\n"
00284 " const float4 edge1z = quadTri->edge1z;\n"
00285 " const float4 edge2x = quadTri->edge2x;\n"
00286 " const float4 edge2y = quadTri->edge2y;\n"
00287 " const float4 edge2z = quadTri->edge2z;\n"
00288 " const uint4 primitives = quadTri->primitives;\n"
00289 "\n"
00290 " QuadTriangle_Intersect(\n"
00291 " origx, origy, origz,\n"
00292 " edge1x, edge1y, edge1z,\n"
00293 " edge2x, edge2y, edge2z,\n"
00294 " primitives,\n"
00295 " &ray4, rayHit);\n"
00296 " }\n"
00297 " }\n"
00298 " }\n"
00299 "}\n"
00300 "\n"
00301 "__kernel void Intersect(\n"
00302 " __global Ray *rays,\n"
00303 " __global RayHit *rayHits,\n"
00304 " __global QBVHNode *nodes,\n"
00305 " const uint rayCount,\n"
00306 " __global unsigned int *qbvhMemMap,\n"
00307 " __global QBVHNode *leafNodes,\n"
00308 " __global QuadTiangle *leafQuadTris,\n"
00309 " __global Matrix4x4 *invTrans,\n"
00310 " __global unsigned int *leafsOffset) {\n"
00311 " // Select the ray to check\n"
00312 " const int gid = get_global_id(0);\n"
00313 " if (gid >= rayCount)\n"
00314 " return;\n"
00315 "\n"
00316 " // Prepare the ray for intersection\n"
00317 " QuadRay ray4;\n"
00318 " Point rayOrig;\n"
00319 " Vector rayDir;\n"
00320 " {\n"
00321 " __global float4 *basePtr =(__global float4 *)&rays[gid];\n"
00322 " float4 data0 = (*basePtr++);\n"
00323 " float4 data1 = (*basePtr);\n"
00324 "\n"
00325 " rayOrig.x = data0.x;\n"
00326 " rayOrig.y = data0.y;\n"
00327 " rayOrig.z = data0.z;\n"
00328 "\n"
00329 " rayDir.x = data0.w;\n"
00330 " rayDir.y = data1.x;\n"
00331 " rayDir.z = data1.y;\n"
00332 "\n"
00333 " ray4.ox = (float4)data0.x;\n"
00334 " ray4.oy = (float4)data0.y;\n"
00335 " ray4.oz = (float4)data0.z;\n"
00336 "\n"
00337 " ray4.dx = (float4)data0.w;\n"
00338 " ray4.dy = (float4)data1.x;\n"
00339 " ray4.dz = (float4)data1.y;\n"
00340 "\n"
00341 " ray4.mint = (float4)data1.z;\n"
00342 " ray4.maxt = (float4)data1.w;\n"
00343 " }\n"
00344 "\n"
00345 " const float4 invDir0 = (float4)(1.f / ray4.dx.s0);\n"
00346 " const float4 invDir1 = (float4)(1.f / ray4.dy.s0);\n"
00347 " const float4 invDir2 = (float4)(1.f / ray4.dz.s0);\n"
00348 "\n"
00349 " const int signs0 = (ray4.dx.s0 < 0.f);\n"
00350 " const int signs1 = (ray4.dy.s0 < 0.f);\n"
00351 " const int signs2 = (ray4.dz.s0 < 0.f);\n"
00352 "\n"
00353 " RayHit rayHit;\n"
00354 " rayHit.index = 0xffffffffu;\n"
00355 "\n"
00356 " //------------------------------\n"
00357 " // Main loop\n"
00358 " int todoNode = 0; // the index in the stack\n"
00359 " int nodeStack[24];\n"
00360 " nodeStack[0] = 0; // first node to handle: root node\n"
00361 "\n"
00362 " while (todoNode >= 0) {\n"
00363 " const int nodeData = nodeStack[todoNode];\n"
00364 " --todoNode;\n"
00365 "\n"
00366 " // Leaves are identified by a negative index\n"
00367 " if (!QBVHNode_IsLeaf(nodeData)) {\n"
00368 " __global QBVHNode *node = &nodes[nodeData];\n"
00369 " const int4 visit = QBVHNode_BBoxIntersect(\n"
00370 " node->bboxes[signs0][0], node->bboxes[1 - signs0][0],\n"
00371 " node->bboxes[signs1][1], node->bboxes[1 - signs1][1],\n"
00372 " node->bboxes[signs2][2], node->bboxes[1 - signs2][2],\n"
00373 " &ray4,\n"
00374 " invDir0, invDir1, invDir2,\n"
00375 " signs0, signs1, signs2);\n"
00376 "\n"
00377 " const int4 children = node->children;\n"
00378 "\n"
00379 " // For some reason doing logic operations with int4 are very slow\n"
00380 " nodeStack[todoNode + 1] = children.s3;\n"
00381 " todoNode += (visit.s3 && !QBVHNode_IsEmpty(children.s3)) ? 1 : 0;\n"
00382 " nodeStack[todoNode + 1] = children.s2;\n"
00383 " todoNode += (visit.s2 && !QBVHNode_IsEmpty(children.s2)) ? 1 : 0;\n"
00384 " nodeStack[todoNode + 1] = children.s1;\n"
00385 " todoNode += (visit.s1 && !QBVHNode_IsEmpty(children.s1)) ? 1 : 0;\n"
00386 " nodeStack[todoNode + 1] = children.s0;\n"
00387 " todoNode += (visit.s0 && !QBVHNode_IsEmpty(children.s0)) ? 1 : 0;\n"
00388 " } else {\n"
00389 " // Perform intersection with QBVH leaf\n"
00390 " const uint leafIndex = QBVHNode_FirstQuadIndex(nodeData);\n"
00391 "\n"
00392 " Ray tray;\n"
00393 " TransformP(&tray.o, &rayOrig, &invTrans[leafIndex]);\n"
00394 " TransformV(&tray.d, &rayDir, &invTrans[leafIndex]);\n"
00395 " tray.mint = ray4.mint.s0;\n"
00396 " tray.maxt = ray4.maxt.s0;\n"
00397 "\n"
00398 " const unsigned int memIndex = leafIndex * 2;\n"
00399 " const unsigned int leafNodeOffset = qbvhMemMap[memIndex];\n"
00400 " __global QBVHNode *n = &leafNodes[leafNodeOffset];\n"
00401 " const unsigned int leafQuadTriOffset = qbvhMemMap[memIndex + 1];\n"
00402 " __global QuadTiangle *qt = &leafQuadTris[leafQuadTriOffset];\n"
00403 "\n"
00404 " RayHit tmpRayHit;\n"
00405 " LeafIntersect(&tray, &tmpRayHit, n, qt);\n"
00406 "\n"
00407 " if (tmpRayHit.index != 0xffffffffu) {\n"
00408 " rayHit.t = tmpRayHit.t;\n"
00409 " rayHit.b1 = tmpRayHit.b1;\n"
00410 " rayHit.b2 = tmpRayHit.b2;\n"
00411 " rayHit.index = tmpRayHit.index + leafsOffset[leafIndex];\n"
00412 "\n"
00413 " ray4.maxt = (float4)tmpRayHit.t;\n"
00414 " }\n"
00415 " }\n"
00416 " }\n"
00417 "\n"
00418 " // Write result\n"
00419 " rayHits[gid].t = rayHit.t;\n"
00420 " rayHits[gid].b1 = rayHit.b1;\n"
00421 " rayHits[gid].b2 = rayHit.b2;\n"
00422 " rayHits[gid].index = rayHit.index;\n"
00423 "}\n"
00424 ;