00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef _LUXRAYS_QBVHACCEL_H
00023 #define _LUXRAYS_QBVHACCEL_H
00024
00025 #include <string.h>
00026 #include <xmmintrin.h>
00027 #include <boost/cstdint.hpp>
00028
00029 #include "luxrays/luxrays.h"
00030 #include "luxrays/core/acceleretor.h"
00031
00032 using boost::int32_t;
00033
00034 namespace luxrays {
00035
00036 #if defined(WIN32) && !defined(__CYGWIN__)
00037 class __declspec(align(16)) QuadRay {
00038 #else
00039 class QuadRay {
00040 #endif
00041 public:
00042 QuadRay(const Ray &ray)
00043 {
00044 ox = _mm_set1_ps(ray.o.x);
00045 oy = _mm_set1_ps(ray.o.y);
00046 oz = _mm_set1_ps(ray.o.z);
00047
00048 dx = _mm_set1_ps(ray.d.x);
00049 dy = _mm_set1_ps(ray.d.y);
00050 dz = _mm_set1_ps(ray.d.z);
00051
00052 mint = _mm_set1_ps(ray.mint);
00053 maxt = _mm_set1_ps(ray.maxt);
00054 }
00055
00056 __m128 ox, oy, oz;
00057 __m128 dx, dy, dz;
00058 mutable __m128 mint, maxt;
00059 #if defined(WIN32) && !defined(__CYGWIN__)
00060 };
00061 #else
00062 } __attribute__ ((aligned(16)));
00063 #endif
00064
00065 class QuadTriangle : public Aligned16 {
00066 public:
00067
00068 QuadTriangle(const Triangle *tris, const Point *verts,
00069 const unsigned int p1,
00070 const unsigned int p2,
00071 const unsigned int p3,
00072 const unsigned int p4) {
00073
00074 primitives[0] = p1;
00075 primitives[1] = p2;
00076 primitives[2] = p3;
00077 primitives[3] = p4;
00078
00079 for (u_int i = 0; i < 4; ++i) {
00080 const Triangle *t = &tris[primitives[i]];
00081
00082 reinterpret_cast<float *> (&origx)[i] = verts[t->v[0]].x;
00083 reinterpret_cast<float *> (&origy)[i] = verts[t->v[0]].y;
00084 reinterpret_cast<float *> (&origz)[i] = verts[t->v[0]].z;
00085
00086 reinterpret_cast<float *> (&edge1x)[i] = verts[t->v[1]].x - verts[t->v[0]].x;
00087 reinterpret_cast<float *> (&edge1y)[i] = verts[t->v[1]].y - verts[t->v[0]].y;
00088 reinterpret_cast<float *> (&edge1z)[i] = verts[t->v[1]].z - verts[t->v[0]].z;
00089
00090 reinterpret_cast<float *> (&edge2x)[i] = verts[t->v[2]].x - verts[t->v[0]].x;
00091 reinterpret_cast<float *> (&edge2y)[i] = verts[t->v[2]].y - verts[t->v[0]].y;
00092 reinterpret_cast<float *> (&edge2z)[i] = verts[t->v[2]].z - verts[t->v[0]].z;
00093 }
00094 }
00095
00096 ~QuadTriangle() {
00097 }
00098
00099 BBox WorldBound(const Triangle *tris, const Point *verts) const {
00100 return Union(
00101 Union(tris[primitives[0]].WorldBound(verts), tris[primitives[1]].WorldBound(verts)),
00102 Union(tris[primitives[2]].WorldBound(verts), tris[primitives[3]].WorldBound(verts)));
00103 }
00104
00105 bool Intersect(const QuadRay &ray4, const Ray &ray, RayHit *rayHit) const {
00106 const __m128 zero = _mm_setzero_ps();
00107 const __m128 s1x = _mm_sub_ps(_mm_mul_ps(ray4.dy, edge2z),
00108 _mm_mul_ps(ray4.dz, edge2y));
00109 const __m128 s1y = _mm_sub_ps(_mm_mul_ps(ray4.dz, edge2x),
00110 _mm_mul_ps(ray4.dx, edge2z));
00111 const __m128 s1z = _mm_sub_ps(_mm_mul_ps(ray4.dx, edge2y),
00112 _mm_mul_ps(ray4.dy, edge2x));
00113 const __m128 divisor = _mm_add_ps(_mm_mul_ps(s1x, edge1x),
00114 _mm_add_ps(_mm_mul_ps(s1y, edge1y),
00115 _mm_mul_ps(s1z, edge1z)));
00116 __m128 test = _mm_cmpneq_ps(divisor, zero);
00117 const __m128 inverse = _mm_div_ps(_mm_set_ps1(1.f), divisor);
00118 const __m128 dx = _mm_sub_ps(ray4.ox, origx);
00119 const __m128 dy = _mm_sub_ps(ray4.oy, origy);
00120 const __m128 dz = _mm_sub_ps(ray4.oz, origz);
00121 const __m128 b1 = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(dx, s1x),
00122 _mm_add_ps(_mm_mul_ps(dy, s1y), _mm_mul_ps(dz, s1z))),
00123 inverse);
00124 test = _mm_and_ps(test, _mm_cmpge_ps(b1, zero));
00125 const __m128 s2x = _mm_sub_ps(_mm_mul_ps(dy, edge1z),
00126 _mm_mul_ps(dz, edge1y));
00127 const __m128 s2y = _mm_sub_ps(_mm_mul_ps(dz, edge1x),
00128 _mm_mul_ps(dx, edge1z));
00129 const __m128 s2z = _mm_sub_ps(_mm_mul_ps(dx, edge1y),
00130 _mm_mul_ps(dy, edge1x));
00131 const __m128 b2 = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(ray4.dx, s2x),
00132 _mm_add_ps(_mm_mul_ps(ray4.dy, s2y), _mm_mul_ps(ray4.dz, s2z))),
00133 inverse);
00134 const __m128 b0 = _mm_sub_ps(_mm_set1_ps(1.f),
00135 _mm_add_ps(b1, b2));
00136 test = _mm_and_ps(test, _mm_and_ps(_mm_cmpge_ps(b2, zero),
00137 _mm_cmpge_ps(b0, zero)));
00138 const __m128 t = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(edge2x, s2x),
00139 _mm_add_ps(_mm_mul_ps(edge2y, s2y),
00140 _mm_mul_ps(edge2z, s2z))), inverse);
00141 test = _mm_and_ps(test,
00142 _mm_and_ps(_mm_cmpgt_ps(t, ray4.mint),
00143 _mm_cmplt_ps(t, ray4.maxt)));
00144
00145 const int testmask = _mm_movemask_ps(test);
00146 if (testmask == 0) return false;
00147
00148 u_int hit;
00149 if ((testmask & (testmask - 1)) == 0) {
00150 hit = UIntLog2(testmask);
00151 ray.maxt = reinterpret_cast<const float *> (&t)[hit];
00152 } else {
00153 for (u_int i = 0; i < 4; ++i) {
00154 if (reinterpret_cast<const int *> (&test)[i] && reinterpret_cast<const float *> (&t)[i] < ray.maxt) {
00155 hit = i;
00156 ray.maxt = reinterpret_cast<const float *> (&t)[i];
00157 }
00158 }
00159 }
00160
00161 ray4.maxt = _mm_set1_ps(ray.maxt);
00162
00163 rayHit->t = ray.maxt;
00164 rayHit->b1 = reinterpret_cast<const float *> (&b1)[hit];
00165 rayHit->b2 = reinterpret_cast<const float *> (&b2)[hit];
00166 rayHit->index = primitives[hit];
00167
00168 return true;
00169 }
00170
00171 __m128 origx, origy, origz;
00172 __m128 edge1x, edge1y, edge1z;
00173 __m128 edge2x, edge2y, edge2z;
00174 unsigned int primitives[4];
00175 };
00176
00177
00178
00187 #define NB_BINS 8
00188
00192 class QBVHNode {
00193 public:
00194
00195
00196
00197
00198
00199
00200
00201 static const int32_t emptyLeafNode = 0xffffffff;
00202
00207 __m128 bboxes[2][3];
00208
00216 int32_t children[4];
00217
00222 inline QBVHNode() {
00223 for (int i = 0; i < 3; ++i) {
00224 bboxes[0][i] = _mm_set1_ps(INFINITY);
00225 bboxes[1][i] = _mm_set1_ps(-INFINITY);
00226 }
00227
00228
00229 for (int i = 0; i < 4; ++i)
00230 children[i] = emptyLeafNode;
00231 }
00232
00238 inline bool ChildIsLeaf(int i) const {
00239 return (children[i] < 0);
00240 }
00241
00246 inline static bool IsLeaf(int32_t index) {
00247 return (index < 0);
00248 }
00249
00254 inline bool LeafIsEmpty(int i) const {
00255 return (children[i] == emptyLeafNode);
00256 }
00257
00262 inline static bool IsEmpty(int32_t index) {
00263 return (index == emptyLeafNode);
00264 }
00265
00272 inline u_int NbQuadsInLeaf(int i) const {
00273 return static_cast<u_int>((children[i] >> 27) & 0xf) + 1;
00274 }
00275
00280 inline static u_int NbQuadPrimitives(int32_t index) {
00281 return static_cast<u_int>((index >> 27) & 0xf) + 1;
00282 }
00283
00290 inline u_int NbPrimitivesInLeaf(int i) const {
00291 return NbQuadsInLeaf(i) * 4;
00292 }
00293
00300 inline u_int FirstQuadIndexForLeaf(int i) const {
00301 return children[i] & 0x07ffffff;
00302 }
00303
00308 inline static u_int FirstQuadIndex(int32_t index) {
00309 return index & 0x07ffffff;
00310 }
00311
00318 inline void InitializeLeaf(int i, u_int nbQuads, u_int firstQuadIndex) {
00319
00320 if (nbQuads == 0) {
00321 children[i] = emptyLeafNode;
00322 } else {
00323
00324 children[i] = 0x80000000;
00325
00326 children[i] |= ((static_cast<int32_t>(nbQuads) - 1) & 0xf) << 27;
00327
00328 children[i] |= static_cast<int32_t>(firstQuadIndex) & 0x07ffffff;
00329 }
00330 }
00331
00337 inline void SetBBox(int i, const BBox &bbox) {
00338 for (int axis = 0; axis < 3; ++axis) {
00339 reinterpret_cast<float *>(&(bboxes[0][axis]))[i] = bbox.pMin[axis];
00340 reinterpret_cast<float *>(&(bboxes[1][axis]))[i] = bbox.pMax[axis];
00341 }
00342 }
00343
00344
00350 inline int32_t BBoxIntersect(const QuadRay &ray4, const __m128 invDir[3],
00351 const int sign[3]) const {
00352 __m128 tMin = ray4.mint;
00353 __m128 tMax = ray4.maxt;
00354
00355
00356 tMin = _mm_max_ps(tMin, _mm_mul_ps(_mm_sub_ps(bboxes[sign[0]][0],
00357 ray4.ox), invDir[0]));
00358 tMax = _mm_min_ps(tMax, _mm_mul_ps(_mm_sub_ps(bboxes[1 - sign[0]][0],
00359 ray4.ox), invDir[0]));
00360
00361
00362 tMin = _mm_max_ps(tMin, _mm_mul_ps(_mm_sub_ps(bboxes[sign[1]][1],
00363 ray4.oy), invDir[1]));
00364 tMax = _mm_min_ps(tMax, _mm_mul_ps(_mm_sub_ps(bboxes[1 - sign[1]][1],
00365 ray4.oy), invDir[1]));
00366
00367
00368 tMin = _mm_max_ps(tMin, _mm_mul_ps(_mm_sub_ps(bboxes[sign[2]][2],
00369 ray4.oz), invDir[2]));
00370 tMax = _mm_min_ps(tMax, _mm_mul_ps(_mm_sub_ps(bboxes[1 - sign[2]][2],
00371 ray4.oz), invDir[2]));
00372
00373
00374 return _mm_movemask_ps(_mm_cmpge_ps(tMax, tMin));
00375 }
00376 };
00377
00378
00379 class QBVHAccel : public Accelerator {
00380 public:
00384 QBVHAccel(const Context *context, u_int mp, u_int fst, u_int sf);
00385
00389 ~QBVHAccel();
00390
00395 BBox WorldBound() const { return worldBound; }
00396
00397 AcceleratorType GetType() const { return ACCEL_QBVH; }
00398 void Init(const std::deque<Mesh *> &meshes, const unsigned int totalVertexCount,
00399 const unsigned int totalTriangleCount);
00400
00401 const TriangleMeshID GetMeshID(const unsigned int index) const { return meshIDs[index]; }
00402 const TriangleMeshID *GetMeshIDTable() const { return meshIDs; }
00403 const TriangleID GetMeshTriangleID(const unsigned int index) const { return meshTriangleIDs[index]; }
00404 const TriangleID *GetMeshTriangleIDTable() const { return meshTriangleIDs; }
00405
00411 bool Intersect(const Ray *ray, RayHit *hit) const;
00412
00413 const TriangleMesh *GetPreprocessedMesh() const { return preprocessedMesh; }
00414
00415 friend class MQBVHAccel;
00416 friend class OpenCLIntersectionDevice;
00417
00418 private:
00419
00420 void Init(const Mesh *m);
00421
00426 void BuildTree(u_int start, u_int end, u_int *primsIndexes,
00427 BBox *primsBboxes, Point *primsCentroids, const BBox &nodeBbox,
00428 const BBox ¢roidsBbox, int32_t parentIndex, int32_t childIndex,
00429 int depth);
00430
00434 void CreateTempLeaf(int32_t parentIndex, int32_t childIndex, u_int start, u_int end,
00435 const BBox &nodeBbox);
00436
00440 inline int32_t CreateIntermediateNode(int32_t parentIndex, int32_t childIndex,
00441 const BBox &nodeBbox) {
00442 int32_t index = nNodes++;
00443 if (nNodes >= maxNodes) {
00444 QBVHNode *newNodes = AllocAligned<QBVHNode>(2 * maxNodes);
00445 memcpy(newNodes, nodes, sizeof(QBVHNode) * maxNodes);
00446 for (u_int i = 0; i < maxNodes; ++i)
00447 newNodes[maxNodes + i] = QBVHNode();
00448 FreeAligned(nodes);
00449 nodes = newNodes;
00450 maxNodes *= 2;
00451 }
00452
00453 if (parentIndex >= 0) {
00454 nodes[parentIndex].children[childIndex] = index;
00455 nodes[parentIndex].SetBBox(childIndex, nodeBbox);
00456 }
00457 return index;
00458 }
00459
00464 void PreSwizzle(int32_t nodeIndex, u_int *primsIndexes);
00465
00471 void CreateSwizzledLeaf(int32_t parentIndex, int32_t childIndex,
00472 u_int *primsIndexes);
00473
00477 u_int nQuads;
00478
00486 QuadTriangle *prims;
00487
00491 QBVHNode *nodes;
00492
00496 u_int nNodes, maxNodes;
00497
00501 BBox worldBound;
00502
00507 u_int fullSweepThreshold;
00508
00512 u_int skipFactor;
00513
00517 u_int maxPrimsPerLeaf;
00518
00519 const Context *ctx;
00520 TriangleMesh *preprocessedMesh;
00521 const Mesh *mesh;
00522 TriangleMeshID *meshIDs;
00523 TriangleID *meshTriangleIDs;
00524
00525 bool initialized;
00526 };
00527
00528 }
00529
00530 #endif