2 #include "b3GpuRaycast.h"
3 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
4 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
5 #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
7 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
8 #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
9 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
10 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
11 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
12 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
13 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h"
15 #include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
17 #define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
// Private implementation state of b3GpuRaycast: the OpenCL device handle, the three
// ray-cast kernels, the BVH / radix-sort helpers and the GPU-side work buffers.
// NOTE(review): the constructor and castRays also access m_context, m_q and m_fill
// through this struct, but no declaration for them is visible in this listing -
// confirm against the full file.
19 struct b3GpuRaycastInternalData
22 cl_device_id m_device;
// Kernel handles. The constructor creates them from the "rayCastKernel",
// "rayCastPairsKernel" and "findRayRigidPairIndexRanges" entry points respectively.
24 cl_kernel m_raytraceKernel;
25 cl_kernel m_raytracePairsKernel;
26 cl_kernel m_findRayRigidPairIndexRanges;
// BVH that castRays rebuilds from the broadphase AABBs and then tests the rays against.
28 b3GpuParallelLinearBvh* m_plbvh;
// Used by castRays to sort the ray-rigid pairs.
29 b3RadixSort32CL* m_radixSorter;
// Device-side copies of the caller's rays and hit results; castRays uploads both and
// reads the hit results back at the end.
33 b3OpenCLArray<b3RayInfo>* m_gpuRays;
34 b3OpenCLArray<b3RayHit>* m_gpuHitResults;
// Both arrays are resized to the ray count in castRays and reset before every query
// (first index filled with the pair count, count filled with 0).
35 b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
36 b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
38 //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
// m_gpuNumRayRigidPairs is resized to a single int in castRays and read back as the
// number of pairs; m_gpuRayRigidPairs holds the pairs themselves.
39 b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
40 b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index
// Constructs the GPU ray caster for the given OpenCL context, device and command queue:
// allocates the internal data block, creates the helper objects and device arrays, and
// builds the three ray-cast kernels from the embedded rayCastKernelCL source.
// ctx, device and q are forwarded unchanged to every helper and array created here.
45 b3GpuRaycast::b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q)
47 m_data = new b3GpuRaycastInternalData;
48 m_data->m_context = ctx;
49 m_data->m_device = device;
// NOTE(review): castRays uses m_data->m_q, but the line that stores q into it is not
// visible in this listing - confirm it exists in the full file.
// Kernel handles start out null and are filled in further down.
51 m_data->m_raytraceKernel = 0;
52 m_data->m_raytracePairsKernel = 0;
53 m_data->m_findRayRigidPairIndexRanges = 0;
// Helper objects; each is deleted again in the destructor.
55 m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
56 m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
57 m_data->m_fill = new b3FillCL(ctx, device, q);
// Device arrays are created here without an explicit size; castRays resizes / uploads
// into them per query.
59 m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
60 m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
61 m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
62 m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q);
63 m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q);
64 m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
// Compile the program once and create all three kernels from it. B3_RAYCAST_PATH is
// passed along as the .cl source location.
// NOTE(review): the declaration of errNum is not visible in this listing. Failures are
// only checked through b3Assert; there is no other error path in the visible code.
68 cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, &errNum, "", B3_RAYCAST_PATH);
69 b3Assert(errNum == CL_SUCCESS);
70 m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastKernel", &errNum, prog);
71 b3Assert(errNum == CL_SUCCESS);
72 m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastPairsKernel", &errNum, prog);
73 b3Assert(errNum == CL_SUCCESS);
74 m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "findRayRigidPairIndexRanges", &errNum, prog);
75 b3Assert(errNum == CL_SUCCESS);
// The program handle is only needed while creating kernels, so this reference is dropped here.
76 clReleaseProgram(prog);
// Releases everything the constructor created: the three kernels, the helper objects
// and the device arrays, in the same grouping as they were created.
// NOTE(review): deletion of m_data itself is not visible in this listing - confirm the
// full file frees it, otherwise the internal data block leaks.
80 b3GpuRaycast::~b3GpuRaycast()
82 clReleaseKernel(m_data->m_raytraceKernel);
83 clReleaseKernel(m_data->m_raytracePairsKernel);
84 clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
// Helper objects allocated with new in the constructor.
86 delete m_data->m_plbvh;
87 delete m_data->m_radixSorter;
88 delete m_data->m_fill;
// Device arrays allocated with new in the constructor.
90 delete m_data->m_gpuRays;
91 delete m_data->m_gpuHitResults;
92 delete m_data->m_firstRayRigidPairIndexPerRay;
93 delete m_data->m_numRayRigidPairsPerRay;
94 delete m_data->m_gpuNumRayRigidPairs;
95 delete m_data->m_gpuRayRigidPairs;
// Tests the segment rayFrom -> rayTo against a sphere (centre spherePos, given radius).
// The candidate hit parameter t is measured in units of the whole segment
// (t == 0 at rayFrom, t == 1 at rayTo) because rayDir is left unnormalized.
// hitFraction is an in/out parameter: a candidate is only accepted when it lies in
// [0, hitFraction).
// NOTE(review): the lines that guard the sign of D, store t into hitFraction and
// produce the return value are not visible in this listing.
100 bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction)
// Ray origin relative to the sphere centre, and the unnormalized segment direction.
102 b3Vector3 rs = rayFrom - spherePos;
103 b3Vector3 rayDir = rayTo - rayFrom;
// Coefficients of |rs + t * rayDir|^2 == radius^2, i.e. A*t^2 + 2*B*t + C == 0
// (B is half of the usual linear coefficient).
105 float A = b3Dot(rayDir, rayDir);
106 float B = b3Dot(rs, rayDir);
107 float C = b3Dot(rs, rs) - (radius * radius);
// Quarter of the standard discriminant, matching the half-B form above.
109 float D = B * B - A * C;
// Smaller of the two roots, i.e. the first intersection along the segment.
// A is zero when rayFrom == rayTo, and sqrt(D) requires D >= 0.
113 float t = (-B - sqrt(D)) / A;
// Only accept hits at or after the segment start that beat the current hitFraction.
115 if ((t >= 0.0f) && (t < hitFraction))
// Clips the local-space segment rayFromLocal -> rayToLocal against the face planes of
// one convex polyhedron, tracking the latest entry and the earliest exit along the segment.
//   poly       - supplies m_numFaces and m_faceOffset, the range of this shape's faces
//   faces      - shared face array, indexed with poly.m_faceOffset + i
//   hitFraction - in: upper bound for the exit fraction; out: written with enterFraction
//   hitNormal  - out: written with the plane normal of the entry face (w cleared)
// NOTE(review): the else / return lines are not visible in this listing, so the exact
// nesting of the tests below and the returned values must be confirmed in the full file.
124 bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly,
125 const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
// exitFraction starts at the caller's limit; enterFraction starts at a negative
// sentinel, which the enterFraction < 0 test after the loop checks for.
127 float exitFraction = hitFraction;
128 float enterFraction = -0.1f;
129 b3Vector3 curHitNormal = b3MakeVector3(0, 0, 0);
130 for (int i = 0; i < poly.m_numFaces; i++)
132 const b3GpuFace& face = faces[poly.m_faceOffset + i];
// Signed plane distances of both end points: dot(point, plane) + plane.w.
133 float fromPlaneDist = b3Dot(rayFromLocal, face.m_plane) + face.m_plane.w;
134 float toPlaneDist = b3Dot(rayToLocal, face.m_plane) + face.m_plane.w;
// Start on the negative side of the plane ...
135 if (fromPlaneDist < 0.f)
// ... and end on the non-negative side: the segment leaves through this plane,
// so keep the smallest such crossing fraction as the exit.
137 if (toPlaneDist >= 0.f)
139 float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
140 if (exitFraction > fraction)
142 exitFraction = fraction;
// End on the negative side: this crossing is treated as an entry, keeping the largest
// fraction and remembering the plane as the hit normal.
// NOTE(review): presumably this is the else-branch of the fromPlaneDist test - confirm.
148 if (toPlaneDist < 0.f)
150 float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
151 if (enterFraction <= fraction)
153 enterFraction = fraction;
154 curHitNormal = face.m_plane;
// face.m_plane carries the plane offset in w; clear it so only the normal remains.
155 curHitNormal.w = 0.f;
// The exit is not after the entry, so the clipped interval is empty.
163 if (exitFraction <= enterFraction)
// enterFraction is still negative when no entry crossing within the segment was recorded.
167 if (enterFraction < 0.f)
170 hitFraction = enterFraction;
171 hitNormal = curHitNormal;
// CPU implementation of the ray query: tests every ray against every body and stores
// the hit in hitResults when one is found.
//   rays            - input segments (m_from / m_to)
//   hitResults      - in/out, indexed with the same index as rays; m_hitFraction is
//                     read as the initial limit and overwritten when a hit is found
//   numBodies, bodies - bodies to test; bodies[b].m_collidableIdx indexes collidables
//   collidables     - per-body shape type, sphere radius and shape index
//   narrowphaseData - supplies m_convexPolyhedra and m_convexFaces for convex hulls
// numCollidables is not used by any line visible in this listing.
// NOTE(review): several short lines (case labels, break, local declarations such as
// hitNormal / hitPoint, and the assignment of hitBodyIndex) are not visible here.
175 void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
176 int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
178 // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
180 B3_PROFILE("castRaysHost");
181 for (int r = 0; r < rays.size(); r++)
183 b3Vector3 rayFrom = rays[r].m_from;
184 b3Vector3 rayTo = rays[r].m_to;
// Start from the caller-supplied fraction; the intersection helpers only accept
// hits below it, so it shrinks as nearer bodies are hit.
185 float hitFraction = hitResults[r].m_hitFraction;
// -1 means "no body hit"; checked by the hitBodyIndex >= 0 test after the body loop.
187 int hitBodyIndex = -1;
// Brute force: every body is tested for every ray.
190 for (int b = 0; b < numBodies; b++)
192 const b3Vector3& pos = bodies[b].m_pos;
193 //const b3Quaternion& orn = bodies[b].m_quat;
195 switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
// Sphere: intersect in world space, then derive the normal from the hit point.
199 b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
200 if (sphere_intersect(pos, radius, rayFrom, rayTo, hitFraction))
204 hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
205 hitNormal = (hitPoint - bodies[b].m_pos).normalize();
208 case SHAPE_CONVEX_HULL:
// Convex hull: bring the ray into the body's local frame and clip it against the
// hull's faces.
210 b3Transform convexWorldTransform;
211 convexWorldTransform.setIdentity();
212 convexWorldTransform.setOrigin(bodies[b].m_pos);
213 convexWorldTransform.setRotation(bodies[b].m_quat);
214 b3Transform convexWorld2Local = convexWorldTransform.inverse();
216 b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
217 b3Vector3 rayToLocal = convexWorld2Local(rayTo);
219 int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
220 const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
// NOTE(review): rayConvex writes the face plane normal, which is in the hull's local
// frame; no transform back to world space is visible in this listing - confirm.
221 if (rayConvex(rayFromLocal, rayToLocal, poly, narrowphaseData->m_convexFaces, hitFraction, hitNormal))
// Any other shape type: emit a warning.
// NOTE(review): the static flag presumably limits the warning to once per process;
// the lines that test and clear it are not visible here.
230 static bool once = true;
234 b3Warning("Raytest: unsupported shape type\n");
// Publish the nearest hit for this ray; entries without a hit are left untouched.
239 if (hitBodyIndex >= 0)
241 hitResults[r].m_hitFraction = hitFraction;
242 hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
243 hitResults[r].m_hitNormal = hitNormal;
244 hitResults[r].m_hitBody = hitBodyIndex;
248 ///todo: add some acceleration structure (AABBs, tree etc)
// NOTE(review): the todo above looks stale - the non-brute-force path below already
// builds a parallel linear BVH over the broadphase AABBs.
//
// GPU implementation of the ray query. Uploads rays and hitResults, runs the ray-cast
// kernels and copies the hit results back into hitResults.
//   rays / hitResults - input segments and in/out results
//   numBodies         - only passed to the brute-force kernel
//   narrowphaseData   - supplies the GPU body, collidable, convex face and convex
//                       polyhedron buffers
//   broadphase        - supplies the GPU AABBs and small / large AABB index lists for
//                       the BVH build
// bodies, collidables and numCollidables are not used by any line visible in this listing.
249 void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
250 int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
251 const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
253 //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
255 B3_PROFILE("castRaysGPU");
// Upload the inputs; hitResults goes up too, so the kernels see the caller's initial values.
258 B3_PROFILE("raycast copyFromHost");
259 m_data->m_gpuRays->copyFromHost(rays);
260 m_data->m_gpuHitResults->copyFromHost(hitResults);
// NOTE(review): the ray count is taken from hitResults, not rays - this assumes both
// arrays have the same size; confirm against callers.
263 int numRays = hitResults.size();
265 m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
266 m_data->m_numRayRigidPairsPerRay->resize(numRays);
// One int for the pair counter, and room for 16 ray-rigid pairs per ray; the count
// read back from the GPU is clamped to this capacity further down.
268 m_data->m_gpuNumRayRigidPairs->resize(1);
269 m_data->m_gpuRayRigidPairs->resize(numRays * 16);
// Compile-time switch between the brute-force kernel and the BVH-based path; it is
// false, so the brute-force launch below is not taken.
// NOTE(review): the else that separates the two paths is not visible in this listing.
273 const bool USE_BRUTE_FORCE_RAYCAST = false;
274 if (USE_BRUTE_FORCE_RAYCAST)
276 B3_PROFILE("raycast launch1D");
// Brute force: one launch of size numRays that is handed all bodies.
278 b3LauncherCL launcher(m_data->m_q, m_data->m_raytraceKernel, "m_raytraceKernel");
// Shadows the outer numRays, here taken from rays rather than hitResults.
279 int numRays = rays.size();
280 launcher.setConst(numRays);
282 launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
283 launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());
285 launcher.setConst(numBodies);
286 launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
287 launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
288 launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
289 launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
291 launcher.launch1D(numRays);
292 clFinish(m_data->m_q);
// BVH path, step 1: rebuild the BVH from the broadphase AABBs and collect the
// (ray, rigid) pairs whose AABB the ray intersects.
296 m_data->m_plbvh->build(broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU());
298 m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
// Step 2: read the pair count back and clamp it to the pair buffer's capacity,
// writing the clamped value back to the GPU counter.
300 int numRayRigidPairs = -1;
301 m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
302 if (numRayRigidPairs > m_data->m_gpuRayRigidPairs->size())
304 numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
305 m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
308 m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
310 //Sort ray-rigid pairs by ray index
312 B3_PROFILE("sort ray-rigid pairs");
// NOTE(review): the cast assumes b3Int2 and b3SortData share a layout with the sort
// key in the first member (x == ray index) - confirm against both definitions.
313 m_data->m_radixSorter->execute(*reinterpret_cast<b3OpenCLArray<b3SortData>*>(m_data->m_gpuRayRigidPairs));
316 //detect start,count of each ray pair
318 B3_PROFILE("detect ray-rigid pair index ranges");
321 B3_PROFILE("reset ray-rigid pair index ranges");
// Reset the per-ray ranges: first index starts at numRayRigidPairs (one past the last
// valid pair index) and the count starts at 0.
323 m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
324 m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
325 clFinish(m_data->m_q);
// Step 3: launch over the pairs to record each ray's first pair index and pair count.
328 b3BufferInfoCL bufferInfo[] =
330 b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL()),
332 b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
333 b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL())};
335 b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
336 launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
337 launcher.setConst(numRayRigidPairs);
// NOTE(review): numRayRigidPairs can be 0 when no ray touches any AABB - confirm that
// launch1D tolerates a zero-sized launch.
339 launcher.launch1D(numRayRigidPairs);
340 clFinish(m_data->m_q);
// Step 4: launch over the rays, passing each ray's pair range and the pair list so
// the kernel can restrict its tests to the rigids paired with that ray.
344 B3_PROFILE("ray-rigid intersection");
346 b3BufferInfoCL bufferInfo[] =
348 b3BufferInfoCL(m_data->m_gpuRays->getBufferCL()),
349 b3BufferInfoCL(m_data->m_gpuHitResults->getBufferCL()),
350 b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
351 b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL()),
353 b3BufferInfoCL(narrowphaseData->m_bodyBufferGPU->getBufferCL()),
354 b3BufferInfoCL(narrowphaseData->m_collidablesGPU->getBufferCL()),
355 b3BufferInfoCL(narrowphaseData->m_convexFacesGPU->getBufferCL()),
356 b3BufferInfoCL(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()),
358 b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL())};
360 b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
361 launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
362 launcher.setConst(numRays);
364 launcher.launch1D(numRays);
365 clFinish(m_data->m_q);
// Copy the device hit results back into the caller's array.
371 B3_PROFILE("raycast copyToHost");
372 m_data->m_gpuHitResults->copyToHost(hitResults);