[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / bullet3 / src / Bullet3OpenCL / Raycast / b3GpuRaycast.cpp
1
2 #include "b3GpuRaycast.h"
3 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
4 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
5 #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
6
7 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
8 #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
9 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
10 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
11 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
12 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
13 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h"
14
15 #include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
16
17 #define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
18
19 struct b3GpuRaycastInternalData
20 {
21         cl_context m_context;
22         cl_device_id m_device;
23         cl_command_queue m_q;
24         cl_kernel m_raytraceKernel;
25         cl_kernel m_raytracePairsKernel;
26         cl_kernel m_findRayRigidPairIndexRanges;
27
28         b3GpuParallelLinearBvh* m_plbvh;
29         b3RadixSort32CL* m_radixSorter;
30         b3FillCL* m_fill;
31
32         //1 element per ray
33         b3OpenCLArray<b3RayInfo>* m_gpuRays;
34         b3OpenCLArray<b3RayHit>* m_gpuHitResults;
35         b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
36         b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
37
38         //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
39         b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
40         b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs;  //x == ray index, y == rigid index
41
42         int m_test;
43 };
44
45 b3GpuRaycast::b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q)
46 {
47         m_data = new b3GpuRaycastInternalData;
48         m_data->m_context = ctx;
49         m_data->m_device = device;
50         m_data->m_q = q;
51         m_data->m_raytraceKernel = 0;
52         m_data->m_raytracePairsKernel = 0;
53         m_data->m_findRayRigidPairIndexRanges = 0;
54
55         m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
56         m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
57         m_data->m_fill = new b3FillCL(ctx, device, q);
58
59         m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
60         m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
61         m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
62         m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q);
63         m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q);
64         m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
65
66         {
67                 cl_int errNum = 0;
68                 cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, &errNum, "", B3_RAYCAST_PATH);
69                 b3Assert(errNum == CL_SUCCESS);
70                 m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastKernel", &errNum, prog);
71                 b3Assert(errNum == CL_SUCCESS);
72                 m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastPairsKernel", &errNum, prog);
73                 b3Assert(errNum == CL_SUCCESS);
74                 m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "findRayRigidPairIndexRanges", &errNum, prog);
75                 b3Assert(errNum == CL_SUCCESS);
76                 clReleaseProgram(prog);
77         }
78 }
79
80 b3GpuRaycast::~b3GpuRaycast()
81 {
82         clReleaseKernel(m_data->m_raytraceKernel);
83         clReleaseKernel(m_data->m_raytracePairsKernel);
84         clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
85
86         delete m_data->m_plbvh;
87         delete m_data->m_radixSorter;
88         delete m_data->m_fill;
89
90         delete m_data->m_gpuRays;
91         delete m_data->m_gpuHitResults;
92         delete m_data->m_firstRayRigidPairIndexPerRay;
93         delete m_data->m_numRayRigidPairsPerRay;
94         delete m_data->m_gpuNumRayRigidPairs;
95         delete m_data->m_gpuRayRigidPairs;
96
97         delete m_data;
98 }
99
100 bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction)
101 {
102         b3Vector3 rs = rayFrom - spherePos;
103         b3Vector3 rayDir = rayTo - rayFrom;
104
105         float A = b3Dot(rayDir, rayDir);
106         float B = b3Dot(rs, rayDir);
107         float C = b3Dot(rs, rs) - (radius * radius);
108
109         float D = B * B - A * C;
110
111         if (D > 0.0)
112         {
113                 float t = (-B - sqrt(D)) / A;
114
115                 if ((t >= 0.0f) && (t < hitFraction))
116                 {
117                         hitFraction = t;
118                         return true;
119                 }
120         }
121         return false;
122 }
123
124 bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly,
125                            const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
126 {
127         float exitFraction = hitFraction;
128         float enterFraction = -0.1f;
129         b3Vector3 curHitNormal = b3MakeVector3(0, 0, 0);
130         for (int i = 0; i < poly.m_numFaces; i++)
131         {
132                 const b3GpuFace& face = faces[poly.m_faceOffset + i];
133                 float fromPlaneDist = b3Dot(rayFromLocal, face.m_plane) + face.m_plane.w;
134                 float toPlaneDist = b3Dot(rayToLocal, face.m_plane) + face.m_plane.w;
135                 if (fromPlaneDist < 0.f)
136                 {
137                         if (toPlaneDist >= 0.f)
138                         {
139                                 float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
140                                 if (exitFraction > fraction)
141                                 {
142                                         exitFraction = fraction;
143                                 }
144                         }
145                 }
146                 else
147                 {
148                         if (toPlaneDist < 0.f)
149                         {
150                                 float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
151                                 if (enterFraction <= fraction)
152                                 {
153                                         enterFraction = fraction;
154                                         curHitNormal = face.m_plane;
155                                         curHitNormal.w = 0.f;
156                                 }
157                         }
158                         else
159                         {
160                                 return false;
161                         }
162                 }
163                 if (exitFraction <= enterFraction)
164                         return false;
165         }
166
167         if (enterFraction < 0.f)
168                 return false;
169
170         hitFraction = enterFraction;
171         hitNormal = curHitNormal;
172         return true;
173 }
174
175 void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
176                                                                 int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
177 {
178         //      return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
179
180         B3_PROFILE("castRaysHost");
181         for (int r = 0; r < rays.size(); r++)
182         {
183                 b3Vector3 rayFrom = rays[r].m_from;
184                 b3Vector3 rayTo = rays[r].m_to;
185                 float hitFraction = hitResults[r].m_hitFraction;
186
187                 int hitBodyIndex = -1;
188                 b3Vector3 hitNormal;
189
190                 for (int b = 0; b < numBodies; b++)
191                 {
192                         const b3Vector3& pos = bodies[b].m_pos;
193                         //const b3Quaternion& orn = bodies[b].m_quat;
194
195                         switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
196                         {
197                                 case SHAPE_SPHERE:
198                                 {
199                                         b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
200                                         if (sphere_intersect(pos, radius, rayFrom, rayTo, hitFraction))
201                                         {
202                                                 hitBodyIndex = b;
203                                                 b3Vector3 hitPoint;
204                                                 hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
205                                                 hitNormal = (hitPoint - bodies[b].m_pos).normalize();
206                                         }
207                                 }
208                                 case SHAPE_CONVEX_HULL:
209                                 {
210                                         b3Transform convexWorldTransform;
211                                         convexWorldTransform.setIdentity();
212                                         convexWorldTransform.setOrigin(bodies[b].m_pos);
213                                         convexWorldTransform.setRotation(bodies[b].m_quat);
214                                         b3Transform convexWorld2Local = convexWorldTransform.inverse();
215
216                                         b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
217                                         b3Vector3 rayToLocal = convexWorld2Local(rayTo);
218
219                                         int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
220                                         const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
221                                         if (rayConvex(rayFromLocal, rayToLocal, poly, narrowphaseData->m_convexFaces, hitFraction, hitNormal))
222                                         {
223                                                 hitBodyIndex = b;
224                                         }
225
226                                         break;
227                                 }
228                                 default:
229                                 {
230                                         static bool once = true;
231                                         if (once)
232                                         {
233                                                 once = false;
234                                                 b3Warning("Raytest: unsupported shape type\n");
235                                         }
236                                 }
237                         }
238                 }
239                 if (hitBodyIndex >= 0)
240                 {
241                         hitResults[r].m_hitFraction = hitFraction;
242                         hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
243                         hitResults[r].m_hitNormal = hitNormal;
244                         hitResults[r].m_hitBody = hitBodyIndex;
245                 }
246         }
247 }
248 ///todo: add some acceleration structure (AABBs, tree etc)
249 void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
250                                                         int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
251                                                         const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
252 {
253         //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
254
255         B3_PROFILE("castRaysGPU");
256
257         {
258                 B3_PROFILE("raycast copyFromHost");
259                 m_data->m_gpuRays->copyFromHost(rays);
260                 m_data->m_gpuHitResults->copyFromHost(hitResults);
261         }
262
263         int numRays = hitResults.size();
264         {
265                 m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
266                 m_data->m_numRayRigidPairsPerRay->resize(numRays);
267
268                 m_data->m_gpuNumRayRigidPairs->resize(1);
269                 m_data->m_gpuRayRigidPairs->resize(numRays * 16);
270         }
271
272         //run kernel
273         const bool USE_BRUTE_FORCE_RAYCAST = false;
274         if (USE_BRUTE_FORCE_RAYCAST)
275         {
276                 B3_PROFILE("raycast launch1D");
277
278                 b3LauncherCL launcher(m_data->m_q, m_data->m_raytraceKernel, "m_raytraceKernel");
279                 int numRays = rays.size();
280                 launcher.setConst(numRays);
281
282                 launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
283                 launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());
284
285                 launcher.setConst(numBodies);
286                 launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
287                 launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
288                 launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
289                 launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
290
291                 launcher.launch1D(numRays);
292                 clFinish(m_data->m_q);
293         }
294         else
295         {
296                 m_data->m_plbvh->build(broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU());
297
298                 m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
299
300                 int numRayRigidPairs = -1;
301                 m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
302                 if (numRayRigidPairs > m_data->m_gpuRayRigidPairs->size())
303                 {
304                         numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
305                         m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
306                 }
307
308                 m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs);  //Radix sort needs b3OpenCLArray::size() to be correct
309
310                 //Sort ray-rigid pairs by ray index
311                 {
312                         B3_PROFILE("sort ray-rigid pairs");
313                         m_data->m_radixSorter->execute(*reinterpret_cast<b3OpenCLArray<b3SortData>*>(m_data->m_gpuRayRigidPairs));
314                 }
315
316                 //detect start,count of each ray pair
317                 {
318                         B3_PROFILE("detect ray-rigid pair index ranges");
319
320                         {
321                                 B3_PROFILE("reset ray-rigid pair index ranges");
322
323                                 m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays);  //atomic_min used to find first index
324                                 m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
325                                 clFinish(m_data->m_q);
326                         }
327
328                         b3BufferInfoCL bufferInfo[] =
329                                 {
330                                         b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL()),
331
332                                         b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
333                                         b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL())};
334
335                         b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
336                         launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
337                         launcher.setConst(numRayRigidPairs);
338
339                         launcher.launch1D(numRayRigidPairs);
340                         clFinish(m_data->m_q);
341                 }
342
343                 {
344                         B3_PROFILE("ray-rigid intersection");
345
346                         b3BufferInfoCL bufferInfo[] =
347                                 {
348                                         b3BufferInfoCL(m_data->m_gpuRays->getBufferCL()),
349                                         b3BufferInfoCL(m_data->m_gpuHitResults->getBufferCL()),
350                                         b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
351                                         b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL()),
352
353                                         b3BufferInfoCL(narrowphaseData->m_bodyBufferGPU->getBufferCL()),
354                                         b3BufferInfoCL(narrowphaseData->m_collidablesGPU->getBufferCL()),
355                                         b3BufferInfoCL(narrowphaseData->m_convexFacesGPU->getBufferCL()),
356                                         b3BufferInfoCL(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()),
357
358                                         b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL())};
359
360                         b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
361                         launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
362                         launcher.setConst(numRays);
363
364                         launcher.launch1D(numRays);
365                         clFinish(m_data->m_q);
366                 }
367         }
368
369         //copy results
370         {
371                 B3_PROFILE("raycast copyToHost");
372                 m_data->m_gpuHitResults->copyToHost(hitResults);
373         }
374 }