1 #ifndef B3_GPU_SAP_BROADPHASE_H
2 #define B3_GPU_SAP_BROADPHASE_H
4 #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
5 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
7 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
10 #include "Bullet3Common/shared/b3Int2.h"
12 #include "b3GpuBroadphaseInterface.h"
// Broadphase implementation of b3GpuBroadphaseInterface that keeps its state in
// OpenCL buffers (b3OpenCLArray) with host-side mirrors (b3AlignedObjectArray).
// NOTE(review): "Sap" presumably stands for sweep-and-prune -- confirm against the .cpp.
14 class b3GpuSapBroadphase : public b3GpuBroadphaseInterface
// OpenCL device and command-queue handles.
// Presumably the ones handed to the constructor -- verify in the implementation file.
17 cl_device_id m_device;
18 cl_command_queue m_queue;
// OpenCL kernel handles. What each kernel computes is not visible in this header;
// see the implementation file and the kernel sources.
19 cl_kernel m_flipFloatKernel;
20 cl_kernel m_scatterKernel;
21 cl_kernel m_copyAabbsKernel;
22 cl_kernel m_sapKernel;
23 cl_kernel m_sap2Kernel;
24 cl_kernel m_prepareSumVarianceKernel;
// Pointer to a b3RadixSort32CL sorter (elaborated type specifier, so the pointer
// declaration itself does not need the full class definition).
26 class b3RadixSort32CL* m_sorter;
// Host-side 3x2 tables of sort data and min/max indices.
// NOTE(review): the [3] dimension presumably indexes the axis and the [2] dimension a
// current/previous buffer, mirroring the axis0/1/2 and *prev GPU members below -- confirm.
29 b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
30 b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2];
// Device-side min/max index arrays, one per axis (0, 1, 2) ...
31 b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0;
32 b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1;
33 b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2;
// ... and their "prev" counterparts (presumably the values from the previous update -- TODO confirm).
34 b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev;
35 b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev;
36 b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev;
// Device-side sort data, one array per axis, again with "prev" counterparts.
38 b3OpenCLArray<b3SortData> m_sortedAxisGPU0;
39 b3OpenCLArray<b3SortData> m_sortedAxisGPU1;
40 b3OpenCLArray<b3SortData> m_sortedAxisGPU2;
41 b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev;
42 b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev;
43 b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;
// Device buffers for added/removed pairs (b3Int4 entries) and their integer counters.
// NOTE(review): presumably filled by the incremental pair update -- verify against the .cpp.
45 b3OpenCLArray<b3Int4> m_addedHostPairsGPU;
46 b3OpenCLArray<b3Int4> m_removedHostPairsGPU;
47 b3OpenCLArray<int> m_addedCountGPU;
48 b3OpenCLArray<int> m_removedCountGPU;
// Device-side integer array holding the pair count (layout/length not visible here).
53 b3OpenCLArray<int> m_pairCount;
// AABB storage: device array and host array of b3SapAabb.
55 b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
56 b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
// Accessors returning mutable references to AABB arrays.
// Their bodies are not visible in this view; presumably they return the two members above -- confirm.
58 virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()
62 virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()
// Device arrays of b3Vector3.
// NOTE(review): together with m_prepareSumVarianceKernel and m_prefixScanFloat4 these look
// like scratch buffers for a sum / sum-of-squares (variance) computation -- confirm.
67 b3OpenCLArray<b3Vector3> m_sum;
68 b3OpenCLArray<b3Vector3> m_sum2;
69 b3OpenCLArray<b3Vector3> m_dst;
// Integer mapping arrays for "small" AABBs, kept on both device and host.
71 b3OpenCLArray<int> m_smallAabbsMappingGPU;
72 b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
// Integer mapping arrays for "large" AABBs, kept on both device and host.
74 b3OpenCLArray<int> m_largeAabbsMappingGPU;
75 b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
// Device array of overlapping pairs, stored as b3Int4 entries.
77 b3OpenCLArray<b3Int4> m_overlappingPairs;
79 //temporary GPU work memory
80 b3OpenCLArray<b3SortData> m_gpuSmallSortData;
81 b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
// Pointer to a b3PrefixScanFloat4CL helper (elaborated type specifier, as with m_sorter).
83 class b3PrefixScanFloat4CL* m_prefixScanFloat4;
// Selects which pair-finding implementation the broadphase uses.
// Enumerators are numbered consecutively starting at 1.
85 enum b3GpuSapKernelType
87 B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU = 1,
88 B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
89 B3_GPU_SAP_KERNEL_ORIGINAL,
90 B3_GPU_SAP_KERNEL_BARRIER,
91 B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
// Constructs the broadphase for the given OpenCL context, device and command queue.
// kernelType defaults to B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY when omitted.
94 b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType = B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
95 virtual ~b3GpuSapBroadphase();
// Factory helpers: each allocates a b3GpuSapBroadphase with `new`, hard-wiring one
// b3GpuSapKernelType, and returns it through the base-interface pointer.
// NOTE(review): nothing here releases the object, so the caller presumably owns and deletes it.
97 static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx, cl_device_id device, cl_command_queue q)
99 return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
102 static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx, cl_device_id device, cl_command_queue q)
104 return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
107 static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx, cl_device_id device, cl_command_queue q)
109 return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_ORIGINAL);
111 static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx, cl_device_id device, cl_command_queue q)
113 return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BARRIER);
115 static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx, cl_device_id device, cl_command_queue q)
117 return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
// Pair-computation entry points; declared here, defined in the implementation file.
// NOTE(review): maxPairs presumably caps the number of pairs produced, and the *Host
// variants presumably run on the CPU -- confirm in the .cpp.
120 virtual void calculateOverlappingPairs(int maxPairs);
121 virtual void calculateOverlappingPairsHost(int maxPairs);
126 virtual void calculateOverlappingPairsHostIncremental3Sap();
// Register a proxy from its AABB corners, an integer user value and collision filter
// group/mask. How createLargeProxy differs from createProxy is not visible in this
// header (presumably it feeds the "large" AABB mapping -- confirm).
128 virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask);
129 virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask);
131 //call writeAabbsToGpu once all changes (createProxy etc.) have been made
132 virtual void writeAabbsToGpu();
// Raw accessors: OpenCL buffer handles (cl_mem) and the overlap count.
// NOTE(review): "WS" in getAabbBufferWS presumably means world space -- confirm.
134 virtual cl_mem getAabbBufferWS();
135 virtual int getNumOverlap();
136 virtual cl_mem getOverlappingPairBuffer();
// Typed accessors returning mutable references to device arrays
// (presumably m_overlappingPairs and the small/large mapping arrays -- confirm in the .cpp).
138 virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
139 virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
140 virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
143 #endif //B3_GPU_SAP_BROADPHASE_H