2 This software is provided 'as-is', without any express or implied warranty.
3 In no event will the authors be held liable for any damages arising from the use of this software.
4 Permission is granted to anyone to use this software for any purpose,
5 including commercial applications, and to alter it and redistribute it freely,
6 subject to the following restrictions:
8 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
9 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
10 3. This notice may not be removed or altered from any source distribution.
12 //Initial Author Jackson Lee, 2014
14 #ifndef B3_GPU_PARALLEL_LINEAR_BVH_H
15 #define B3_GPU_PARALLEL_LINEAR_BVH_H
17 //#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
18 #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
19 #include "Bullet3Common/shared/b3Int2.h"
20 #include "Bullet3Common/shared/b3Int4.h"
21 #include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
23 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
24 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
25 #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
27 #include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h"
29 #define b3Int64 cl_long
31 ///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame
33 ///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details.
36 ///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n
37 ///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n
39 ///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages:
40 /// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid)
41 /// - [fully parallel] Sort morton codes
42 /// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH)
43 /// - [somewhat parallel] Set internal node AABBs
45 ///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages.
46 ///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree.
47 ///Instead of searching for the child nodes of each internal node, we search for the parent node of each node.
48 ///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs.
49 class b3GpuParallelLinearBvh
51 cl_command_queue m_queue;
53 cl_program m_parallelLinearBvhProgram;
55 cl_kernel m_separateAabbsKernel;
56 cl_kernel m_findAllNodesMergedAabbKernel;
57 cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
59 //Binary radix tree construction kernels
60 cl_kernel m_computeAdjacentPairCommonPrefixKernel;
61 cl_kernel m_buildBinaryRadixTreeLeafNodesKernel;
62 cl_kernel m_buildBinaryRadixTreeInternalNodesKernel;
63 cl_kernel m_findDistanceFromRootKernel;
64 cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel;
66 cl_kernel m_findLeafIndexRangesKernel;
69 cl_kernel m_plbvhCalculateOverlappingPairsKernel;
70 cl_kernel m_plbvhRayTraverseKernel;
71 cl_kernel m_plbvhLargeAabbAabbTestKernel;
72 cl_kernel m_plbvhLargeAabbRayTestKernel;
74 b3RadixSort32CL m_radixSorter;
77 b3OpenCLArray<int> m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node
78 b3OpenCLArray<int> m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node
79 b3OpenCLArray<int> m_temp; //Used to hold the number of pairs in calculateOverlappingPairs()
81 //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1)
82 b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
83 b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index
84 b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node
85 b3OpenCLArray<int> m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal
87 //1 element per internal node; for binary radix tree construction
88 b3OpenCLArray<b3Int64> m_commonPrefixes;
89 b3OpenCLArray<int> m_commonPrefixLengths;
90 b3OpenCLArray<int> m_distanceFromRoot; //Number of internal nodes between this node and the root
92 //1 element per leaf node (leaf nodes only include small AABBs)
93 b3OpenCLArray<int> m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal
94 b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs
95 b3OpenCLArray<b3SapAabb> m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes
96 b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs
98 //1 element per large AABB, which is not stored in the BVH
99 b3OpenCLArray<b3SapAabb> m_largeAabbs;
102 b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
103 virtual ~b3GpuParallelLinearBvh();
105 ///Must be called before any other function
106 void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
107 const b3OpenCLArray<int>& largeAabbIndices);
109 ///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
110 ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs.
111 ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized.
112 void calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs);
114 ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections;
115 ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough.
116 ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index.
117 ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded.
118 void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays,
119 b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs);
122 void constructBinaryRadixTree();