dali-physics/third-party/bullet3/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h

   1 /*
   2 This software is provided 'as-is', without any express or implied warranty.
   3 In no event will the authors be held liable for any damages arising from the use of this software.
   4 Permission is granted to anyone to use this software for any purpose,
   5 including commercial applications, and to alter it and redistribute it freely,
   6 subject to the following restrictions:
   7
   8 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
   9 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  10 3. This notice may not be removed or altered from any source distribution.
  11 */
  12 //Initial Author Jackson Lee, 2014
  13
  14 #ifndef B3_GPU_PARALLEL_LINEAR_BVH_H
  15 #define B3_GPU_PARALLEL_LINEAR_BVH_H
  16
  17 //#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
  18 #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
  19 #include "Bullet3Common/shared/b3Int2.h"
  20 #include "Bullet3Common/shared/b3Int4.h"
  21 #include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
  22
  23 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
  24 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
  25 #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
  26
  27 #include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h"
  28
  29 #define b3Int64 cl_long
  30
  31 ///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame
  32 ///@remarks
  33 ///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details.
  34 ///@par
  35 ///Related papers: \n
  36 ///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n
  37 ///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n
  38 ///@par
  39 ///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages:
  40 /// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid)
  41 /// - [fully parallel] Sort morton codes
  42 /// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH)
  43 /// - [somewhat parallel] Set internal node AABBs
  44 ///@par
  45 ///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages.
  46 ///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree.
  47 ///Instead of searching for the child nodes of each internal node, we search for the parent node of each node.
  48 ///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs.
  49 class b3GpuParallelLinearBvh
  50 {
  51         cl_command_queue m_queue;
  52
  53         cl_program m_parallelLinearBvhProgram;
  54
  55         cl_kernel m_separateAabbsKernel;
  56         cl_kernel m_findAllNodesMergedAabbKernel;
  57         cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
  58
  59         //Binary radix tree construction kernels
  60         cl_kernel m_computeAdjacentPairCommonPrefixKernel;
  61         cl_kernel m_buildBinaryRadixTreeLeafNodesKernel;
  62         cl_kernel m_buildBinaryRadixTreeInternalNodesKernel;
  63         cl_kernel m_findDistanceFromRootKernel;
  64         cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel;
  65
  66         cl_kernel m_findLeafIndexRangesKernel;
  67
  68         //Traversal kernels
  69         cl_kernel m_plbvhCalculateOverlappingPairsKernel;
  70         cl_kernel m_plbvhRayTraverseKernel;
  71         cl_kernel m_plbvhLargeAabbAabbTestKernel;
  72         cl_kernel m_plbvhLargeAabbRayTestKernel;
  73
  74         b3RadixSort32CL m_radixSorter;
  75
  76         //1 element
  77         b3OpenCLArray<int> m_rootNodeIndex;        //Most significant bit(0x80000000) is set to indicate internal node
  78         b3OpenCLArray<int> m_maxDistanceFromRoot;  //Max number of internal nodes between an internal node and the root node
  79         b3OpenCLArray<int> m_temp;                 //Used to hold the number of pairs in calculateOverlappingPairs()
  80
  81         //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1)
  82         b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
  83         b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges;  //x == min leaf index, y == max leaf index
  84         b3OpenCLArray<b3Int2> m_internalNodeChildNodes;       //x == left child, y == right child; msb(0x80000000) is set to indicate internal node
  85         b3OpenCLArray<int> m_internalNodeParentNodes;         //For parent node index, msb(0x80000000) is not set since it is always internal
  86
  87         //1 element per internal node; for binary radix tree construction
  88         b3OpenCLArray<b3Int64> m_commonPrefixes;
  89         b3OpenCLArray<int> m_commonPrefixLengths;
  90         b3OpenCLArray<int> m_distanceFromRoot;  //Number of internal nodes between this node and the root
  91
  92         //1 element per leaf node (leaf nodes only include small AABBs)
  93         b3OpenCLArray<int> m_leafNodeParentNodes;                //For parent node index, msb(0x80000000) is not set since it is always internal
  94         b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies;  //m_key == morton code, m_value == aabb index in m_leafNodeAabbs
  95         b3OpenCLArray<b3SapAabb> m_mergedAabb;                   //m_mergedAabb[0] contains the merged AABB of all leaf nodes
  96         b3OpenCLArray<b3SapAabb> m_leafNodeAabbs;                //Contains only small AABBs
  97
  98         //1 element per large AABB, which is not stored in the BVH
  99         b3OpenCLArray<b3SapAabb> m_largeAabbs;
 100
 101 public:
 102         b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
 103         virtual ~b3GpuParallelLinearBvh();
 104
 105         ///Must be called before any other function
 106         void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
 107                            const b3OpenCLArray<int>& largeAabbIndices);
 108
 109         ///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
 110         ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs.
 111         ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized.
 112         void calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs);
 113
 114         ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections;
 115         ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough.
 116         ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index.
 117         ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded.
 118         void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays,
 119                                                                  b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs);
 120
 121 private:
 122         void constructBinaryRadixTree();
 123 };
 124
 125 #endif