2 #ifndef B3_LAUNCHER_CL_H
3 #define B3_LAUNCHER_CL_H
5 #include "b3BufferInfoCL.h"
6 #include "Bullet3Common/b3MinMax.h"
7 #include "b3OpenCLArray.h"
// Presumably switches on the kernel-argument serialization debugging support.
// NOTE(review): no test of this macro is visible in this excerpt - confirm where it is used.
10 #define B3_DEBUG_SERIALIZE_CL
// Disables compiler warning number 4996 (presumably MSVC's deprecated/unsafe-function
// diagnostic - confirm). Any platform guard around this pragma is not visible here.
13 #pragma warning(disable : 4996)
// Capacity, in bytes, of the inline storage for a by-value kernel argument: it sizes
// m_argData and is the limit setConst() asserts against.
15 #define B3_CL_MAX_ARG_SIZE 16
// Record describing one kernel argument (used as b3KernelArgData by setConst()).
// B3_ATTRIBUTE_ALIGNED16 presumably requests 16-byte alignment for the struct - confirm.
// NOTE(review): the struct name, its braces and most members are not visible in this
// excerpt; setConst() also writes m_argIndex, m_isBuffer and m_argSizeInBytes on it.
16 B3_ATTRIBUTE_ALIGNED16(struct)
// Raw bytes of a by-value argument; holds at most B3_CL_MAX_ARG_SIZE bytes.
25 unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
// Command queue that launch2D() enqueues the kernel on.
31 cl_command_queue m_commandQueue;
// Recorded copies of kernel arguments; setConst() appends one entry per call while
// serialization is enabled.
35 b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
// Running byte count returned by getSerializationBufferSize(); setConst() adds
// sizeof(b3KernelArgData) for every argument it records.
36 int m_serializationSizeInBytes;
// When true, setConst() records its argument in m_kernelArguments. Set through
// enableSerialization().
37 bool m_enableSerialization;
// Array of pointers to byte-typed OpenCL arrays.
// NOTE(review): nothing in this excerpt reads or writes it - ownership and purpose
// must be confirmed in the implementation file.
42 b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays;
// Creates a launcher for `kernel`, presumably storing `queue` and `kernel` for later
// use by setConst()/launch2D(). NOTE(review): the definition is not in this excerpt;
// confirm there how `name` is used.
44 b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
// Virtual destructor; the definition (and any cleanup it does) is not visible here.
46 virtual ~b3LauncherCL();
// Supplies an OpenCL memory object as a kernel argument - presumably the next
// argument slot, mirroring setConst(). Definition not visible; confirm.
48 void setBuffer(cl_mem clBuffer);
// Takes an array of `n` b3BufferInfoCL entries - presumably one kernel argument per
// entry, in order. Definition not visible; confirm.
50 void setBuffers(b3BufferInfoCL* buffInfo, int n);
52 int getSerializationBufferSize() const
54 return m_serializationSizeInBytes;
// Takes a byte buffer of `bufSize` bytes and an OpenCL context; presumably rebuilds
// kernel arguments from data produced by serializeArguments().
// NOTE(review): definition not visible - confirm the meaning of the return value.
57 int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
// Takes a reference ("gold") byte buffer and its capacity; presumably compares current
// results against it. NOTE(review): declared inline but no definition is visible in this
// excerpt - confirm one is available to every translation unit that calls it.
59 inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
// Takes a destination buffer and its capacity in bytes; presumably writes the recorded
// kernel arguments into it. NOTE(review): definition not visible - confirm return value
// and the behavior when the capacity is too small.
61 int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
63 int getNumArguments() const
65 return m_kernelArguments.size();
68 b3KernelArgData getArgument(int index)
70 return m_kernelArguments[index];
// Takes an output file name and a work-item count; presumably writes the recorded
// kernel arguments (and the work-item count) to that file.
// NOTE(review): definition not visible - confirm the file format and error handling.
73 void serializeToFile(const char* fileName, int numWorkItems);
// Passes `consts` by value as the kernel argument at index m_idx, then advances m_idx.
// While serialization is enabled, a b3KernelArgData record for the argument is first
// appended to m_kernelArguments and m_serializationSizeInBytes grows by
// sizeof(b3KernelArgData).
// NOTE(review): `T` and `sz` are declared on lines not visible in this excerpt
// (presumably a template header and sz = sizeof(T)) - confirm.
76 inline void setConst(const T& consts)
// The argument has to fit into the fixed-size m_argData storage of a record.
79 b3Assert(sz <= B3_CL_MAX_ARG_SIZE);
81 if (m_enableSerialization)
83 b3KernelArgData kernelArg;
84 kernelArg.m_argIndex = m_idx;
// Presumably 0 marks a by-value argument as opposed to a buffer - confirm.
85 kernelArg.m_isBuffer = 0;
// Views the record's byte storage as a T.
// NOTE(review): no store through destArg is visible in this excerpt - confirm the
// value is actually copied, and that T's alignment is satisfied by m_argData.
86 T* destArg = (T*)kernelArg.m_argData;
88 kernelArg.m_argSizeInBytes = sizeof(T);
89 m_kernelArguments.push_back(kernelArg);
// The running total grows by the size of the whole record, not by sizeof(T).
90 m_serializationSizeInBytes += sizeof(b3KernelArgData);
// Post-increment: this call uses the current slot and the next set* call gets the following one.
93 cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts);
// Failure is only checked through b3Assert; nothing is returned to the caller.
94 b3Assert(status == CL_SUCCESS);
97 inline void launch1D(int numThreads, int localSize = 64)
99 launch2D(numThreads, 1, localSize, 1);
102 inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY)
104 size_t gRange[3] = {1, 1, 1};
105 size_t lRange[3] = {1, 1, 1};
106 lRange[0] = localSizeX;
107 lRange[1] = localSizeY;
108 gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1));
109 gRange[0] *= lRange[0];
110 gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1));
111 gRange[1] *= lRange[1];
113 cl_int status = clEnqueueNDRangeKernel(m_commandQueue,
114 m_kernel, 2, NULL, gRange, lRange, 0, 0, 0);
115 if (status != CL_SUCCESS)
117 printf("Error: OpenCL status = %d\n", status);
119 b3Assert(status == CL_SUCCESS);
122 void enableSerialization(bool serialize)
124 m_enableSerialization = serialize;
128 #endif //B3_LAUNCHER_CL_H