2 Bullet Continuous Collision Detection and Physics Library
\r
3 Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
\r
5 This software is provided 'as-is', without any express or implied warranty.
\r
6 In no event will the authors be held liable for any damages arising from the use of this software.
\r
7 Permission is granted to anyone to use this software for any purpose,
\r
8 including commercial applications, and to alter it and redistribute it freely,
\r
9 subject to the following restrictions:
\r
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
\r
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
\r
13 3. This notice may not be removed or altered from any source distribution.
\r
17 #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
\r
18 #include "vectormath/vmInclude.h"
\r
19 #include <stdio.h> //@todo: remove the debugging printf at some stage
\r
20 #include "btSoftBodySolver_OpenCLSIMDAware.h"
\r
21 #include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
\r
22 #include "BulletSoftBody/btSoftBody.h"
\r
23 #include "BulletCollision/CollisionShapes/btCapsuleShape.h"
\r
26 #define WAVEFRONT_SIZE 32
\r
27 #define WAVEFRONT_BLOCK_MULTIPLIER 2
\r
28 #define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
\r
29 #define LINKS_PER_SIMD_LANE 16
\r
31 static const size_t workGroupSize = GROUP_SIZE;
\r
34 //CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
\r
36 ////OpenCL 1.0 kernels don't use float3
\r
37 #define MSTRINGIFY(A) #A
\r
38 static const char* UpdatePositionsFromVelocitiesCLString =
\r
39 #include "OpenCLC10/UpdatePositionsFromVelocities.cl"
\r
40 static const char* SolvePositionsCLString =
\r
41 #include "OpenCLC10/SolvePositionsSIMDBatched.cl"
\r
42 static const char* UpdateNodesCLString =
\r
43 #include "OpenCLC10/UpdateNodes.cl"
\r
44 static const char* UpdatePositionsCLString =
\r
45 #include "OpenCLC10/UpdatePositions.cl"
\r
46 static const char* UpdateConstantsCLString =
\r
47 #include "OpenCLC10/UpdateConstants.cl"
\r
48 static const char* IntegrateCLString =
\r
49 #include "OpenCLC10/Integrate.cl"
\r
50 static const char* ApplyForcesCLString =
\r
51 #include "OpenCLC10/ApplyForces.cl"
\r
52 static const char* UpdateFixedVertexPositionsCLString =
\r
53 #include "OpenCLC10/UpdateFixedVertexPositions.cl"
\r
54 static const char* UpdateNormalsCLString =
\r
55 #include "OpenCLC10/UpdateNormals.cl"
\r
56 static const char* VSolveLinksCLString =
\r
57 #include "OpenCLC10/VSolveLinks.cl"
\r
58 static const char* SolveCollisionsAndUpdateVelocitiesCLString =
\r
59 #include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
\r
60 static const char* OutputToVertexArrayCLString =
\r
61 #include "OpenCLC10/OutputToVertexArray.cl"
\r
65 btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx) :
\r
66 m_cqCommandQue(queue),
\r
67 m_wavefrontSize( WAVEFRONT_SIZE ),
\r
68 m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
\r
69 m_maxBatchesWithinWave( 0 ),
\r
70 m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ),
\r
71 m_numWavefronts( 0 ),
\r
73 m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ),
\r
74 m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
\r
75 m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
\r
76 m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
\r
77 m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
\r
78 m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
\r
79 m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
\r
80 m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
\r
84 btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
\r
88 static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
\r
90 Vectormath::Aos::Vector3 outVec( vec.getX(), vec.getY(), vec.getZ() );
\r
94 /** Allocate enough space in all link-related arrays to fit numLinks links */
\r
95 void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
\r
97 int previousSize = m_links.size();
\r
98 int newSize = previousSize + numLinks;
\r
100 btSoftBodyLinkData::createLinks( numLinks );
\r
102 // Resize the link addresses array as well
\r
103 m_linkAddresses.resize( newSize );
\r
106 /** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
\r
107 void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt(
\r
108 const LinkDescription &link,
\r
111 btSoftBodyLinkData::setLinkAt( link, linkIndex );
\r
113 if( link.getVertex0() > m_maxVertex )
\r
114 m_maxVertex = link.getVertex0();
\r
115 if( link.getVertex1() > m_maxVertex )
\r
116 m_maxVertex = link.getVertex1();
\r
118 // Set the link index correctly for initialisation
\r
119 m_linkAddresses[linkIndex] = linkIndex;
\r
122 bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
\r
127 bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
\r
129 bool success = true;
\r
130 success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
\r
131 success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
\r
132 success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
\r
133 success = success && m_clLinkStrength.moveToGPU();
\r
134 success = success && m_clLinksMassLSC.moveToGPU();
\r
135 success = success && m_clLinksRestLengthSquared.moveToGPU();
\r
136 success = success && m_clLinksRestLength.moveToGPU();
\r
137 success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
\r
146 bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
\r
148 bool success = true;
\r
149 success = success && m_clNumBatchesAndVerticesWithinWaves.moveToGPU();
\r
150 success = success && m_clWavefrontVerticesGlobalAddresses.moveToGPU();
\r
151 success = success && m_clLinkVerticesLocalAddresses.moveToGPU();
\r
152 success = success && m_clLinkStrength.moveFromGPU();
\r
153 success = success && m_clLinksMassLSC.moveFromGPU();
\r
154 success = success && m_clLinksRestLengthSquared.moveFromGPU();
\r
155 success = success && m_clLinksRestLength.moveFromGPU();
\r
156 success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
\r
172 btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
\r
173 btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
\r
174 m_linkData(queue, ctx)
\r
176 // Initial we will clearly need to update solver constants
\r
177 // For now this is global for the cloths linked with this solver - we should probably make this body specific
\r
178 // for performance in future once we understand more clearly when constants need to be updated
\r
179 m_updateSolverConstants = true;
\r
181 m_shadersInitialized = false;
\r
184 btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
\r
189 void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
\r
191 if( forceUpdate || m_softBodySet.size() != softBodies.size() )
\r
193 // Have a change in the soft body set so update, reloading all the data
\r
194 getVertexData().clear();
\r
195 getTriangleData().clear();
\r
196 getLinkData().clear();
\r
197 m_softBodySet.resize(0);
\r
198 m_anchorIndex.clear();
\r
200 int maxPiterations = 0;
\r
201 int maxViterations = 0;
\r
203 for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
\r
205 btSoftBody *softBody = softBodies[ softBodyIndex ];
\r
206 using Vectormath::Aos::Matrix3;
\r
207 using Vectormath::Aos::Point3;
\r
209 // Create SoftBody that will store the information within the solver
\r
210 btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
\r
211 m_softBodySet.push_back( newSoftBody );
\r
213 m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
\r
214 m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
\r
215 m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
\r
216 m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
\r
217 m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
\r
218 m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
\r
219 // Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
\r
220 m_perClothFriction.push_back(softBody->m_cfg.kDF);
\r
221 m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
\r
223 // Add space for new vertices and triangles in the default solver for now
\r
224 // TODO: Include space here for tearing too later
\r
225 int firstVertex = getVertexData().getNumVertices();
\r
226 int numVertices = softBody->m_nodes.size();
\r
227 // Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
\r
228 // maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
\r
229 int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
\r
230 // Allocate space for new vertices in all the vertex arrays
\r
231 getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
\r
234 int firstTriangle = getTriangleData().getNumTriangles();
\r
235 int numTriangles = softBody->m_faces.size();
\r
236 int maxTriangles = numTriangles;
\r
237 getTriangleData().createTriangles( maxTriangles );
\r
239 // Copy vertices from softbody into the solver
\r
240 for( int vertex = 0; vertex < numVertices; ++vertex )
\r
242 Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
\r
243 btSoftBodyVertexData::VertexDescription desc;
\r
245 // TODO: Position in the softbody might be pre-transformed
\r
246 // or we may need to adapt for the pose.
\r
247 //desc.setPosition( cloth.getMeshTransform()*multPoint );
\r
248 desc.setPosition( multPoint );
\r
250 float vertexInverseMass = softBody->m_nodes[vertex].m_im;
\r
251 desc.setInverseMass(vertexInverseMass);
\r
252 getVertexData().setVertexAt( desc, firstVertex + vertex );
\r
254 m_anchorIndex.push_back(-1);
\r
256 for( int vertex = numVertices; vertex < maxVertices; ++vertex )
\r
258 m_anchorIndex.push_back(-1.0);
\r
261 // Copy triangles similarly
\r
262 // We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
\r
263 for( int triangle = 0; triangle < numTriangles; ++triangle )
\r
265 // Note that large array storage is relative to the array not to the cloth
\r
266 // So we need to add firstVertex to each value
\r
267 int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
\r
268 int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
\r
269 int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
\r
270 btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
\r
271 getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
\r
273 // Increase vertex triangle counts for this triangle
\r
274 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
\r
275 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
\r
276 getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
\r
279 int firstLink = getLinkData().getNumLinks();
\r
280 int numLinks = softBody->m_links.size();
\r
281 int maxLinks = numLinks;
\r
283 // Allocate space for the links
\r
284 getLinkData().createLinks( numLinks );
\r
287 for( int link = 0; link < numLinks; ++link )
\r
289 int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
\r
290 int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
\r
292 btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
\r
293 newLink.setLinkStrength(1.f);
\r
294 getLinkData().setLinkAt(newLink, firstLink + link);
\r
297 newSoftBody->setFirstVertex( firstVertex );
\r
298 newSoftBody->setFirstTriangle( firstTriangle );
\r
299 newSoftBody->setNumVertices( numVertices );
\r
300 newSoftBody->setMaxVertices( maxVertices );
\r
301 newSoftBody->setNumTriangles( numTriangles );
\r
302 newSoftBody->setMaxTriangles( maxTriangles );
\r
303 newSoftBody->setFirstLink( firstLink );
\r
304 newSoftBody->setNumLinks( numLinks );
\r
306 // Find maximum piterations and viterations
\r
307 int piterations = softBody->m_cfg.piterations;
\r
309 if ( piterations > maxPiterations )
\r
310 maxPiterations = piterations;
\r
312 int viterations = softBody->m_cfg.viterations;
\r
314 if ( viterations > maxViterations )
\r
315 maxViterations = viterations;
\r
318 for( int vertex = 0; vertex < numVertices; ++vertex )
\r
320 if ( softBody->m_nodes[vertex].m_im == 0 )
\r
322 AnchorNodeInfoCL nodeInfo;
\r
323 nodeInfo.clVertexIndex = firstVertex + vertex;
\r
324 nodeInfo.pNode = &softBody->m_nodes[vertex];
\r
326 m_anchorNodeInfoArray.push_back(nodeInfo);
\r
331 if ( numVertices > 0 )
\r
333 for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
\r
335 btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
\r
336 btSoftBody::Node* firstNode = &softBody->m_nodes[0];
\r
338 AnchorNodeInfoCL nodeInfo;
\r
339 nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
\r
340 nodeInfo.pNode = anchorNode;
\r
342 m_anchorNodeInfoArray.push_back(nodeInfo);
\r
347 m_anchorPosition.clear();
\r
348 m_anchorPosition.resize(m_anchorNodeInfoArray.size());
\r
350 for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
\r
352 const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
\r
353 m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
\r
354 getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
\r
357 updateConstants(0.f);
\r
359 // set position and velocity iterations
\r
360 setNumberOfPositionIterations(maxPiterations);
\r
361 setNumberOfVelocityIterations(maxViterations);
\r
363 // set wind velocity
\r
364 m_perClothWindVelocity.resize( m_softBodySet.size() );
\r
365 for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
\r
367 btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();
\r
368 m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
\r
371 m_clPerClothWindVelocity.changedOnCPU();
\r
373 // generate batches
\r
374 m_linkData.generateBatches();
\r
375 m_triangleData.generateBatches();
\r
377 // Build the shaders to match the batching parameters
\r
383 btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
\r
385 // TODO: Consider setting link data to "changed" here
\r
392 void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
\r
395 using namespace Vectormath::Aos;
\r
397 if( m_updateSolverConstants )
\r
399 m_updateSolverConstants = false;
\r
401 // Will have to redo this if we change the structure (tear, maybe) or various other possible changes
\r
403 // Initialise link constants
\r
404 const int numLinks = m_linkData.getNumLinks();
\r
405 for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
\r
407 btSoftBodyLinkData::LinkNodePair &vertices( m_linkData.getVertexPair(linkIndex) );
\r
408 m_linkData.getRestLength(linkIndex) = length((m_vertexData.getPosition( vertices.vertex0 ) - m_vertexData.getPosition( vertices.vertex1 )));
\r
409 float invMass0 = m_vertexData.getInverseMass(vertices.vertex0);
\r
410 float invMass1 = m_vertexData.getInverseMass(vertices.vertex1);
\r
411 float linearStiffness = m_linkData.getLinearStiffnessCoefficient(linkIndex);
\r
412 float massLSC = (invMass0 + invMass1)/linearStiffness;
\r
413 m_linkData.getMassLSC(linkIndex) = massLSC;
\r
414 float restLength = m_linkData.getRestLength(linkIndex);
\r
415 float restLengthSquared = restLength*restLength;
\r
416 m_linkData.getRestLengthSquared(linkIndex) = restLengthSquared;
\r
424 void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
\r
427 using Vectormath::Aos::Vector3;
\r
428 using Vectormath::Aos::Point3;
\r
429 using Vectormath::Aos::lengthSqr;
\r
430 using Vectormath::Aos::dot;
\r
433 int numLinks = m_linkData.getNumLinks();
\r
434 int numVertices = m_vertexData.getNumVertices();
\r
440 m_clPerClothDampingFactor.moveToGPU();
\r
441 m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
\r
444 // Ensure data is on accelerator
\r
445 m_linkData.moveToAccelerator();
\r
446 m_vertexData.moveToAccelerator();
\r
451 prepareCollisionConstraints();
\r
454 for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
\r
457 for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
\r
459 int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
\r
460 int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
\r
461 solveLinksForPosition( startWave, numWaves, kst, ti );
\r
463 } // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
\r
466 // At this point assume that the force array is blank - we will overwrite it
\r
467 solveCollisionsAndUpdateVelocities( 1.f/solverdt );
\r
471 //////////////////////////////////////
\r
472 // Kernel dispatches
\r
475 void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
\r
478 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
\r
479 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
\r
480 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
\r
481 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
\r
484 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
\r
485 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
\r
486 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
\r
487 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
\r
489 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
\r
490 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
\r
491 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
\r
493 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
\r
494 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
\r
495 ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
\r
497 size_t numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
\r
499 ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
\r
501 if( ciErrNum!= CL_SUCCESS )
\r
503 btAssert( 0 && "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
\r
506 } // solveLinksForPosition
\r
508 void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
\r
510 // Copy kernel parameters to GPU
\r
511 m_vertexData.moveToAccelerator();
\r
512 m_clPerClothFriction.moveToGPU();
\r
513 m_clPerClothDampingFactor.moveToGPU();
\r
514 m_clPerClothCollisionObjects.moveToGPU();
\r
515 m_clCollisionObjectDetails.moveToGPU();
\r
518 int numVerts = m_vertexData.getNumVertices();
\r
519 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
\r
520 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(int), &isolverdt);
\r
521 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
\r
522 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
\r
523 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
\r
524 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
\r
525 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
\r
526 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
\r
527 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
\r
528 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
\r
529 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
\r
530 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
\r
531 ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 12, sizeof(cl_mem),&m_vertexData.m_clVertexInverseMass.m_buffer);
\r
532 size_t numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
\r
536 ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
\r
538 if( ciErrNum != CL_SUCCESS )
\r
540 btAssert( 0 && "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
\r
544 } // btOpenCLSoftBodySolverSIMDAware::updateVelocitiesFromPositionsWithoutVelocities
\r
546 // End kernel dispatches
\r
547 /////////////////////////////////////
\r
551 bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
\r
555 if( m_shadersInitialized )
\r
558 const char* additionalMacros="";
\r
560 m_currentCLFunctions->clearKernelCompilationFailures();
\r
562 char *wavefrontMacros = new char[256];
\r
566 "-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d",
\r
567 m_linkData.getMaxVerticesPerWavefront(),
\r
568 m_linkData.getMaxBatchesPerWavefront(),
\r
569 m_linkData.getWavefrontSize(),
\r
570 WAVEFRONT_BLOCK_MULTIPLIER,
\r
571 WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
\r
573 m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
\r
574 m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
\r
575 m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
\r
576 m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
\r
577 m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
\r
578 m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
\r
579 m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
\r
580 m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
\r
582 // TODO: Rename to UpdateSoftBodies
\r
583 m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
\r
584 m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
\r
585 m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
\r
587 delete [] wavefrontMacros;
\r
589 if( m_currentCLFunctions->getKernelCompilationFailures()==0)
\r
591 m_shadersInitialized = true;
\r
594 return m_shadersInitialized;
\r
600 static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
\r
602 Vectormath::Aos::Transform3 outTransform;
\r
603 outTransform.setCol(0, toVector3(transform.getBasis().getColumn(0)));
\r
604 outTransform.setCol(1, toVector3(transform.getBasis().getColumn(1)));
\r
605 outTransform.setCol(2, toVector3(transform.getBasis().getColumn(2)));
\r
606 outTransform.setCol(3, toVector3(transform.getOrigin()));
\r
607 return outTransform;
\r
611 static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
\r
613 // A per-batch map of truth values stating whether a given vertex is in that batch
\r
614 // This allows us to significantly optimize the batching
\r
615 btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
\r
617 for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
\r
619 btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
\r
622 bool placed = false;
\r
623 while( batch < wavefrontBatches.size() && !placed )
\r
625 // Test the current batch, see if this wave shares any vertex with the waves in the batch
\r
626 bool foundSharedVertex = false;
\r
627 for( int link = 0; link < wavefront.size(); ++link )
\r
629 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
\r
630 if( (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1] )
\r
632 foundSharedVertex = true;
\r
636 if( !foundSharedVertex )
\r
638 wavefrontBatches[batch].push_back( waveIndex );
\r
639 // Insert vertices into this batch too
\r
640 for( int link = 0; link < wavefront.size(); ++link )
\r
642 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
\r
643 (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
\r
644 (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
\r
650 if( batch == wavefrontBatches.size() && !placed )
\r
652 wavefrontBatches.resize( batch + 1 );
\r
653 wavefrontBatches[batch].push_back( waveIndex );
\r
655 // And resize map as well
\r
656 mapOfVerticesInBatches.resize( batch + 1 );
\r
658 // Resize maps with total number of vertices
\r
659 mapOfVerticesInBatches[batch].resize( numVertices+1, false );
\r
661 // Insert vertices into this batch too
\r
662 for( int link = 0; link < wavefront.size(); ++link )
\r
664 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
\r
665 (mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
\r
666 (mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
\r
670 mapOfVerticesInBatches.clear();
\r
673 // Function to remove an object from a vector maintaining correct ordering of the vector
\r
674 template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
\r
676 int currentSize = vectorToUpdate.size();
\r
677 for( int i = indexToRemove; i < (currentSize-1); ++i )
\r
679 vectorToUpdate[i] = vectorToUpdate[i+1];
\r
681 if( currentSize > 0 )
\r
682 vectorToUpdate.resize( currentSize - 1 );
\r
686 * Insert element into vectorToUpdate at index index.
\r
688 template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
\r
690 vectorToUpdate.resize( vectorToUpdate.size() + 1 );
\r
691 for( int i = (vectorToUpdate.size() - 1); i > index; --i )
\r
693 vectorToUpdate[i] = vectorToUpdate[i-1];
\r
695 vectorToUpdate[index] = element;
\r
699 * Insert into btAlignedObjectArray assuming the array is ordered and maintaining both ordering and uniqueness.
\r
700 * ie it treats vectorToUpdate as an ordered set.
\r
702 template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
\r
705 while( index < vectorToUpdate.size() && vectorToUpdate[index] < element )
\r
709 if( index == vectorToUpdate.size() || vectorToUpdate[index] != element )
\r
710 insertAtIndex( vectorToUpdate, index, element );
\r
713 static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
\r
715 for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
\r
717 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
\r
718 numLinksPerVertex[nodes.vertex0]++;
\r
719 numLinksPerVertex[nodes.vertex1]++;
\r
721 int maxLinksPerVertex = 0;
\r
722 for( int vertexIndex = 0; vertexIndex < numVertices; ++vertexIndex )
\r
724 maxLinksPerVertex = btMax(numLinksPerVertex[vertexIndex], maxLinksPerVertex);
\r
726 maxLinks = maxLinksPerVertex;
\r
728 btAlignedObjectArray< int > linksFoundPerVertex;
\r
729 linksFoundPerVertex.resize( numVertices, 0 );
\r
731 listOfLinksPerVertex.resize( maxLinksPerVertex * numVertices );
\r
733 for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
\r
735 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
\r
738 int vertexIndex = nodes.vertex0;
\r
739 int linkForVertex = linksFoundPerVertex[nodes.vertex0];
\r
740 int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
\r
742 listOfLinksPerVertex[linkAddress] = linkIndex;
\r
744 linksFoundPerVertex[nodes.vertex0] = linkForVertex + 1;
\r
748 int vertexIndex = nodes.vertex1;
\r
749 int linkForVertex = linksFoundPerVertex[nodes.vertex1];
\r
750 int linkAddress = vertexIndex * maxLinksPerVertex + linkForVertex;
\r
752 listOfLinksPerVertex[linkAddress] = linkIndex;
\r
754 linksFoundPerVertex[nodes.vertex1] = linkForVertex + 1;
\r
759 static void computeBatchingIntoWavefronts(
\r
760 btSoftBodyLinkData &linkData,
\r
761 int wavefrontSize,
\r
762 int linksPerWorkItem,
\r
763 int maxLinksPerWavefront,
\r
764 btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts,
\r
765 btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
\r
766 btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
\r
771 // Attempt generation of larger batches of links.
\r
772 btAlignedObjectArray< bool > processedLink;
\r
773 processedLink.resize( linkData.getNumLinks() );
\r
774 btAlignedObjectArray< int > listOfLinksPerVertex;
\r
775 int maxLinksPerVertex = 0;
\r
777 // Count num vertices
\r
778 int numVertices = 0;
\r
779 for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
\r
781 btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
\r
782 numVertices = btMax( numVertices, nodes.vertex0 + 1 );
\r
783 numVertices = btMax( numVertices, nodes.vertex1 + 1 );
\r
786 // Need list of links per vertex
\r
787 // Compute valence of each vertex
\r
788 btAlignedObjectArray <int> numLinksPerVertex;
\r
789 numLinksPerVertex.resize(0);
\r
790 numLinksPerVertex.resize( numVertices, 0 );
\r
792 generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
\r
797 for( int vertex = 0; vertex < 10; ++vertex )
\r
799 for( int link = 0; link < numLinksPerVertex[vertex]; ++link )
\r
801 int linkAddress = vertex * maxLinksPerVertex + link;
\r
806 // At this point we know what links we have for each vertex so we can start batching
\r
808 // We want a vertex to start with, let's go with 0
\r
809 int currentVertex = 0;
\r
810 int linksProcessed = 0;
\r
812 btAlignedObjectArray <int> verticesToProcess;
\r
814 while( linksProcessed < linkData.getNumLinks() )
\r
817 int nextWavefront = linksForWavefronts.size();
\r
818 linksForWavefronts.resize( nextWavefront + 1 );
\r
819 btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
\r
820 verticesForWavefronts.resize( nextWavefront + 1 );
\r
821 btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
\r
823 linksForWavefront.resize(0);
\r
825 // Loop to find enough links to fill the wavefront
\r
826 // Stopping if we either run out of links, or fill it
\r
827 while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
\r
829 // Go through the links for the current vertex
\r
830 for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
\r
832 int linkAddress = currentVertex * maxLinksPerVertex + link;
\r
833 int linkIndex = listOfLinksPerVertex[linkAddress];
\r
835 // If we have not already processed this link, add it to the wavefront
\r
836 // Claim it as another processed link
\r
837 // Add the vertex at the far end to the list of vertices to process.
\r
838 if( !processedLink[linkIndex] )
\r
840 linksForWavefront.push_back( linkIndex );
\r
842 processedLink[linkIndex] = true;
\r
843 int v0 = linkData.getVertexPair(linkIndex).vertex0;
\r
844 int v1 = linkData.getVertexPair(linkIndex).vertex1;
\r
845 if( v0 == currentVertex )
\r
846 verticesToProcess.push_back( v1 );
\r
848 verticesToProcess.push_back( v0 );
\r
851 if( verticesToProcess.size() > 0 )
\r
853 // Get the element on the front of the queue and remove it
\r
854 currentVertex = verticesToProcess[0];
\r
855 removeFromVector( verticesToProcess, 0 );
\r
857 // If we've not yet processed all the links, find the first unprocessed one
\r
858 // and select one of its vertices as the current vertex
\r
859 if( linksProcessed < linkData.getNumLinks() )
\r
861 int searchLink = 0;
\r
862 while( processedLink[searchLink] )
\r
864 currentVertex = linkData.getVertexPair(searchLink).vertex0;
\r
869 // We have either finished or filled a wavefront
\r
870 for( int link = 0; link < linksForWavefront.size(); ++link )
\r
872 int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
\r
873 int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
\r
874 insertUniqueAndOrderedIntoVector( vertexSet, v0 );
\r
875 insertUniqueAndOrderedIntoVector( vertexSet, v1 );
\r
877 // Iterate over links mapped to the wave and batch those
\r
878 // We can run a batch on each cycle trivially
\r
880 batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
\r
881 btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
\r
884 for( int link = 0; link < linksForWavefront.size(); ++link )
\r
886 int linkIndex = linksForWavefront[link];
\r
887 btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
\r
890 bool placed = false;
\r
891 while( batch < batchesWithinWave.size() && !placed )
\r
893 bool foundSharedVertex = false;
\r
894 if( batchesWithinWave[batch].size() >= wavefrontSize )
\r
896 // If we have already filled this batch, move on to another
\r
897 foundSharedVertex = true;
\r
899 for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
\r
901 btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
\r
903 if( vertices.vertex0 == vertices2.vertex0 ||
\r
904 vertices.vertex1 == vertices2.vertex0 ||
\r
905 vertices.vertex0 == vertices2.vertex1 ||
\r
906 vertices.vertex1 == vertices2.vertex1 )
\r
908 foundSharedVertex = true;
\r
913 if( !foundSharedVertex )
\r
915 batchesWithinWave[batch].push_back( linkIndex );
\r
921 if( batch == batchesWithinWave.size() && !placed )
\r
923 batchesWithinWave.resize( batch + 1 );
\r
924 batchesWithinWave[batch].push_back( linkIndex );
\r
932 void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
\r
934 btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
\r
935 btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
\r
936 btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
\r
937 btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
\r
939 // Group the links into wavefronts
\r
940 computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
\r
943 // Batch the wavefronts
\r
944 generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
\r
946 m_numWavefronts = linksForWavefronts.size();
\r
948 // At this point we have a description of which links we need to process in each wavefront
\r
950 // First correctly fill the batch ranges vector
\r
951 int numBatches = wavefrontBatches.size();
\r
952 m_wavefrontBatchStartLengths.resize(0);
\r
954 for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
\r
956 int wavesInBatch = wavefrontBatches[batchIndex].size();
\r
957 int nextPrefixSum = prefixSum + wavesInBatch;
\r
958 m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
\r
960 prefixSum += wavesInBatch;
\r
963 // Also find max number of batches within a wave
\r
964 m_maxBatchesWithinWave = 0;
\r
965 m_maxVerticesWithinWave = 0;
\r
966 m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
\r
967 for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
\r
969 // See if the number of batches in this wave is greater than the current maxium
\r
970 int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
\r
971 int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
\r
972 m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
\r
973 m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
\r
976 // Add padding values both for alignment and as dudd addresses within LDS to compute junk rather than branch around
\r
977 m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
\r
979 // Now we know the maximum number of vertices per-wave we can resize the global vertices array
\r
980 m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
\r
982 // Grab backup copies of all the link data arrays for the sorting process
\r
983 btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair> m_links_Backup(m_links);
\r
984 btAlignedObjectArray<float> m_linkStrength_Backup(m_linkStrength);
\r
985 btAlignedObjectArray<float> m_linksMassLSC_Backup(m_linksMassLSC);
\r
986 btAlignedObjectArray<float> m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
\r
987 //btAlignedObjectArray<Vectormath::Aos::Vector3> m_linksCLength_Backup(m_linksCLength);
\r
988 //btAlignedObjectArray<float> m_linksLengthRatio_Backup(m_linksLengthRatio);
\r
989 btAlignedObjectArray<float> m_linksRestLength_Backup(m_linksRestLength);
\r
990 btAlignedObjectArray<float> m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
\r
992 // Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
\r
993 m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
994 m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
995 m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
996 m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
997 m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
998 m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
999 m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
\r
1001 // Then re-order links into wavefront blocks
\r
1003 // Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
\r
1004 int wavefrontCount = 0;
\r
1006 // Iterate over batches of wavefronts, then wavefronts in the batch
\r
1007 for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
\r
1009 btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
\r
1010 int wavefrontsInBatch = batch.size();
\r
1013 for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
\r
1016 int originalWavefrontIndex = batch[wavefrontIndex];
\r
1017 btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
\r
1018 int verticesUsedByWavefront = wavefrontVertices.size();
\r
1020 // Copy the set of vertices into the correctly structured array for use on the device
\r
1021 // Fill the non-vertices with -1s
\r
1022 // so we can mask out those reads
\r
1023 for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
\r
1025 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
\r
1027 for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
\r
1029 m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
\r
1032 // Obtain the set of batches within the current wavefront
\r
1033 btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
\r
1034 // Set the size of the batches for use in the solver, correctly ordered
\r
1035 NumBatchesVerticesPair batchesAndVertices;
\r
1036 batchesAndVertices.numBatches = batchesWithinWavefront.size();
\r
1037 batchesAndVertices.numVertices = verticesUsedByWavefront;
\r
1038 m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
\r
1041 // Now iterate over batches within the wavefront to structure the links correctly
\r
1042 for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
\r
1044 btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
\r
1045 int wavefrontBatchSize = linksInBatch.size();
\r
1047 int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
\r
1049 for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
\r
1051 int originalLinkAddress = linksInBatch[linkIndex];
\r
1052 // Reorder simple arrays trivially
\r
1053 m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
\r
1054 m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
\r
1055 m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
\r
1056 m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
\r
1057 m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
\r
1058 m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
\r
1060 // The local address is more complicated. We need to work out where a given vertex will end up
\r
1061 // by searching the set of vertices for this link and using the index as the local address
\r
1062 btSoftBodyLinkData::LinkNodePair localPair;
\r
1063 btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
\r
1064 localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
\r
1065 localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
\r
1066 m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
\r
1068 for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
\r
1070 // Put 0s into these arrays for padding for cleanliness
\r
1071 m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
\r
1072 m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
\r
1073 m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
\r
1074 m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
\r
1075 m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
\r
1076 m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
\r
1079 // For local addresses of junk data choose a set of addresses just above the range of valid ones
\r
1080 // and cycling tyhrough % 16 so that we don't have bank conficts between all dud addresses
\r
1081 // The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
\r
1082 // off the end of that range so we need no control
\r
1083 btSoftBodyLinkData::LinkNodePair localPair;
\r
1084 localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
\r
1085 localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
\r
1086 m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
\r
1098 } // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
\r