[dali_2.3.21] Merge branch 'devel/master'
[platform/core/uifw/dali-toolkit.git] / dali-physics / third-party / chipmunk2d / src / cpHastySpace.c
1 // Copyright 2013 Howling Moon Software. All rights reserved.
2 // See http://chipmunk2d.net/legal.php for more information.
3
4 #include <stdlib.h>
5 #include <stdio.h>
6
7 //TODO: Move all the thread stuff to another file
8
9 //#include <sys/param.h >
10
11 #ifdef __APPLE__
12 #include <sys/sysctl.h>
13 #endif
14
15 #ifndef _WIN32
16 #include <pthread.h>
17 #elif defined(__MINGW32__)
18 #include <pthread.h>
19 #else
20 #ifndef WIN32_LEAN_AND_MEAN
21 #define WIN32_LEAN_AND_MEAN
22 #endif
23
24 #ifndef NOMINMAX
25 #define NOMINMAX
26 #endif
27
28 #include <process.h> // _beginthreadex
29 #include <windows.h>
30
31 #ifndef ETIMEDOUT
32 #define ETIMEDOUT 1
33 #endif
34
35 // Simple pthread implementation for Windows
36 // Made from scratch to avoid the LGPL licence from pthread-win32
// Slot numbers for pthread_cond_t::events.
// MAX_EVENTS doubles as the length of that array.
enum {
	SIGNAL,     // 0: event set by pthread_cond_signal()
	BROADCAST,  // 1: event set by pthread_cond_broadcast()
	MAX_EVENTS  // 2: number of event slots
};
42
43 typedef HANDLE pthread_t;
44 typedef struct
45 {
46         // Based on http://www.cs.wustl.edu/~schmidt/win32-cv-1.html since Windows has no condition variable until NT6
47         UINT waiters_count;
48         // Count of the number of waiters.
49
50         CRITICAL_SECTION waiters_count_lock;
51         // Serialize access to <waiters_count_>.
52
53         HANDLE events[MAX_EVENTS];
54 } pthread_cond_t;
55 typedef CRITICAL_SECTION pthread_mutex_t;
56
// Dummy attribute type; this shim ignores condition variable attributes.
// The placeholder member is needed because a struct with no members is not
// valid ISO C (it is only accepted as a compiler extension).
typedef struct { char unused; } pthread_condattr_t;
58
59 int pthread_cond_destroy(pthread_cond_t* cv)
60 {
61         CloseHandle(cv->events[BROADCAST]);
62         CloseHandle(cv->events[SIGNAL]);
63
64         DeleteCriticalSection(&cv->waiters_count_lock);
65
66         return 0;
67 }
68
69 int pthread_cond_init(pthread_cond_t* cv, const pthread_condattr_t* attr)
70 {
71         // Initialize the count to 0.
72         cv->waiters_count = 0;
73
74         // Create an auto-reset event.
75         cv->events[SIGNAL] = CreateEvent(NULL,  // no security
76                                          FALSE, // auto-reset event
77                                          FALSE, // non-signaled initially
78                                          NULL); // unnamed
79
80         // Create a manual-reset event.
81         cv->events[BROADCAST] = CreateEvent(NULL,  // no security
82                                             TRUE,  // manual-reset
83                                             FALSE, // non-signaled initially
84                                             NULL); // unnamed
85
86         InitializeCriticalSection(&cv->waiters_count_lock);
87
88         return 0;
89 }
90
91 int pthread_cond_broadcast(pthread_cond_t *cv)
92 {
93         // Avoid race conditions.
94         EnterCriticalSection(&cv->waiters_count_lock);
95         int have_waiters = cv->waiters_count > 0;
96         LeaveCriticalSection(&cv->waiters_count_lock);
97
98         if (have_waiters)
99                 SetEvent(cv->events[BROADCAST]);
100
101         return 0;
102 }
103
104 int pthread_cond_signal(pthread_cond_t* cv)
105 {
106         // Avoid race conditions.
107         EnterCriticalSection(&cv->waiters_count_lock);
108         int have_waiters = cv->waiters_count > 0;
109         LeaveCriticalSection(&cv->waiters_count_lock);
110
111         if (have_waiters)
112                 SetEvent(cv->events[SIGNAL]);
113
114         return 0;
115 }
116
117 int pthread_cond_wait(pthread_cond_t* cv, pthread_mutex_t* external_mutex)
118 {
119         // Avoid race conditions.
120         EnterCriticalSection(&cv->waiters_count_lock);
121         cv->waiters_count++;
122         LeaveCriticalSection(&cv->waiters_count_lock);
123
124         // It's ok to release the <external_mutex> here since Win32
125         // manual-reset events maintain state when used with
126         // <SetEvent>.  This avoids the "lost wakeup" bug...
127         LeaveCriticalSection(external_mutex);
128
129         // Wait for either event to become signaled due to <pthread_cond_signal>
130         // being called or <pthread_cond_broadcast> being called.
131         int result = WaitForMultipleObjects(2, cv->events, FALSE, INFINITE);
132
133         EnterCriticalSection(&cv->waiters_count_lock);
134         cv->waiters_count--;
135         int last_waiter =
136                 result == WAIT_OBJECT_0 + BROADCAST
137                 && cv->waiters_count == 0;
138         LeaveCriticalSection(&cv->waiters_count_lock);
139
140         // Some thread called <pthread_cond_broadcast>.
141         if (last_waiter)
142                 // We're the last waiter to be notified or to stop waiting, so
143                 // reset the manual event. 
144                 ResetEvent(cv->events[BROADCAST]);
145
146         // Reacquire the <external_mutex>.
147         EnterCriticalSection(external_mutex);
148
149         return result == WAIT_TIMEOUT ? ETIMEDOUT : 0;
150 }
151
// Dummy attribute type; this shim ignores mutex attributes.
// The placeholder member is needed because a struct with no members is not
// valid ISO C (it is only accepted as a compiler extension).
typedef struct { char unused; } pthread_mutexattr_t;
153
154 int pthread_mutex_init(pthread_mutex_t* mutex, const pthread_mutexattr_t* attr)
155 {
156         InitializeCriticalSection(mutex);
157         return 0;
158 }
159
160 int pthread_mutex_destroy(pthread_mutex_t* mutex)
161 {
162         DeleteCriticalSection(mutex);
163         return 0;
164 }
165
166 int pthread_mutex_lock(pthread_mutex_t* mutex)
167 {
168         EnterCriticalSection(mutex);
169         return 0;
170 }
171
172 int pthread_mutex_unlock(pthread_mutex_t* mutex)
173 {
174         LeaveCriticalSection(mutex);
175         return 0;
176 }
177
// Dummy attribute type; this shim ignores thread attributes.
// The placeholder member is needed because a struct with no members is not
// valid ISO C (it is only accepted as a compiler extension).
typedef struct { char unused; } pthread_attr_t;
179
180 typedef struct
181 {
182         void *(*start_routine) (void *);
183         void* arg;
184 } pthread_internal_thread;
185
186 unsigned int __stdcall ThreadProc(void* userdata)
187 {
188         pthread_internal_thread* ud = (pthread_internal_thread*) userdata;
189         ud->start_routine(ud->arg);
190
191         free(ud);
192
193         return 0;
194 }
195
196 int pthread_create(pthread_t* thread, const pthread_attr_t* attr, void *(*start_routine) (void *), void *arg)
197 {
198         pthread_internal_thread* ud = (pthread_internal_thread*) malloc(sizeof(pthread_internal_thread));
199         ud->start_routine = start_routine;
200         ud->arg = arg;
201
202         *thread = (HANDLE) (_beginthreadex(NULL, 0, &ThreadProc, ud, 0, NULL));
203         if (!*thread)
204                 return 1;
205
206         return 0;
207 }
208
209 int pthread_join(pthread_t thread, void **value_ptr)
210 {
211         WaitForSingleObject(thread, INFINITE);
212         CloseHandle(thread);
213
214         return 0;
215 }
216
217 #endif
218
219 #include "chipmunk/chipmunk_private.h"
220 #include "chipmunk/cpHastySpace.h"
221
222
223 //MARK: ARM NEON Solver
224
225 #if __ARM_NEON__
226 #include <arm_neon.h>
227
228 // Tested and known to work fine with Clang 3.0 and GCC 4.2
229 // Doesn't work with Clang 1.6, and I have no idea why.
230 #if defined(__clang_major__) && __clang_major__ < 3
231         #error Compiler not supported.
232 #endif
233
234 #if CP_USE_DOUBLES
235         #if !__arm64
236                 #error Cannot use CP_USE_DOUBLES on 32 bit ARM.
237         #endif
238         
239         typedef float64_t cpFloat_t;
240         typedef float64x2_t cpFloatx2_t;
241         #define vld vld1q_f64
242         #define vdup_n vdupq_n_f64
243         #define vst vst1q_f64
244         #define vst_lane vst1q_lane_f64
245         #define vadd vaddq_f64
246         #define vsub vsubq_f64
247         #define vpadd vpaddq_f64
248         #define vmul vmulq_f64
249         #define vmul_n vmulq_n_f64
250         #define vneg vnegq_f64
251         #define vget_lane vgetq_lane_f64
252         #define vset_lane vsetq_lane_f64
253         #define vmin vminq_f64
254         #define vmax vmaxq_f64
255         #define vrev(__a) __builtin_shufflevector(__a, __a, 1, 0)
256 #else
257         typedef float32_t cpFloat_t;
258         typedef float32x2_t cpFloatx2_t;
259         #define vld vld1_f32
260         #define vdup_n vdup_n_f32
261         #define vst vst1_f32
262         #define vst_lane vst1_lane_f32
263         #define vadd vadd_f32
264         #define vsub vsub_f32
265         #define vpadd vpadd_f32
266         #define vmul vmul_f32
267         #define vmul_n vmul_n_f32
268         #define vneg vneg_f32
269         #define vget_lane vget_lane_f32
270         #define vset_lane vset_lane_f32
271         #define vmin vmin_f32
272         #define vmax vmax_f32
273         #define vrev vrev64_f32
274 #endif
275
276 // TODO could probably do better here, maybe using vcreate?
277 // especially for the constants
278 // Maybe use the {} notation for GCC/Clang?
279 static inline cpFloatx2_t
280 vmake(cpFloat_t x, cpFloat_t y)
281 {
282 //      cpFloatx2_t v = {};
283 //      v = vset_lane(x, v, 0);
284 //      v = vset_lane(y, v, 1);
285 //      
286 //      return v;
287         
288         // This might not be super compatible, but all the NEON headers use it...
289         return (cpFloatx2_t){x, y};
290 }
291
292 static void
293 cpArbiterApplyImpulse_NEON(cpArbiter *arb)
294 {
295         cpBody *a = arb->body_a;
296         cpBody *b = arb->body_b;
297         cpFloatx2_t surface_vr = vld((cpFloat_t *)&arb->surface_vr);
298         cpFloatx2_t n = vld((cpFloat_t *)&arb->n);
299         cpFloat_t friction = arb->u;
300         
301         int numContacts = arb->count;
302         struct cpContact *contacts = arb->contacts;
303         for(int i=0; i<numContacts; i++){
304                 struct cpContact *con = contacts + i;
305                 cpFloatx2_t r1 = vld((cpFloat_t *)&con->r1);
306                 cpFloatx2_t r2 = vld((cpFloat_t *)&con->r2);
307                 
308                 cpFloatx2_t perp = vmake(-1.0, 1.0);
309                 cpFloatx2_t r1p = vmul(vrev(r1), perp);
310                 cpFloatx2_t r2p = vmul(vrev(r2), perp);
311                 
312                 cpFloatx2_t vBias_a = vld((cpFloat_t *)&a->v_bias);
313                 cpFloatx2_t vBias_b = vld((cpFloat_t *)&b->v_bias);
314                 cpFloatx2_t wBias = vmake(a->w_bias, b->w_bias);
315                 
316                 cpFloatx2_t vb1 = vadd(vBias_a, vmul_n(r1p, vget_lane(wBias, 0)));
317                 cpFloatx2_t vb2 = vadd(vBias_b, vmul_n(r2p, vget_lane(wBias, 1)));
318                 cpFloatx2_t vbr = vsub(vb2, vb1);
319                 
320                 cpFloatx2_t v_a = vld((cpFloat_t *)&a->v);
321                 cpFloatx2_t v_b = vld((cpFloat_t *)&b->v);
322                 cpFloatx2_t w = vmake(a->w, b->w);
323                 cpFloatx2_t v1 = vadd(v_a, vmul_n(r1p, vget_lane(w, 0)));
324                 cpFloatx2_t v2 = vadd(v_b, vmul_n(r2p, vget_lane(w, 1)));
325                 cpFloatx2_t vr = vsub(v2, v1);
326                 
327                 cpFloatx2_t vbn_vrn = vpadd(vmul(vbr, n), vmul(vr, n));
328                 
329                 cpFloatx2_t v_offset = vmake(con->bias, -con->bounce);
330                 cpFloatx2_t jOld = vmake(con->jBias, con->jnAcc);
331                 cpFloatx2_t jbn_jn = vmul_n(vsub(v_offset, vbn_vrn), con->nMass);
332                 jbn_jn = vmax(vadd(jOld, jbn_jn), vdup_n(0.0));
333                 cpFloatx2_t jApply = vsub(jbn_jn, jOld);
334                 
335                 cpFloatx2_t t = vmul(vrev(n), perp);
336                 cpFloatx2_t vrt_tmp = vmul(vadd(vr, surface_vr), t);
337                 cpFloatx2_t vrt = vpadd(vrt_tmp, vrt_tmp);
338                 
339                 cpFloatx2_t jtOld = {}; jtOld = vset_lane(con->jtAcc, jtOld, 0);
340                 cpFloatx2_t jtMax = vrev(vmul_n(jbn_jn, friction));
341                 cpFloatx2_t jt = vmul_n(vrt, -con->tMass);
342                 jt = vmax(vneg(jtMax), vmin(vadd(jtOld, jt), jtMax));
343                 cpFloatx2_t jtApply = vsub(jt, jtOld);
344                 
345                 cpFloatx2_t i_inv = vmake(-a->i_inv, b->i_inv);
346                 cpFloatx2_t nperp = vmake(1.0, -1.0);
347                 
348                 cpFloatx2_t jBias = vmul_n(n, vget_lane(jApply, 0));
349                 cpFloatx2_t jBiasCross = vmul(vrev(jBias), nperp);
350                 cpFloatx2_t biasCrosses = vpadd(vmul(r1, jBiasCross), vmul(r2, jBiasCross));
351                 wBias = vadd(wBias, vmul(i_inv, biasCrosses));
352                 
353                 vBias_a = vsub(vBias_a, vmul_n(jBias, a->m_inv));
354                 vBias_b = vadd(vBias_b, vmul_n(jBias, b->m_inv));
355                 
356                 cpFloatx2_t j = vadd(vmul_n(n, vget_lane(jApply, 1)), vmul_n(t, vget_lane(jtApply, 0)));
357                 cpFloatx2_t jCross = vmul(vrev(j), nperp);
358                 cpFloatx2_t crosses = vpadd(vmul(r1, jCross), vmul(r2, jCross));
359                 w = vadd(w, vmul(i_inv, crosses));
360                 
361                 v_a = vsub(v_a, vmul_n(j, a->m_inv));
362                 v_b = vadd(v_b, vmul_n(j, b->m_inv));
363                 
364                 // TODO would moving these earlier help pipeline them better?
365                 vst((cpFloat_t *)&a->v_bias, vBias_a);
366                 vst((cpFloat_t *)&b->v_bias, vBias_b);
367                 vst_lane((cpFloat_t *)&a->w_bias, wBias, 0);
368                 vst_lane((cpFloat_t *)&b->w_bias, wBias, 1);
369                 
370                 vst((cpFloat_t *)&a->v, v_a);
371                 vst((cpFloat_t *)&b->v, v_b);
372                 vst_lane((cpFloat_t *)&a->w, w, 0);
373                 vst_lane((cpFloat_t *)&b->w, w, 1);
374                 
375                 vst_lane((cpFloat_t *)&con->jBias, jbn_jn, 0);
376                 vst_lane((cpFloat_t *)&con->jnAcc, jbn_jn, 1);
377                 vst_lane((cpFloat_t *)&con->jtAcc, jt, 0);
378         }
379 }
380
381 #endif
382
383 //MARK: PThreads
384
// Right now using more than 2 threads probably won't help your performance any.
386 // If you are using a ridiculous number of iterations it could help though.
387 #define MAX_THREADS 2
388
389 struct ThreadContext {
390         pthread_t thread;
391         cpHastySpace *space;
392         unsigned long thread_num;
393 };
394
395 typedef void (*cpHastySpaceWorkFunction)(cpSpace *space, unsigned long worker, unsigned long worker_count);
396
397 struct cpHastySpace {
398         cpSpace space;
399         
400         // Number of worker threads (including the main thread)
401         unsigned long num_threads;
402         
403         // Number of worker threads currently executing. (also including the main thread)
404         unsigned long num_working;
405         
406         // Number of constraints (plus contacts) that must exist per step to start the worker threads.
407         unsigned long constraint_count_threshold;
408         
409         pthread_mutex_t mutex;
410         pthread_cond_t cond_work, cond_resume;
411         
412         // Work function to invoke.
413         cpHastySpaceWorkFunction work;
414         
415         struct ThreadContext workers[MAX_THREADS - 1];
416 };
417
418 static void *
419 WorkerThreadLoop(struct ThreadContext *context)
420 {
421         cpHastySpace *hasty = context->space;
422         
423         unsigned long thread = context->thread_num;
424         unsigned long num_threads = hasty->num_threads;
425         
426         for(;;){
427                 pthread_mutex_lock(&hasty->mutex); {
428                         if(--hasty->num_working == 0){
429                                 pthread_cond_signal(&hasty->cond_resume);
430                         }
431                         
432                         pthread_cond_wait(&hasty->cond_work, &hasty->mutex);
433                 } pthread_mutex_unlock(&hasty->mutex);
434                 
435                 cpHastySpaceWorkFunction func = hasty->work;
436                 if(func){
437                         hasty->work(&hasty->space, thread, num_threads);
438                 } else {
439                         break;
440                 }
441         }
442         
443         return NULL;
444 }
445
446 static void
447 RunWorkers(cpHastySpace *hasty, cpHastySpaceWorkFunction func)
448 {
449         hasty->num_working = hasty->num_threads - 1;
450         hasty->work = func;
451         
452         if(hasty->num_working > 0){
453                 pthread_mutex_lock(&hasty->mutex); {
454                         pthread_cond_broadcast(&hasty->cond_work);
455                 } pthread_mutex_unlock(&hasty->mutex);
456                 
457                 func((cpSpace *)hasty, 0, hasty->num_threads);
458                         
459                 pthread_mutex_lock(&hasty->mutex); {
460                         if(hasty->num_working > 0){
461                                 pthread_cond_wait(&hasty->cond_resume, &hasty->mutex);
462                         }
463                 } pthread_mutex_unlock(&hasty->mutex);
464         } else {
465                 func((cpSpace *)hasty, 0, hasty->num_threads);
466         }
467         
468         hasty->work = NULL;
469 }
470
471 static void
472 Solver(cpSpace *space, unsigned long worker, unsigned long worker_count)
473 {
474         cpArray *constraints = space->constraints;
475         cpArray *arbiters = space->arbiters;
476         
477         cpFloat dt = space->curr_dt;
478         unsigned long iterations = (space->iterations + worker_count - 1)/worker_count;
479         
480         for(unsigned long i=0; i<iterations; i++){
481                 for(int j=0; j<arbiters->num; j++){
482                         cpArbiter *arb = (cpArbiter *)arbiters->arr[j];
483                         #ifdef __ARM_NEON__
484                                 cpArbiterApplyImpulse_NEON(arb);
485                         #else
486                                 cpArbiterApplyImpulse(arb);
487                         #endif
488                 }
489                         
490                 for(int j=0; j<constraints->num; j++){
491                         cpConstraint *constraint = (cpConstraint *)constraints->arr[j];
492                         constraint->klass->applyImpulse(constraint, dt);
493                 }
494         }
495 }
496
497 //MARK: Thread Management Functions
498
499 static void
500 HaltThreads(cpHastySpace *hasty)
501 {
502         pthread_mutex_t *mutex = &hasty->mutex;
503         pthread_mutex_lock(mutex); {
504                 hasty->work = NULL; // NULL work function means break and exit
505                 pthread_cond_broadcast(&hasty->cond_work);
506         } pthread_mutex_unlock(mutex);
507         
508         for(unsigned long i=0; i<(hasty->num_threads-1); i++){
509                 pthread_join(hasty->workers[i].thread, NULL);
510         }
511 }
512
513 void
514 cpHastySpaceSetThreads(cpSpace *space, unsigned long threads)
515 {
516 #if TARGET_IPHONE_SIMULATOR == 1
517         // Individual values appear to be written non-atomically when compiled as debug for the simulator.
518         // No idea why, so threads are disabled.
519         threads = 1;
520 #endif  
521         
522         cpHastySpace *hasty = (cpHastySpace *)space;
523         HaltThreads(hasty);
524         
525 #ifdef __APPLE__
526         if(threads == 0){
527                 size_t size = sizeof(threads);
528                 sysctlbyname("hw.ncpu", &threads, &size, NULL, 0);
529         }
530 #else
531         if(threads == 0) threads = 1;
532 #endif
533         
534         hasty->num_threads = (threads < MAX_THREADS ? threads : MAX_THREADS);
535         hasty->num_working = hasty->num_threads - 1;
536         
537         // Create the worker threads and wait for them to signal ready.
538         if(hasty->num_working > 0){
539                 pthread_mutex_lock(&hasty->mutex);
540                 for(unsigned long i=0; i<(hasty->num_threads-1); i++){
541                         hasty->workers[i].space = hasty;
542                         hasty->workers[i].thread_num = i + 1;
543                         
544                         pthread_create(&hasty->workers[i].thread, NULL, (void*(*)(void*))WorkerThreadLoop, &hasty->workers[i]);
545                 }
546                 
547                 pthread_cond_wait(&hasty->cond_resume, &hasty->mutex);
548                 pthread_mutex_unlock(&hasty->mutex);
549         }
550 }
551
552 unsigned long
553 cpHastySpaceGetThreads(cpSpace *space)
554 {
555         return ((cpHastySpace *)space)->num_threads;
556 }
557
//MARK: Overridden cpSpace Functions.
559
560 cpSpace *
561 cpHastySpaceNew(void)
562 {
563         cpHastySpace *hasty = (cpHastySpace *)cpcalloc(1, sizeof(cpHastySpace));
564         cpSpaceInit((cpSpace *)hasty);
565         
566         pthread_mutex_init(&hasty->mutex, NULL);
567         pthread_cond_init(&hasty->cond_work, NULL);
568         pthread_cond_init(&hasty->cond_resume, NULL);
569         
570         // TODO magic number, should test this more thoroughly.
571         hasty->constraint_count_threshold = 50;
572         
573         // Default to 1 thread for determinism.
574         hasty->num_threads = 1;
575         cpHastySpaceSetThreads((cpSpace *)hasty, 1);
576
577         return (cpSpace *)hasty;
578 }
579
580 void
581 cpHastySpaceFree(cpSpace *space)
582 {
583         cpHastySpace *hasty = (cpHastySpace *)space;
584         
585         HaltThreads(hasty);
586         
587         pthread_mutex_destroy(&hasty->mutex);
588         pthread_cond_destroy(&hasty->cond_work);
589         pthread_cond_destroy(&hasty->cond_resume);
590         
591         cpSpaceFree(space);
592 }
593
594 void
595 cpHastySpaceStep(cpSpace *space, cpFloat dt)
596 {
597         // don't step if the timestep is 0!
598         if(dt == 0.0f) return;
599         
600         space->stamp++;
601         
602         cpFloat prev_dt = space->curr_dt;
603         space->curr_dt = dt;
604                 
605         cpArray *bodies = space->dynamicBodies;
606         cpArray *constraints = space->constraints;
607         cpArray *arbiters = space->arbiters;
608         
609         // Reset and empty the arbiter list.
610         for(int i=0; i<arbiters->num; i++){
611                 cpArbiter *arb = (cpArbiter *)arbiters->arr[i];
612                 arb->state = CP_ARBITER_STATE_NORMAL;
613                 
614                 // If both bodies are awake, unthread the arbiter from the contact graph.
615                 if(!cpBodyIsSleeping(arb->body_a) && !cpBodyIsSleeping(arb->body_b)){
616                         cpArbiterUnthread(arb);
617                 }
618         }
619         arbiters->num = 0;
620         
621         cpSpaceLock(space); {
622                 // Integrate positions
623                 for(int i=0; i<bodies->num; i++){
624                         cpBody *body = (cpBody *)bodies->arr[i];
625                         body->position_func(body, dt);
626                 }
627                 
628                 // Find colliding pairs.
629                 cpSpacePushFreshContactBuffer(space);
630                 cpSpatialIndexEach(space->dynamicShapes, (cpSpatialIndexIteratorFunc)cpShapeUpdateFunc, NULL);
631                 cpSpatialIndexReindexQuery(space->dynamicShapes, (cpSpatialIndexQueryFunc)cpSpaceCollideShapes, space);
632         } cpSpaceUnlock(space, cpFalse);
633         
634         // Rebuild the contact graph (and detect sleeping components if sleeping is enabled)
635         cpSpaceProcessComponents(space, dt);
636         
637         cpSpaceLock(space); {
638                 // Clear out old cached arbiters and call separate callbacks
639                 cpHashSetFilter(space->cachedArbiters, (cpHashSetFilterFunc)cpSpaceArbiterSetFilter, space);
640
641                 // Prestep the arbiters and constraints.
642                 cpFloat slop = space->collisionSlop;
643                 cpFloat biasCoef = 1.0f - cpfpow(space->collisionBias, dt);
644                 for(int i=0; i<arbiters->num; i++){
645                         cpArbiterPreStep((cpArbiter *)arbiters->arr[i], dt, slop, biasCoef);
646                 }
647
648                 for(int i=0; i<constraints->num; i++){
649                         cpConstraint *constraint = (cpConstraint *)constraints->arr[i];
650                         
651                         cpConstraintPreSolveFunc preSolve = constraint->preSolve;
652                         if(preSolve) preSolve(constraint, space);
653                         
654                         constraint->klass->preStep(constraint, dt);
655                 }
656         
657                 // Integrate velocities.
658                 cpFloat damping = cpfpow(space->damping, dt);
659                 cpVect gravity = space->gravity;
660                 for(int i=0; i<bodies->num; i++){
661                         cpBody *body = (cpBody *)bodies->arr[i];
662                         body->velocity_func(body, gravity, damping, dt);
663                 }
664                 
665                 // Apply cached impulses
666                 cpFloat dt_coef = (prev_dt == 0.0f ? 0.0f : dt/prev_dt);
667                 for(int i=0; i<arbiters->num; i++){
668                         cpArbiterApplyCachedImpulse((cpArbiter *)arbiters->arr[i], dt_coef);
669                 }
670                 
671                 for(int i=0; i<constraints->num; i++){
672                         cpConstraint *constraint = (cpConstraint *)constraints->arr[i];
673                         constraint->klass->applyCachedImpulse(constraint, dt_coef);
674                 }
675                 
676                 // Run the impulse solver.
677                 cpHastySpace *hasty = (cpHastySpace *)space;
678                 if((unsigned long)(arbiters->num + constraints->num) > hasty->constraint_count_threshold){
679                         RunWorkers(hasty, Solver);
680                 } else {
681                         Solver(space, 0, 1);
682                 }
683                 
684                 // Run the constraint post-solve callbacks
685                 for(int i=0; i<constraints->num; i++){
686                         cpConstraint *constraint = (cpConstraint *)constraints->arr[i];
687                         
688                         cpConstraintPostSolveFunc postSolve = constraint->postSolve;
689                         if(postSolve) postSolve(constraint, space);
690                 }
691                 
692                 // run the post-solve callbacks
693                 for(int i=0; i<arbiters->num; i++){
694                         cpArbiter *arb = (cpArbiter *) arbiters->arr[i];
695                         
696                         cpCollisionHandler *handler = arb->handler;
697                         handler->postSolveFunc(arb, space, handler->userData);
698                 }
699         } cpSpaceUnlock(space, cpTrue);
700 }