1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Parallel for algorithm.
12 // the thread's iteration space [32lsb, 32msb)
20 byte pad[CacheLineSize];
24 runtime_parforalloc(uint32 nthrmax)
28 // The ParFor object is followed by CacheLineSize padding
29 // and then nthrmax ParForThread.
30 desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
31 desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
32 desc->nthrmax = nthrmax;
36 // For testing from Go
37 // func parforalloc2(nthrmax uint32) *ParFor
39 ParFor *runtime_parforalloc2(uint32)
40 __asm__ (GOSYM_PREFIX "runtime.parforalloc2");
43 runtime_parforalloc2(uint32 nthrmax)
45 return runtime_parforalloc(nthrmax);
49 runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
54 if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
55 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
56 runtime_throw("parfor: invalid args");
71 for(i=0; i<nthr; i++) {
72 begin = (uint64)n*i / nthr;
73 end = (uint64)n*(i+1) / nthr;
74 pos = &desc->thr[i].pos;
75 if(((uintptr)pos & 7) != 0)
76 runtime_throw("parforsetup: pos is not aligned");
77 *pos = (uint64)begin | (((uint64)end)<<32);
81 // For testing from Go
82 // func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))
84 void runtime_parforsetup2(ParFor *, uint32, uint32, void *, bool, void *)
85 __asm__ (GOSYM_PREFIX "runtime.parforsetup2");
88 runtime_parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body)
90 runtime_parforsetup(desc, nthr, n, ctx, wait, *(void(**)(ParFor*, uint32))body);
94 runtime_parfordo(ParFor *desc)
97 uint32 tid, begin, end, begin2, try, victim, i;
98 uint64 *mypos, *victimpos, pos, newpos;
99 void (*body)(ParFor*, uint32);
102 // Obtain 0-based thread index.
103 tid = runtime_xadd(&desc->thrseq, 1) - 1;
104 if(tid >= desc->nthr) {
105 runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
106 runtime_throw("parfor: invalid tid");
109 // If single-threaded, just execute the for serially.
111 for(i=0; i<desc->cnt; i++)
117 me = &desc->thr[tid];
121 // While there is local work,
122 // bump low index and execute the iteration.
123 pos = runtime_xadd64(mypos, 1);
124 begin = (uint32)pos-1;
125 end = (uint32)(pos>>32);
133 // Out of work, need to steal something.
136 // If we don't see any work for long enough,
137 // increment the done counter...
138 if(try > desc->nthr*4 && !idle) {
140 runtime_xadd(&desc->done, 1);
142 // ...if all threads have incremented the counter,
144 if(desc->done + !idle == desc->nthr) {
146 runtime_xadd(&desc->done, 1);
149 // Choose a random victim for stealing.
150 victim = runtime_fastrand1() % (desc->nthr-1);
153 victimpos = &desc->thr[victim].pos;
154 pos = runtime_atomicload64(victimpos);
156 // See if it has any work.
158 end = (uint32)(pos>>32);
164 runtime_xadd(&desc->done, -1);
167 begin2 = begin + (end-begin)/2;
168 newpos = (uint64)begin | (uint64)begin2<<32;
169 if(runtime_cas64(victimpos, &pos, newpos)) {
175 // Has successfully stolen some work.
177 runtime_throw("parfor: should not be idle");
178 runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
180 me->nstealcnt += end-begin;
184 if(try < desc->nthr) {
186 } else if (try < 4*desc->nthr) {
188 runtime_procyield(20);
189 // If a caller asked not to wait for the others, exit now
190 // (assume that most work is already done at this point).
191 } else if (!desc->wait) {
193 runtime_xadd(&desc->done, 1);
195 } else if (try < 6*desc->nthr) {
205 runtime_xadd64(&desc->nsteal, me->nsteal);
206 runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
207 runtime_xadd64(&desc->nprocyield, me->nprocyield);
208 runtime_xadd64(&desc->nosyield, me->nosyield);
209 runtime_xadd64(&desc->nsleep, me->nsleep);
217 // For testing from Go
218 // func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)
220 struct parforiters_ret {
225 struct parforiters_ret runtime_parforiters(ParFor *, uintptr)
226 __asm__ (GOSYM_PREFIX "runtime.parforiters");
228 struct parforiters_ret
229 runtime_parforiters(ParFor *desc, uintptr tid)
231 struct parforiters_ret ret;
233 ret.start = (uint32)desc->thr[tid].pos;
234 ret.end = (uint32)(desc->thr[tid].pos>>32);