libgo/runtime/parfor.c
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Parallel for algorithm.

#include "runtime.h"
#include "arch.h"

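// A rough sketch of how these pieces fit together (a hedged sketch, not a
// verbatim caller: the real users live elsewhere in the runtime, e.g. the
// garbage collector, and nthrmax/nthr/n/ctx/body below are placeholders):
//
//   desc = runtime_parforalloc(nthrmax);
//   runtime_parforsetup(desc, nthr, n, ctx, true, body);
//   // each of the nthr worker threads then calls
//   runtime_parfordo(desc);
//
// parforsetup splits the n iterations into nthr contiguous ranges; parfordo
// runs the local range and then steals halves of other threads' ranges.
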
struct ParForThread
{
        // the thread's iteration space: the next index to process in the
        // low 32 bits, the end index in the high 32 bits, i.e. [32lsb, 32msb)
        uint64 pos;
        // stats
        uint64 nsteal;
        uint64 nstealcnt;
        uint64 nprocyield;
        uint64 nosyield;
        uint64 nsleep;
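        // padding so that each thread's ParForThread (in particular its hot
        // pos word) sits on its own cache line, avoiding false sharing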
        byte pad[CacheLineSize];
};

ParFor*
runtime_parforalloc(uint32 nthrmax)
{
        ParFor *desc;

        // The ParFor object is followed by CacheLineSize padding
        // and then nthrmax ParForThread.
        desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
        desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
        desc->nthrmax = nthrmax;
        return desc;
}

// For testing from Go
// func parforalloc2(nthrmax uint32) *ParFor

ParFor *runtime_parforalloc2(uint32)
  __asm__ (GOSYM_PREFIX "runtime.parforalloc2");

ParFor *
runtime_parforalloc2(uint32 nthrmax)
{
        return runtime_parforalloc(nthrmax);
}

void
runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
{
        uint32 i, begin, end;
        uint64 *pos;

        if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
                runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
                runtime_throw("parfor: invalid args");
        }

        desc->body = body;
        desc->done = 0;
        desc->nthr = nthr;
        desc->thrseq = 0;
        desc->cnt = n;
        desc->ctx = ctx;
        desc->wait = wait;
        desc->nsteal = 0;
        desc->nstealcnt = 0;
        desc->nprocyield = 0;
        desc->nosyield = 0;
        desc->nsleep = 0;
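        // Split the n iterations into nthr roughly equal contiguous ranges;
        // for example, n=10 and nthr=3 yields [0,3), [3,6) and [6,10).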
        for(i=0; i<nthr; i++) {
                begin = (uint64)n*i / nthr;
                end = (uint64)n*(i+1) / nthr;
                pos = &desc->thr[i].pos;
                if(((uintptr)pos & 7) != 0)
                        runtime_throw("parforsetup: pos is not aligned");
                *pos = (uint64)begin | (((uint64)end)<<32);
        }
}

// For testing from Go
// func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))

void runtime_parforsetup2(ParFor *, uint32, uint32, void *, bool, void *)
  __asm__ (GOSYM_PREFIX "runtime.parforsetup2");

void
runtime_parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body)
{
        runtime_parforsetup(desc, nthr, n, ctx, wait, *(void(**)(ParFor*, uint32))body);
}

void
runtime_parfordo(ParFor *desc)
{
        ParForThread *me;
        uint32 tid, begin, end, begin2, try, victim, i;
        uint64 *mypos, *victimpos, pos, newpos;
        void (*body)(ParFor*, uint32);
        bool idle;

        // Obtain 0-based thread index.
        tid = runtime_xadd(&desc->thrseq, 1) - 1;
        if(tid >= desc->nthr) {
                runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
                runtime_throw("parfor: invalid tid");
        }

        // If single-threaded, just execute the for serially.
        if(desc->nthr==1) {
                for(i=0; i<desc->cnt; i++)
                        desc->body(desc, i);
                return;
        }

        body = desc->body;
        me = &desc->thr[tid];
        mypos = &me->pos;
        for(;;) {
                for(;;) {
                        // While there is local work,
                        // bump low index and execute the iteration.
                        pos = runtime_xadd64(mypos, 1);
                        begin = (uint32)pos-1;
                        end = (uint32)(pos>>32);
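                        // xadd64 returns the incremented value, so the index
                        // we just claimed is pos-1; once begin reaches end the
                        // local range is exhausted.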
                        if(begin < end) {
                                body(desc, begin);
                                continue;
                        }
                        break;
                }

                // Out of work, need to steal something.
                idle = false;
                for(try=0;; try++) {
                        // If we don't see any work for long enough,
                        // increment the done counter...
                        if(try > desc->nthr*4 && !idle) {
                                idle = true;
                                runtime_xadd(&desc->done, 1);
                        }
                        // ...if all threads have incremented the counter,
                        // we are done.
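                        // (done + !idle counts this thread as finished even if
                        // it has not incremented the counter itself yet.)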
                        if(desc->done + !idle == desc->nthr) {
                                if(!idle)
                                        runtime_xadd(&desc->done, 1);
                                goto exit;
                        }
                        // Choose a random victim for stealing.
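                        // (Draw from nthr-1 candidates and shift past our own
                        // index so a thread never tries to steal from itself.)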
                        victim = runtime_fastrand1() % (desc->nthr-1);
                        if(victim >= tid)
                                victim++;
                        victimpos = &desc->thr[victim].pos;
                        pos = runtime_atomicload64(victimpos);
                        for(;;) {
                                // See if it has any work.
                                begin = (uint32)pos;
                                end = (uint32)(pos>>32);
                                if(begin+1 >= end) {
                                        begin = end = 0;
                                        break;
                                }
                                if(idle) {
                                        runtime_xadd(&desc->done, -1);
                                        idle = false;
                                }
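                                // Split the victim's remaining range in half:
                                // the victim keeps [begin, begin2) and, if the
                                // CAS below succeeds, we take [begin2, end).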
                                begin2 = begin + (end-begin)/2;
                                newpos = (uint64)begin | (uint64)begin2<<32;
                                if(runtime_cas64(victimpos, &pos, newpos)) {
                                        begin = begin2;
                                        break;
                                }
                        }
                        if(begin < end) {
                                // Has successfully stolen some work.
                                if(idle)
                                        runtime_throw("parfor: should not be idle");
                                runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
                                me->nsteal++;
                                me->nstealcnt += end-begin;
                                break;
                        }
                        // Backoff.
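                        // Escalate gradually: first just spin, then yield the
                        // processor, then yield to the OS, then sleep briefly.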
                        if(try < desc->nthr) {
                                // nothing
                        } else if (try < 4*desc->nthr) {
                                me->nprocyield++;
                                runtime_procyield(20);
                        // If a caller asked not to wait for the others, exit now
                        // (assume that most work is already done at this point).
                        } else if (!desc->wait) {
                                if(!idle)
                                        runtime_xadd(&desc->done, 1);
                                goto exit;
                        } else if (try < 6*desc->nthr) {
                                me->nosyield++;
                                runtime_osyield();
                        } else {
                                me->nsleep++;
                                runtime_usleep(1);
                        }
                }
        }
exit:
        runtime_xadd64(&desc->nsteal, me->nsteal);
        runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
        runtime_xadd64(&desc->nprocyield, me->nprocyield);
        runtime_xadd64(&desc->nosyield, me->nosyield);
        runtime_xadd64(&desc->nsleep, me->nsleep);
        me->nsteal = 0;
        me->nstealcnt = 0;
        me->nprocyield = 0;
        me->nosyield = 0;
        me->nsleep = 0;
}

// For testing from Go
// func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)

struct parforiters_ret {
  uintptr start;
  uintptr end;
};

struct parforiters_ret runtime_parforiters(ParFor *, uintptr)
  __asm__ (GOSYM_PREFIX "runtime.parforiters");

struct parforiters_ret
runtime_parforiters(ParFor *desc, uintptr tid)
{
        struct parforiters_ret ret;

        ret.start = (uint32)desc->thr[tid].pos;
        ret.end = (uint32)(desc->thr[tid].pos>>32);
        return ret;
}