sk_tool_utils::create_portable_typeface("sans-serif", SkTypeface::kItalic)};
for (int work = 0; work < loops; work++) {
- sk_parallel_for(16, [&](int threadIndex) {
+ SkTaskGroup().batch(16, [&](int threadIndex) {
SkPaint paint;
paint.setAntiAlias(true);
paint.setSubpixelText(true);
}
SkTaskGroup tg;
- tg.batch([](int i){ run_test(&gThreadedTests[i]); }, gThreadedTests.count());
+ tg.batch(gThreadedTests.count(), [](int i){ run_test(&gThreadedTests[i]); });
for (int i = 0; i < kNumEnclaves; i++) {
SkTArray<Task>* currentEnclave = &enclaves[i];
switch(i) {
case kAnyThread_Enclave:
- tg.batch([currentEnclave](int j) { Task::Run(&(*currentEnclave)[j]); }, currentEnclave->count());
+ tg.batch(currentEnclave->count(),
+ [currentEnclave](int j) { Task::Run(&(*currentEnclave)[j]); });
break;
case kGPU_Enclave:
tg.add([currentEnclave](){ run_enclave_and_gpu_tests(currentEnclave); });
#include "SkTDArray.h"
static void path_fuzz_stroker(SkBitmap* bitmap, int seed) {
- sk_parallel_for(100, [&](int i) {
+ SkTaskGroup().batch(100, [&](int i) {
int localSeed = seed + i;
FuzzPath fuzzPath;
fThreadSafeDrawData[i].draw();
}
#else
- sk_parallel_for(fThreadSafeDrawData.count(), [&](int i) {
+ SkTaskGroup().batch(fThreadSafeDrawData.count(), [&](int i) {
fThreadSafeDrawData[i].draw();
});
#endif
gGlobal->add(fn, pending);
}
// Calls fn(i) for every i in [0, N). When no global pool exists (gGlobal is
// null) the calls run inline in index order and pending is left untouched;
// otherwise the request is handed to gGlobal->batch() together with pending.
// This hunk only swaps the parameter order so that N comes before fn.
- static void Batch(std::function<void(int)> fn, int N, SkAtomic<int32_t>* pending) {
+ static void Batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
if (!gGlobal) {
for (int i = 0; i < N; i++) { fn(i); }
return;
}
- gGlobal->batch(fn, N, pending);
+ gGlobal->batch(N, fn, pending);
}
static void Wait(SkAtomic<int32_t>* pending) {
fWorkAvailable.signal(1);
}
- void batch(std::function<void(int)> fn, int N, SkAtomic<int32_t>* pending) {
+ void batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
pending->fetch_add(+N, sk_memory_order_relaxed); // No barrier needed.
{
AutoLock lock(&fWorkLock);
static ThreadPool* gGlobal;
friend struct SkTaskGroup::Enabler;
- friend int ::sk_parallel_for_thread_count();
};
ThreadPool* ThreadPool::gGlobal = nullptr;
void SkTaskGroup::wait() { ThreadPool::Wait(&fPending); }
void SkTaskGroup::add(SkRunnable* task) { ThreadPool::Add(task, &fPending); }
void SkTaskGroup::add(std::function<void(void)> fn) { ThreadPool::Add(fn, &fPending); }
// Forwards N and fn to ThreadPool::Batch, passing the address of the fPending
// counter of this group. The change only moves N ahead of fn in the signature.
-void SkTaskGroup::batch (std::function<void(int)> fn, int N) {
- ThreadPool::Batch(fn, N, &fPending);
+void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
+ ThreadPool::Batch(N, fn, &fPending);
}
// Removed helper: returned gGlobal->fThreads.count() when a global pool
// existed, and 0 when ThreadPool::gGlobal was null.
-int sk_parallel_for_thread_count() {
- if (ThreadPool::gGlobal != nullptr) {
- return ThreadPool::gGlobal->fThreads.count();
- }
- return 0;
-}
void add(std::function<void(void)> fn);
// Add a batch of N tasks: fn is called once with each index i in [0, N).
- void batch(std::function<void(int)> fn, int N);
+ void batch(int N, std::function<void(int)> fn);
// Block until all Tasks previously add()ed to this SkTaskGroup have run.
// You may safely reuse this SkTaskGroup after wait() returns.
// Returns best estimate of number of CPU cores available to use.
int sk_num_cores();
-int sk_parallel_for_thread_count();
-
-// Call f(i) for i in [0, end).
// Removed by this change. It split [0, end) into contiguous chunks, described
// each chunk with a Chunk record, and ran one batch task per chunk on a
// temporary SkTaskGroup; each task called f(i) for the indices of its chunk.
-template <typename Func>
-void sk_parallel_for(int end, const Func& f) {
- if (end <= 0) { return; }
-
- struct Chunk {
- const Func* f;
- int start, end;
- };
-
- // TODO(mtklein): this chunking strategy could probably use some tuning.
// stride is end / max_chunks rounded up; nchunks is end / stride rounded up.
- int max_chunks = sk_num_cores() * 2,
- stride = (end + max_chunks - 1 ) / max_chunks,
- nchunks = (end + stride - 1 ) / stride;
- SkASSERT(nchunks <= max_chunks);
-
-#if defined(GOOGLE3)
- // Stack frame size is limited in GOOGLE3.
- SkAutoSTMalloc<512, Chunk> chunks(nchunks);
-#else
- // With the chunking strategy above this won't malloc until we have a machine with >512 cores.
- SkAutoSTMalloc<1024, Chunk> chunks(nchunks);
-#endif
-
- for (int i = 0; i < nchunks; i++) {
- Chunk& c = chunks[i];
- c.f = &f;
- c.start = i * stride;
- c.end = SkTMin(c.start + stride, end);
- SkASSERT(c.start < c.end); // Nothing will break if start >= end, but it's a wasted chunk.
- }
-
- Chunk* chunkBase = chunks.get();
- auto run_chunk = [chunkBase](int i) {
- Chunk& c = chunkBase[i];
// The loop variable i below shadows the lambda parameter i (the chunk index).
- for (int i = c.start; i < c.end; i++) {
- (*c.f)(i);
- }
- };
- SkTaskGroup().batch(run_chunk, nchunks);
-}
-
#endif//SkTaskGroup_DEFINED
};
// Parallelism helps speed things up on my desktop from ~725s to ~50s.
- sk_parallel_for(SkXfermode::kLastMode, test_mode);
+ SkTaskGroup().batch(SkXfermode::kLastMode, test_mode);
}
return new int(5);
};
- SkAtomic<int> force_a_race(sk_parallel_for_thread_count());
- if (force_a_race < 1) {
- return;
- }
- sk_parallel_for(sk_num_cores()*4, [&](size_t) {
- force_a_race.fetch_add(-1);
- while (force_a_race.load() > 0);
-
+ SkTaskGroup().batch(sk_num_cores()*4, [&](size_t) {
int* n = once.get(create);
REPORTER_ASSERT(r, *n == 5);
});
static SkAtomic<int> calls(0);
SkAtomic<int> force_a_race(sk_num_cores());
- sk_parallel_for(sk_num_cores()*4, [&](size_t) {
+ SkTaskGroup().batch(sk_num_cores()*4, [&](size_t) {
force_a_race.fetch_add(-1);
while (force_a_race.load() > 0);
DEF_TEST(SkOnce_Multithreaded, r) {
int x = 0;
// Run a bunch of tasks to be the first to add six to x.
- sk_parallel_for(1021, [&](int) {
+ SkTaskGroup().batch(1021, [&](int) {
void(*add_six)(int*) = [](int* p) { *p += 6; };
SkOnce(&mt_once, add_six, &x);
});
// Calls fRunnables[i]->run() for each index i in [0, fRunnables.count()),
// issued as a single batch on a temporary SkTaskGroup.
// NOTE(review): returning only after every run() has finished presumably
// depends on the temporary SkTaskGroup waiting in its destructor - confirm.
void TestRunner::render() {
// TODO: this doesn't really need to use SkRunnables any more.
// We can just write the code to run in the for-loop directly.
- sk_parallel_for(fRunnables.count(), [&](int i) {
+ SkTaskGroup().batch(fRunnables.count(), [&](int i) {
fRunnables[i]->run();
});
}
}
// Calls fRunnables[i]->run() for each index i in [0, fRunnables.count()),
// issued as a single batch on a temporary SkTaskGroup. The lambda captures by
// reference, so fRunnables is read through this object.
// NOTE(review): completion before return presumably relies on the temporary
// SkTaskGroup waiting when it is destroyed - confirm.
void PathOpsThreadedTestRunner::render() {
- sk_parallel_for(fRunnables.count(), [&](int i) {
+ SkTaskGroup().batch(fRunnables.count(), [&](int i) {
fRunnables[i]->run();
});
}
for (int i = 0; i < kSharedSize; ++i) {
shared[i] = 0;
}
- sk_parallel_for(8, [&](int threadIndex) {
+ SkTaskGroup().batch(8, [&](int threadIndex) {
if (threadIndex % 4 != 0) {
for (int c = 0; c < 100000; ++c) {
sm.acquireShared();
// Calls fRunnables[i]->run() once per index i in [0, fRunnables.count()),
// submitted as one batch to a temporary SkTaskGroup.
// NOTE(review): whether this blocks until all runs complete is not visible
// here; it presumably depends on the SkTaskGroup destructor - confirm.
void SkpSkGrThreadedTestRunner::render() {
// TODO: we don't really need to be using SkRunnables here anymore.
// We can just write the code we'd run right in the for loop.
- sk_parallel_for(fRunnables.count(), [&](int i) {
+ SkTaskGroup().batch(fRunnables.count(), [&](int i) {
fRunnables[i]->run();
});
}