From: mtklein Date: Tue, 1 Jul 2014 15:43:42 +0000 (-0700) Subject: draft gpu support in nanobench X-Git-Tag: accepted/tizen/5.0/unified/20181102.025319~6977 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bb6a02823929584231c8e080ee69e7fb1178cbfb;p=platform%2Fupstream%2FlibSkiaSharp.git draft gpu support in nanobench BUG=skia: R=bsalomon@google.com, mtklein@google.com Author: mtklein@chromium.org Review URL: https://codereview.chromium.org/359473004 --- diff --git a/bench/ImageFilterDAGBench.cpp b/bench/ImageFilterDAGBench.cpp index 29d1da1..680628e 100644 --- a/bench/ImageFilterDAGBench.cpp +++ b/bench/ImageFilterDAGBench.cpp @@ -18,8 +18,7 @@ enum { kNumInputs = 5 }; class ImageFilterDAGBench : public Benchmark { public: - ImageFilterDAGBench() { - } + ImageFilterDAGBench() {} protected: virtual const char* onGetName() SK_OVERRIDE { @@ -27,16 +26,18 @@ protected: } virtual void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE { - SkAutoTUnref blur(SkBlurImageFilter::Create(20.0f, 20.0f)); - SkImageFilter* inputs[kNumInputs]; - for (int i = 0; i < kNumInputs; ++i) { - inputs[i] = blur.get(); + for (int j = 0; j < loops; j++) { + SkAutoTUnref blur(SkBlurImageFilter::Create(20.0f, 20.0f)); + SkImageFilter* inputs[kNumInputs]; + for (int i = 0; i < kNumInputs; ++i) { + inputs[i] = blur.get(); + } + SkAutoTUnref merge(SkMergeImageFilter::Create(inputs, kNumInputs)); + SkPaint paint; + paint.setImageFilter(merge); + SkRect rect = SkRect::Make(SkIRect::MakeWH(400, 400)); + canvas->drawRect(rect, paint); } - SkAutoTUnref merge(SkMergeImageFilter::Create(inputs, kNumInputs)); - SkPaint paint; - paint.setImageFilter(merge); - SkRect rect = SkRect::Make(SkIRect::MakeWH(400, 400)); - canvas->drawRect(rect, paint); } private: diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp index 77df430..a3dbf22 100644 --- a/bench/nanobench.cpp +++ b/bench/nanobench.cpp @@ -5,6 +5,8 @@ * found in the LICENSE file. */ +#include + #include "Benchmark.h" #include "CrashHandler.h" #include "Stats.h" @@ -17,6 +19,11 @@ #include "SkString.h" #include "SkSurface.h" +#if SK_SUPPORT_GPU + #include "GrContextFactory.h" + GrContextFactory gGrFactory; +#endif + __SK_FORCE_IMAGE_DECODER_LINKING; DEFINE_int32(samples, 10, "Number of samples to measure for each bench."); @@ -26,10 +33,11 @@ DEFINE_double(overheadGoal, 0.0001, DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure."); DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample."); DEFINE_bool2(verbose, v, false, "Print all samples."); -DEFINE_string(config, "8888 nonrendering", - "Configs to measure. Options: 565 8888 nonrendering"); +DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: " + "565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nvprmsaa16 angle"); +DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU."); +DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag."); -// TODO: GPU benches static SkString humanize(double ms) { if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3); @@ -38,86 +46,175 @@ static SkString humanize(double ms) { return SkStringPrintf("%.3gms", ms); } -static double estimate_timer_overhead() { - double overhead = 0; +static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) { WallTimer timer; - for (int i = 0; i < FLAGS_overheadLoops; i++) { - timer.start(); - timer.end(); - overhead += timer.fWall; + timer.start(); + if (bench) { + bench->draw(loops, canvas); } - return overhead / FLAGS_overheadLoops; -} - -static void safe_flush(SkCanvas* canvas) { if (canvas) { canvas->flush(); } +#if SK_SUPPORT_GPU + if (gl) { + SK_GL(*gl, Flush()); + gl->swapBuffers(); + } +#endif + timer.end(); + return timer.fWall; } -static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) { - WallTimer timer; +static double estimate_timer_overhead() { + double overhead = 0; + for (int i = 0; i < FLAGS_overheadLoops; i++) { + overhead += time(1, NULL, NULL, NULL); + } + return overhead / FLAGS_overheadLoops; +} - // Measure timer overhead and bench time together. +static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) { + // First figure out approximately how many loops of bench it takes to make overhead negligible. + double bench_plus_overhead; do { - timer.start(); - bench->draw(1, canvas); - safe_flush(canvas); - timer.end(); - } while (timer.fWall < overhead); // Shouldn't normally happen. + bench_plus_overhead = time(1, bench, canvas, NULL); + } while (bench_plus_overhead < overhead); // Shouldn't normally happen. - // Later we'll just start and stop the timer once, but loop N times. + // Later we'll just start and stop the timer once but loop N times. // We'll pick N to make timer overhead negligible: // - // Timer Overhead - // ------------------------------- < FLAGS_overheadGoal - // Timer Overhead + N * Bench Time + // overhead + // ------------------------- < FLAGS_overheadGoal + // overhead + N * Bench Time // - // where timer.fWall ≈ Timer Overhead + Bench Time. + // where bench_plus_overhead ≈ overhead + Bench Time. // // Doing some math, we get: // - // (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead - // ----------------------------------------------------- < N - // (timer.fWall - Timer Overhead) + // (overhead / FLAGS_overheadGoal) - overhead + // ------------------------------------------ < N + // bench_plus_overhead - overhead) // // Luckily, this also works well in practice. :) const double numer = overhead / FLAGS_overheadGoal - overhead; - const double denom = timer.fWall - overhead; - return (int)ceil(numer / denom); + const double denom = bench_plus_overhead - overhead; + const int loops = (int)ceil(numer / denom); + + for (int i = 0; i < FLAGS_samples; i++) { + samples[i] = time(loops, bench, canvas, NULL) / loops; + } + return loops; } -static bool push_config_if_enabled(const char* config, SkTDArray* configs) { - if (FLAGS_config.contains(config)) { - configs->push(config); - return true; +#if SK_SUPPORT_GPU +static int gpu_bench(SkGLContextHelper* gl, + Benchmark* bench, + SkCanvas* canvas, + double* samples) { + // Make sure we're done with whatever came before. + SK_GL(*gl, Finish); + + // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs. + int loops = 1; + double elapsed = 0; + do { + loops *= 2; + // If the GPU lets frames lag at all, we need to make sure we're timing + // _this_ round, not still timing last round. We force this by looping + // more times than any reasonable GPU will allow frames to lag. + for (int i = 0; i < FLAGS_gpuFrameLag; i++) { + elapsed = time(loops, bench, canvas, gl); + } + } while (elapsed < FLAGS_gpuMs); + + // We've overshot at least a little. Scale back linearly. + loops = (int)ceil(loops * FLAGS_gpuMs / elapsed); + + // Might as well make sure we're not still timing our calibration. + SK_GL(*gl, Finish); + + // Pretty much the same deal as the calibration: do some warmup to make + // sure we're timing steady-state pipelined frames. + for (int i = 0; i < FLAGS_gpuFrameLag; i++) { + time(loops, bench, canvas, gl); } - return false; + + // Now, actually do the timing! + for (int i = 0; i < FLAGS_samples; i++) { + samples[i] = time(loops, bench, canvas, gl) / loops; + } + return loops; } +#endif + +static SkString to_lower(const char* str) { + SkString lower(str); + for (size_t i = 0; i < lower.size(); i++) { + lower[i] = tolower(lower[i]); + } + return lower; +} + +struct Target { + const char* config; + Benchmark::Backend backend; + SkAutoTDelete surface; +#if SK_SUPPORT_GPU + SkGLContextHelper* gl; +#endif +}; -static void create_surfaces(Benchmark* bench, - SkTDArray* surfaces, - SkTDArray* configs) { +// If bench is enabled for backend/config, returns a Target* for them, otherwise NULL. +static Target* is_enabled(Benchmark* bench, Benchmark::Backend backend, const char* config) { + if (!bench->isSuitableFor(backend)) { + return NULL; + } - if (bench->isSuitableFor(Benchmark::kNonRendering_Backend) - && push_config_if_enabled("nonrendering", configs)) { - surfaces->push(NULL); + for (int i = 0; i < FLAGS_config.count(); i++) { + if (to_lower(FLAGS_config[i]).equals(config)) { + Target* target = new Target; + target->config = config; + target->backend = backend; + return target; + } } + return NULL; +} - if (bench->isSuitableFor(Benchmark::kRaster_Backend)) { - const int w = bench->getSize().fX, - h = bench->getSize().fY; +// Append all targets that are suitable for bench. +static void create_targets(Benchmark* bench, SkTDArray* targets) { + const int w = bench->getSize().fX, + h = bench->getSize().fY; + const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType }, + _565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType }; - if (push_config_if_enabled("8888", configs)) { - const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaType }; - surfaces->push(SkSurface::NewRaster(info)); + #define CPU_TARGET(config, backend, code) \ + if (Target* t = is_enabled(bench, Benchmark::backend, #config)) { \ + t->surface.reset(code); \ + targets->push(t); \ } + CPU_TARGET(nonrendering, kNonRendering_Backend, NULL) + CPU_TARGET(8888, kRaster_Backend, SkSurface::NewRaster(_8888)) + CPU_TARGET(565, kRaster_Backend, SkSurface::NewRaster(_565)) - if (push_config_if_enabled("565", configs)) { - const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType }; - surfaces->push(SkSurface::NewRaster(info)); +#if SK_SUPPORT_GPU + #define GPU_TARGET(config, ctxType, info, samples) \ + if (Target* t = is_enabled(bench, Benchmark::kGPU_Backend, #config)) { \ + t->surface.reset(SkSurface::NewRenderTarget(gGrFactory.get(ctxType), info, samples)); \ + t->gl = gGrFactory.getGLContext(ctxType); \ + targets->push(t); \ } - } + GPU_TARGET(gpu, GrContextFactory::kNative_GLContextType, _8888, 0) + GPU_TARGET(msaa4, GrContextFactory::kNative_GLContextType, _8888, 4) + GPU_TARGET(msaa16, GrContextFactory::kNative_GLContextType, _8888, 16) + GPU_TARGET(nvprmsaa4, GrContextFactory::kNVPR_GLContextType, _8888, 4) + GPU_TARGET(nvprmsaa16, GrContextFactory::kNVPR_GLContextType, _8888, 16) + GPU_TARGET(debug, GrContextFactory::kDebug_GLContextType, _8888, 0) + GPU_TARGET(nullgpu, GrContextFactory::kNull_GLContextType, _8888, 0) + #if SK_ANGLE + GPU_TARGET(angle, GrContextFactory::kANGLE_GLContextType, _8888, 0) + #endif +#endif } int tool_main(int argc, char** argv); @@ -127,13 +224,16 @@ int tool_main(int argc, char** argv) { SkCommandLineFlags::Parse(argc, argv); const double overhead = estimate_timer_overhead(); + SkAutoTMalloc samples(FLAGS_samples); + + // TODO: display add median, use it in --quiet mode if (FLAGS_verbose) { // No header. } else if (FLAGS_quiet) { SkDebugf("min\tbench\tconfig\n"); } else { - SkDebugf("loops\tmin\tmean\tmax\tstddev\tbench\tconfig\n"); + SkDebugf("loops\tmin\tmean\tmax\tstddev\tconfig\tbench\n"); } for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) { @@ -142,38 +242,31 @@ int tool_main(int argc, char** argv) { continue; } - SkTDArray surfaces; - SkTDArray configs; - create_surfaces(bench.get(), &surfaces, &configs); + SkTDArray targets; + create_targets(bench.get(), &targets); bench->preDraw(); - for (int j = 0; j < surfaces.count(); j++) { - SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL; - const char* config = configs[j]; - - bench->draw(1, canvas); // Just paranoid warmup. - safe_flush(canvas); - const int loops = guess_loops(overhead, bench.get(), canvas); - - SkAutoTMalloc samples(FLAGS_samples); - WallTimer timer; - for (int i = 0; i < FLAGS_samples; i++) { - timer.start(); - bench->draw(loops, canvas); - safe_flush(canvas); - timer.end(); - samples[i] = timer.fWall / loops; - } + for (int j = 0; j < targets.count(); j++) { + SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL; + + const int loops = +#if SK_SUPPORT_GPU + Benchmark::kGPU_Backend == targets[j]->backend + ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get()) + : +#endif + cpu_bench( overhead, bench.get(), canvas, samples.get()); Stats stats(samples.get(), FLAGS_samples); + const char* config = targets[j]->config; if (FLAGS_verbose) { for (int i = 0; i < FLAGS_samples; i++) { SkDebugf("%s ", humanize(samples[i]).c_str()); } SkDebugf("%s\n", bench->getName()); } else if (FLAGS_quiet) { - if (configs.count() == 1) { + if (targets.count() == 1) { config = ""; // Only print the config if we run the same bench on more than one. } SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->getName(), config); @@ -185,12 +278,12 @@ int tool_main(int argc, char** argv) { , humanize(stats.mean).c_str() , humanize(stats.max).c_str() , stddev_percent - , bench->getName() , config + , bench->getName() ); } } - surfaces.deleteAll(); + targets.deleteAll(); } return 0;