2 * Copyright 2014 Google Inc.
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
10 #include "Benchmark.h"
11 #include "CrashHandler.h"
13 #include "ProcStats.h"
14 #include "ResultsWriter.h"
15 #include "RecordingBench.h"
20 #include "SkBBoxHierarchy.h"
22 #include "SkCommonFlags.h"
23 #include "SkForceLinking.h"
24 #include "SkGraphics.h"
26 #include "SkPictureRecorder.h"
28 #include "SkSurface.h"
31 #include "gl/GrGLDefines.h"
32 #include "GrContextFactory.h"
// --- Globals and command-line flags --------------------------------------
// NOTE(review): this excerpt is elided (original line numbers jump); some
// definitions below are missing interior lines.
// Lazily-reset factory for GPU contexts, shared by all GPU benches/configs.
33 SkAutoTDelete<GrContextFactory> gGrFactory;
36 __SK_FORCE_IMAGE_DECODER_LINKING;
// Sentinel value for --loops meaning "auto-tune the loop count per bench".
38 static const int kAutoTuneLoops = 0;
// NOTE(review): initializer elided in this excerpt.
40 static const int kDefaultLoops =
// Builds the --loops help string, embedding the kAutoTuneLoops sentinel.
// NOTE(review): the local `help` declaration and return are elided here.
47 static SkString loops_help_txt() {
49 help.printf("Number of times to run each bench. Set this to %d to auto-"
50 "tune for each bench. Timings are only reported when auto-tuning.",
55 DEFINE_int32(loops, kDefaultLoops, loops_help_txt().c_str());
57 DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
58 DEFINE_int32(overheadLoops, 100000, "Loops to estimate timer overhead.");
// NOTE(review): "measurments" typo in the help string below (user-visible
// text; left unchanged in this doc-only pass).
59 DEFINE_double(overheadGoal, 0.0001,
60 "Loop until timer overhead is at most this fraction of our measurments.");
// NOTE(review): "millseconds" typo in the help string below.
61 DEFINE_double(gpuMs, 5, "Target bench time in millseconds for GPU.");
62 DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag.");
63 DEFINE_bool(gpuCompressAlphaMasks, false, "Compress masks generated from falling back to "
64 "software path rendering.");
66 DEFINE_string(outResultsFile, "", "If given, write results here as JSON.");
67 DEFINE_int32(maxCalibrationAttempts, 3,
68 "Try up to this many times to guess loops for a bench, or skip the bench.");
69 DEFINE_int32(maxLoops, 1000000, "Never run a bench more times than this.");
70 DEFINE_string(clip, "0,0,1000,1000", "Clip for SKPs.");
71 DEFINE_string(scales, "1.0", "Space-separated scales for SKPs.");
72 DEFINE_bool(bbh, true, "Build a BBH for SKPs?");
73 DEFINE_int32(flushEvery, 10, "Flush --outResultsFile every Nth run.");
// Formats a duration in milliseconds as a short human-readable string.
// Verbose mode prints raw nanoseconds as an integer; otherwise the value is
// scaled to s / ms / us / ns with 3 significant digits.
// NOTE(review): the #else/#endif for SK_BUILD_FOR_WIN are elided in this
// excerpt — line 80 ("us") is the Windows spelling, line 82 ("µs") the
// non-Windows one.
75 static SkString humanize(double ms) {
76 if (FLAGS_verbose) return SkStringPrintf("%llu", (uint64_t)(ms*1e6));
77 if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3);
78 if (ms < 1e-3) return SkStringPrintf("%.3gns", ms*1e6);
79 #ifdef SK_BUILD_FOR_WIN
80 if (ms < 1) return SkStringPrintf("%.3gus", ms*1e3);
82 if (ms < 1) return SkStringPrintf("%.3gµs", ms*1e3);
84 return SkStringPrintf("%.3gms", ms);
// Convenience for passing a humanized time straight to SkDebugf's varargs.
86 #define HUMANIZE(ms) humanize(ms).c_str()
// Times `loops` iterations of `bench` on `canvas`, returning elapsed time.
// `bench`, `canvas`, and `gl` may each be NULL (see estimate_timer_overhead,
// which passes all NULL to measure pure timer overhead).
// NOTE(review): the timer start/stop and GL-flush logic are elided in this
// excerpt; only the clear and draw calls are visible.
88 static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContext* gl) {
90 canvas->clear(SK_ColorWHITE);
95 bench->draw(loops, canvas);
// Estimates per-measurement timer overhead by timing FLAGS_overheadLoops
// empty runs (NULL bench/canvas/gl) and averaging.
// NOTE(review): the `overhead` accumulator's declaration is elided in this
// excerpt.
110 static double estimate_timer_overhead() {
112 for (int i = 0; i < FLAGS_overheadLoops; i++) {
113 overhead += time(1, NULL, NULL, NULL);
115 return overhead / FLAGS_overheadLoops;
// Checks a computed loop count for a "run forever" magic value.
// NOTE(review): the body is elided in this excerpt; behavior beyond the
// comment below cannot be confirmed from what is visible.
118 static int detect_forever_loops(int loops) {
119 // look for a magic run-forever value
// Clamps a computed loop count into a sane range: warns and clamps to 1 at
// the low end (elided branch) and to FLAGS_maxLoops at the high end.
// NOTE(review): the low-end condition and final return are elided in this
// excerpt.
126 static int clamp_loops(int loops) {
128 SkDebugf("ERROR: clamping loops from %d to 1. "
129 "There's probably something wrong with the bench.\n", loops);
132 if (loops > FLAGS_maxLoops) {
133 SkDebugf("WARNING: clamping loops from %d to FLAGS_maxLoops, %d.\n", loops, FLAGS_maxLoops);
134 return FLAGS_maxLoops;
// Reads back `canvas`'s pixels and writes them to `filename` as a PNG.
// Returns false (elided returns) and logs on any failure: empty filename,
// unknown color type, pixel readback failure, mkdir failure, unwritable
// stream, or encode failure.
// NOTE(review): the early `return false;` lines and the bitmap declaration
// are elided in this excerpt.
139 static bool write_canvas_png(SkCanvas* canvas, const SkString& filename) {
140 if (filename.isEmpty()) {
// Can't meaningfully snapshot a non-rendering (unknown color type) canvas.
143 if (kUnknown_SkColorType == canvas->imageInfo().colorType()) {
147 bmp.setInfo(canvas->imageInfo());
148 if (!canvas->readPixels(&bmp, 0, 0)) {
149 SkDebugf("Can't read canvas pixels.\n");
// Make sure the destination directory exists before opening the stream.
152 SkString dir = SkOSPath::Dirname(filename.c_str());
153 if (!sk_mkdir(dir.c_str())) {
154 SkDebugf("Can't make dir %s.\n", dir.c_str());
157 SkFILEWStream stream(filename.c_str());
158 if (!stream.isValid()) {
159 SkDebugf("Can't write %s.\n", filename.c_str());
162 if (!SkImageEncoder::EncodeStream(&stream, bmp, SkImageEncoder::kPNG_Type, 100)) {
163 SkDebugf("Can't encode a PNG.\n");
// Sentinel loop count meaning "calibration failed; skip this bench".
// NOTE(review): could be `static const int` — it is never modified here,
// but left as-is in this doc-only pass.
169 static int kFailedLoops = -2;
// Runs `bench` on the CPU, filling samples[0..FLAGS_samples) with
// per-loop times (ms). Returns the loop count used, or kFailedLoops if
// auto-calibration gave up.
170 static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) {
171 // First figure out approximately how many loops of bench it takes to make overhead negligible.
172 double bench_plus_overhead = 0.0;
// NOTE(review): `round` declaration and the `return kFailedLoops;` after the
// warning are elided in this excerpt.
174 if (kAutoTuneLoops == FLAGS_loops) {
// Keep timing single runs until one registers above the timer overhead.
175 while (bench_plus_overhead < overhead) {
176 if (round++ == FLAGS_maxCalibrationAttempts) {
177 SkDebugf("WARNING: Can't estimate loops for %s (%s vs. %s); skipping.\n",
178 bench->getUniqueName(), HUMANIZE(bench_plus_overhead), HUMANIZE(overhead));
181 bench_plus_overhead = time(1, bench, canvas, NULL);
185 // Later we'll just start and stop the timer once but loop N times.
186 // We'll pick N to make timer overhead negligible:
189 // ------------------------- < FLAGS_overheadGoal
190 // overhead + N * Bench Time
192 // where bench_plus_overhead ≈ overhead + Bench Time.
194 // Doing some math, we get:
196 // (overhead / FLAGS_overheadGoal) - overhead
197 // ------------------------------------------ < N
198 // bench_plus_overhead - overhead)
200 // Luckily, this also works well in practice. :)
201 int loops = FLAGS_loops;
202 if (kAutoTuneLoops == loops) {
203 const double numer = overhead / FLAGS_overheadGoal - overhead;
204 const double denom = bench_plus_overhead - overhead;
// Round up so we always satisfy the inequality above.
205 loops = (int)ceil(numer / denom);
206 loops = clamp_loops(loops);
208 loops = detect_forever_loops(loops);
// Collect the real samples, normalizing each to per-loop time.
211 for (int i = 0; i < FLAGS_samples; i++) {
212 samples[i] = time(loops, bench, canvas, NULL) / loops;
// Runs `bench` on the GPU, filling samples[] with per-loop times (ms) and
// returning the loop count used. Calibrates loops so one timed frame takes
// roughly FLAGS_gpuMs, with warmup rounds to hide GPU frame lag.
// NOTE(review): the parameter list continuation, `elapsed`/doubling logic,
// and the final return are elided in this excerpt.
218 static int gpu_bench(SkGLContext* gl,
223 // Make sure we're done with whatever came before.
224 SK_GL(*gl, Finish());
226 // First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
227 int loops = FLAGS_loops;
228 if (kAutoTuneLoops == loops) {
// Calibration doubles loops each round (elided); 1<<30 means it ran away.
232 if (1<<30 == loops) {
233 // We're about to wrap. Something's wrong with the bench.
238 // If the GPU lets frames lag at all, we need to make sure we're timing
239 // _this_ round, not still timing last round. We force this by looping
240 // more times than any reasonable GPU will allow frames to lag.
241 for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
242 elapsed = time(loops, bench, canvas, gl);
244 } while (elapsed < FLAGS_gpuMs);
246 // We've overshot at least a little. Scale back linearly.
247 loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);
248 loops = clamp_loops(loops);
250 // Might as well make sure we're not still timing our calibration.
251 SK_GL(*gl, Finish());
253 loops = detect_forever_loops(loops);
256 // Pretty much the same deal as the calibration: do some warmup to make
257 // sure we're timing steady-state pipelined frames.
258 for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
259 time(loops, bench, canvas, gl);
262 // Now, actually do the timing!
263 for (int i = 0; i < FLAGS_samples; i++) {
264 samples[i] = time(loops, bench, canvas, gl) / loops;
// Returns a lowercased copy of `str` (used for case-insensitive --config
// matching). NOTE(review): the `lower` declaration and return are elided
// in this excerpt.
270 static SkString to_lower(const char* str) {
272 for (size_t i = 0; i < lower.size(); i++) {
273 lower[i] = tolower(lower[i]);
// NOTE(review): the struct declarations these members belong to are elided
// in this excerpt. Lines 280/285 are presumably fields of the Config struct
// (backend + GL context type per named config) and lines 292/294 belong to
// Target (a Config plus the surface to draw into) — confirm against the
// full file.
280 Benchmark::Backend backend;
285 GrContextFactory::GLContextType ctxType;
292 explicit Target(const Config& c) : config(c) {}
294 SkAutoTDelete<SkSurface> surface;
// True if `name` (case-insensitively) appears in --config.
// NOTE(review): returns are elided in this excerpt.
300 static bool is_cpu_config_allowed(const char* name) {
301 for (int i = 0; i < FLAGS_config.count(); i++) {
302 if (to_lower(FLAGS_config[i]).equals(name)) {
// True if the GPU config `name` is requested AND the context supports the
// requested sample count.
310 static bool is_gpu_config_allowed(const char* name, GrContextFactory::GLContextType ctxType,
312 if (!is_cpu_config_allowed(name)) {
315 if (const GrContext* ctx = gGrFactory->get(ctxType)) {
316 return sampleCnt <= ctx->getMaxSampleCount();
// Placeholder GL context type for CPU configs (which never use it).
// NOTE(review): the #if/#else/#endif around these two definitions is elided
// in this excerpt (presumably SK_SUPPORT_GPU — confirm against full file).
323 #define kBogusGLContextType GrContextFactory::kNative_GLContextType
325 #define kBogusGLContextType 0
328 // Append all configs that are enabled and supported.
329 static void create_configs(SkTDArray<Config>* configs) {
330 #define CPU_CONFIG(name, backend, color, alpha) \
331 if (is_cpu_config_allowed(#name)) { \
332 Config config = { #name, Benchmark::backend, color, alpha, 0, kBogusGLContextType }; \
333 configs->push(config); \
337 CPU_CONFIG(nonrendering, kNonRendering_Backend, kUnknown_SkColorType, kUnpremul_SkAlphaType)
338 CPU_CONFIG(8888, kRaster_Backend, kN32_SkColorType, kPremul_SkAlphaType)
339 CPU_CONFIG(565, kRaster_Backend, kRGB_565_SkColorType, kOpaque_SkAlphaType)
// GPU configs carry their sample count and real GL context type.
343 #define GPU_CONFIG(name, ctxType, samples) \
344 if (is_gpu_config_allowed(#name, GrContextFactory::ctxType, samples)) { \
347 Benchmark::kGPU_Backend, \
349 kPremul_SkAlphaType, \
351 GrContextFactory::ctxType }; \
352 configs->push(config); \
356 GPU_CONFIG(gpu, kNative_GLContextType, 0)
357 GPU_CONFIG(msaa4, kNative_GLContextType, 4)
358 GPU_CONFIG(msaa16, kNative_GLContextType, 16)
359 GPU_CONFIG(nvprmsaa4, kNVPR_GLContextType, 4)
360 GPU_CONFIG(nvprmsaa16, kNVPR_GLContextType, 16)
361 GPU_CONFIG(debug, kDebug_GLContextType, 0)
362 GPU_CONFIG(nullgpu, kNull_GLContextType, 0)
// NOTE(review): angle is presumably guarded by a platform #ifdef that is
// elided in this excerpt.
364 GPU_CONFIG(angle, kANGLE_GLContextType, 0)
370 // If bench is enabled for config, returns a Target* for it, otherwise NULL.
// Caller takes ownership of the returned Target (raw new below).
371 static Target* is_enabled(Benchmark* bench, const Config& config) {
372 if (!bench->isSuitableFor(config.backend)) {
376 SkImageInfo info = SkImageInfo::Make(bench->getSize().fX, bench->getSize().fY,
377 config.color, config.alpha);
379 Target* target = new Target(config);
// Raster: plain CPU surface. GPU: render target from the shared factory,
// plus the GL context needed for timing/flushes.
381 if (Benchmark::kRaster_Backend == config.backend) {
382 target->surface.reset(SkSurface::NewRaster(info));
385 else if (Benchmark::kGPU_Backend == config.backend) {
386 target->surface.reset(SkSurface::NewRenderTarget(gGrFactory->get(config.ctxType), info,
388 target->gl = gGrFactory->getGLContext(config.ctxType);
// Non-rendering benches legitimately have no surface; everything else must.
// NOTE(review): the cleanup/return on failure is elided in this excerpt.
392 if (Benchmark::kNonRendering_Backend != config.backend && !target->surface.get()) {
399 // Creates targets for a benchmark and a set of configs.
// Appends one heap-allocated Target per enabled config; caller owns them.
400 static void create_targets(SkTDArray<Target*>* targets, Benchmark* b,
401 const SkTDArray<Config>& configs) {
402 for (int i = 0; i < configs.count(); ++i) {
403 if (Target* t = is_enabled(b, configs[i])) {
// Logs GL driver identification strings (version/renderer/vendor/GLSL) as
// config options on the results writer.
// NOTE(review): the single `version` variable is reused for all four
// glGetString queries — harmless but misleadingly named for RENDERER/VENDOR.
411 static void fill_gpu_options(ResultsWriter* log, SkGLContext* ctx) {
412 const GrGLubyte* version;
413 SK_GL_RET(*ctx, version, GetString(GR_GL_VERSION));
414 log->configOption("GL_VERSION", (const char*)(version));
416 SK_GL_RET(*ctx, version, GetString(GR_GL_RENDERER));
417 log->configOption("GL_RENDERER", (const char*) version);
419 SK_GL_RET(*ctx, version, GetString(GR_GL_VENDOR));
420 log->configOption("GL_VENDOR", (const char*) version);
422 SK_GL_RET(*ctx, version, GetString(GR_GL_SHADING_LANGUAGE_VERSION));
423 log->configOption("GL_SHADING_LANGUAGE_VERSION", (const char*) version);
// Produces the full stream of benchmarks to run, in order: registered
// micro-benches, GMs flagged as benches, each .skp as a RecordingBench,
// then each .skp at each --scales value as an SKPBench (playback).
// NOTE(review): this class is heavily elided in this excerpt (constructor
// body, next() signature, several members and returns are missing).
427 class BenchmarkStream {
429 BenchmarkStream() : fBenches(BenchRegistry::Head())
430 , fGMs(skiagm::GMRegistry::Head())
431 , fCurrentRecording(0)
// Collect .skp paths: each --skps entry is either a file or a directory.
434 for (int i = 0; i < FLAGS_skps.count(); i++) {
435 if (SkStrEndsWith(FLAGS_skps[i], ".skp")) {
436 fSKPs.push_back() = FLAGS_skps[i];
438 SkOSFile::Iter it(FLAGS_skps[i], ".skp");
440 while (it.next(&path)) {
// NOTE(review): BUG? This joins against FLAGS_skps[0] while iterating
// directory FLAGS_skps[i] — with multiple --skps dirs, files from later
// dirs get the first dir's prefix. Should presumably be FLAGS_skps[i];
// confirm against upstream and fix in a code change.
441 fSKPs.push_back() = SkOSPath::Join(FLAGS_skps[0], path.c_str());
// Parse --clip as "L,T,R,B" into fClip.
446 if (4 != sscanf(FLAGS_clip[0], "%d,%d,%d,%d",
447 &fClip.fLeft, &fClip.fTop, &fClip.fRight, &fClip.fBottom)) {
448 SkDebugf("Can't parse %s from --clip as an SkIRect.\n", FLAGS_clip[0]);
// Parse each --scales entry as a float.
452 for (int i = 0; i < FLAGS_scales.count(); i++) {
453 if (1 != sscanf(FLAGS_scales[i], "%f", &fScales.push_back())) {
454 SkDebugf("Can't parse %s from --scales as an SkScalar.\n", FLAGS_scales[i]);
// Loads the .skp at `path` into `pic`. Returns false (elided) if the path
// is filtered out by --match or the file can't be read/parsed.
460 static bool ReadPicture(const char* path, SkAutoTUnref<SkPicture>* pic) {
461 // Not strictly necessary, as it will be checked again later,
462 // but helps to avoid a lot of pointless work if we're going to skip it.
463 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, path)) {
467 SkAutoTUnref<SkStream> stream(SkStream::NewFromFile(path));
468 if (stream.get() == NULL) {
469 SkDebugf("Could not read %s.\n", path);
473 pic->reset(SkPicture::CreateFromStream(stream.get()));
474 if (pic->get() == NULL) {
475 SkDebugf("Could not read %s as an SkPicture.\n", path);
// next() (signature elided): hand out registered micro-benches first...
483 Benchmark* bench = fBenches->factory()(NULL);
484 fBenches = fBenches->next();
485 fSourceType = "bench";
486 fBenchType = "micro";
// ...then GMs that opted into being run as benches...
491 SkAutoTDelete<skiagm::GM> gm(fGMs->factory()(NULL));
493 if (gm->getFlags() & skiagm::GM::kAsBench_Flag) {
495 fBenchType = "micro";
496 return SkNEW_ARGS(GMBench, (gm.detach()));
500 // First add all .skps as RecordingBenches.
501 while (fCurrentRecording < fSKPs.count()) {
502 const SkString& path = fSKPs[fCurrentRecording++];
503 SkAutoTUnref<SkPicture> pic;
504 if (!ReadPicture(path.c_str(), &pic)) {
507 SkString name = SkOSPath::Basename(path.c_str());
509 fBenchType = "recording";
510 return SkNEW_ARGS(RecordingBench, (name.c_str(), pic.get(), FLAGS_bbh));
513 // Then once each for each scale as SKPBenches (playback).
514 while (fCurrentScale < fScales.count()) {
515 while (fCurrentSKP < fSKPs.count()) {
516 const SkString& path = fSKPs[fCurrentSKP++];
517 SkAutoTUnref<SkPicture> pic;
518 if (!ReadPicture(path.c_str(), &pic)) {
522 // The SKP we read off disk doesn't have a BBH. Re-record so it grows one.
523 SkRTreeFactory factory;
524 SkPictureRecorder recorder;
525 pic->playback(recorder.beginRecording(pic->cullRect().width(),
526 pic->cullRect().height(),
528 pic.reset(recorder.endRecording());
530 SkString name = SkOSPath::Basename(path.c_str());
532 fBenchType = "playback";
533 return SkNEW_ARGS(SKPBench,
534 (name.c_str(), pic.get(), fClip, fScales[fCurrentScale]));
// Emits per-bench metadata (source/bench type; clip and scale for SKPs).
543 void fillCurrentOptions(ResultsWriter* log) const {
544 log->configOption("source_type", fSourceType);
545 log->configOption("bench_type", fBenchType);
546 if (0 == strcmp(fSourceType, "skp")) {
547 log->configOption("clip",
548 SkStringPrintf("%d %d %d %d", fClip.fLeft, fClip.fTop,
549 fClip.fRight, fClip.fBottom).c_str());
550 log->configOption("scale", SkStringPrintf("%.2g", fScales[fCurrentScale]).c_str());
555 const BenchRegistry* fBenches;
556 const skiagm::GMRegistry* fGMs;
558 SkTArray<SkScalar> fScales;
559 SkTArray<SkString> fSKPs;
561 const char* fSourceType; // What we're benching: bench, GM, SKP, ...
562 const char* fBenchType; // How we bench it: micro, recording, playback, ...
563 int fCurrentRecording;
// Tool entry point: parses state from flags, estimates timer overhead,
// then runs every bench from BenchmarkStream on every enabled config,
// logging stats to stdout and (optionally) a JSON results file.
// NOTE(review): heavily elided in this excerpt — setup lines, several
// closing braces, and the final return are missing.
568 int nanobench_main();
569 int nanobench_main() {
// Configure the global GPU context factory from flags.
574 GrContext::Options grContextOpts;
575 grContextOpts.fDrawPathToCompressedTexture = FLAGS_gpuCompressAlphaMasks;
576 gGrFactory.reset(SkNEW_ARGS(GrContextFactory, (grContextOpts)));
579 if (FLAGS_veryVerbose) {
580 FLAGS_verbose = true;
// With a fixed loop count there's no calibration, so no need for GPU warmup lag.
583 if (kAutoTuneLoops != FLAGS_loops) {
585 FLAGS_gpuFrameLag = 0;
588 if (!FLAGS_writePath.isEmpty()) {
589 SkDebugf("Writing files to %s.\n", FLAGS_writePath[0]);
590 if (!sk_mkdir(FLAGS_writePath[0])) {
591 SkDebugf("Could not create %s. Files won't be written.\n", FLAGS_writePath[0]);
592 FLAGS_writePath.set(0, NULL);
// Default to a no-op writer; swap in the JSON writer if requested.
596 SkAutoTDelete<ResultsWriter> log(SkNEW(ResultsWriter));
597 if (!FLAGS_outResultsFile.isEmpty()) {
598 log.reset(SkNEW(NanoJSONResultsWriter(FLAGS_outResultsFile[0])));
// --properties and --key come as name/value pairs; odd counts are an error.
601 if (1 == FLAGS_properties.count() % 2) {
602 SkDebugf("ERROR: --properties must be passed with an even number of arguments.\n");
605 for (int i = 1; i < FLAGS_properties.count(); i += 2) {
606 log->property(FLAGS_properties[i-1], FLAGS_properties[i]);
609 if (1 == FLAGS_key.count() % 2) {
610 SkDebugf("ERROR: --key must be passed with an even number of arguments.\n");
613 for (int i = 1; i < FLAGS_key.count(); i += 2) {
614 log->key(FLAGS_key[i-1], FLAGS_key[i]);
617 const double overhead = estimate_timer_overhead();
618 SkDebugf("Timer overhead: %s\n", HUMANIZE(overhead));
620 SkAutoTMalloc<double> samples(FLAGS_samples);
// Print the appropriate table header for the chosen verbosity.
622 if (kAutoTuneLoops != FLAGS_loops) {
623 SkDebugf("Fixed number of loops; times would only be misleading so we won't print them.\n");
624 } else if (FLAGS_verbose) {
626 } else if (FLAGS_quiet) {
627 SkDebugf("median\tbench\tconfig\n");
629 SkDebugf("maxrss\tloops\tmin\tmedian\tmean\tmax\tstddev\t%-*s\tconfig\tbench\n",
630 FLAGS_samples, "samples");
633 SkTDArray<Config> configs;
634 create_configs(&configs);
// Main loop: one bench at a time, against every target it's enabled for.
637 BenchmarkStream benchStream;
638 while (Benchmark* b = benchStream.next()) {
639 SkAutoTDelete<Benchmark> bench(b);
640 if (SkCommandLineFlags::ShouldSkip(FLAGS_match, bench->getUniqueName())) {
644 SkTDArray<Target*> targets;
645 create_targets(&targets, bench.get(), configs);
647 if (!targets.isEmpty()) {
648 log->bench(bench->getUniqueName(), bench->getSize().fX, bench->getSize().fY);
651 for (int j = 0; j < targets.count(); j++) {
// Non-rendering targets have no surface, hence no canvas.
652 SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL;
653 const char* config = targets[j]->config.name;
// Dispatch to the GPU or CPU timing path (loop-count result elided).
657 Benchmark::kGPU_Backend == targets[j]->config.backend
658 ? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get())
661 cpu_bench( overhead, bench.get(), canvas, samples.get());
663 if (canvas && !FLAGS_writePath.isEmpty() && FLAGS_writePath[0]) {
// Snapshot goes to <writePath>/<config>/<benchName>.png.
664 SkString pngFilename = SkOSPath::Join(FLAGS_writePath[0], config);
665 pngFilename = SkOSPath::Join(pngFilename.c_str(), bench->getUniqueName());
666 pngFilename.append(".png");
667 write_canvas_png(canvas, pngFilename);
670 if (kFailedLoops == loops) {
671 // Can't be timed. A warning note has already been printed.
675 Stats stats(samples.get(), FLAGS_samples);
677 log->configOption("name", bench->getName());
678 benchStream.fillCurrentOptions(log.get());
680 if (Benchmark::kGPU_Backend == targets[j]->config.backend) {
681 fill_gpu_options(log.get(), targets[j]->gl);
684 log->timer("min_ms", stats.min);
685 log->timer("median_ms", stats.median);
686 log->timer("mean_ms", stats.mean);
687 log->timer("max_ms", stats.max);
688 log->timer("stddev_ms", sqrt(stats.var));
// Periodic flush so a crash mid-run doesn't lose all JSON results.
689 if (runs++ % FLAGS_flushEvery == 0) {
// Console reporting, by verbosity mode.
693 if (kAutoTuneLoops != FLAGS_loops) {
694 if (targets.count() == 1) {
695 config = ""; // Only print the config if we run the same bench on more than one.
697 SkDebugf("%4dM\t%s\t%s\n"
698 , sk_tools::getMaxResidentSetSizeMB()
699 , bench->getUniqueName()
701 } else if (FLAGS_verbose) {
702 for (int i = 0; i < FLAGS_samples; i++) {
703 SkDebugf("%s ", HUMANIZE(samples[i]));
705 SkDebugf("%s\n", bench->getUniqueName());
706 } else if (FLAGS_quiet) {
707 if (targets.count() == 1) {
708 config = ""; // Only print the config if we run the same bench on more than one.
710 SkDebugf("%s\t%s\t%s\n", HUMANIZE(stats.median), bench->getUniqueName(), config);
712 const double stddev_percent = 100 * sqrt(stats.var) / stats.mean;
713 SkDebugf("%4dM\t%d\t%s\t%s\t%s\t%s\t%.0f%%\t%s\t%s\t%s\n"
714 , sk_tools::getMaxResidentSetSizeMB()
716 , HUMANIZE(stats.min)
717 , HUMANIZE(stats.median)
718 , HUMANIZE(stats.mean)
719 , HUMANIZE(stats.max)
723 , bench->getUniqueName()
726 #if SK_SUPPORT_GPU && GR_CACHE_STATS
727 if (FLAGS_veryVerbose &&
728 Benchmark::kGPU_Backend == targets[j]->config.backend) {
729 gGrFactory->get(targets[j]->config.ctxType)->printCacheStats();
// Per-bench GPU context hygiene, controlled by flags.
736 if (FLAGS_abandonGpuContext) {
737 gGrFactory->abandonContexts();
739 if (FLAGS_resetGpuContext || FLAGS_abandonGpuContext) {
740 gGrFactory->destroyContexts();
// Standard entry point (iOS builds supply their own harness instead).
// NOTE(review): the closing brace/#endif are elided in this excerpt.
748 #if !defined SK_BUILD_FOR_IOS
749 int main(int argc, char** argv) {
750 SkCommandLineFlags::Parse(argc, argv);
751 return nanobench_main();