From: Mike Klein Date: Sat, 7 Jan 2017 11:09:43 +0000 (-0500) Subject: SkXbyak: loop inside, only body X-Git-Tag: accepted/tizen/5.0/unified/20181102.025319~55^2~956 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1523dbdd7211561957eec466c87af84f1f2bf5c5;p=platform%2Fupstream%2FlibSkiaSharp.git SkXbyak: loop inside, only body SkXbyak_… 927 …JITCompiled 1x …Interpreted 1.33x …HandWritten 1.97x CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD Change-Id: I486bbc341a38354345bfcf3d6150d1628f83f186 Reviewed-on: https://skia-review.googlesource.com/6726 Reviewed-by: Herb Derby Commit-Queue: Mike Klein --- diff --git a/src/opts/SkXbyak.cpp b/src/opts/SkXbyak.cpp index 59863e9..501d687 100644 --- a/src/opts/SkXbyak.cpp +++ b/src/opts/SkXbyak.cpp @@ -33,16 +33,15 @@ namespace { return nullptr; } - Pipeline(const SkRasterPipeline::Stage* stages, int n, bool* supported) { + Pipeline(const SkRasterPipeline::Stage* stages, int nstages, bool* supported) { // Set up some register name aliases. - // y = rsi, tail = rdx; - auto x = rdi; + // y = rsi + auto x = rdi, n = rdx; auto r = ymm0, g = ymm1, b = ymm2, a = ymm3, dr = ymm4, dg = ymm5, db = ymm6, da = ymm7; - Xbyak::Label floatOneStorage; - - //trap(); + Xbyak::Label floatOneStorage, + start; // TODO: set up (x+0.5,y+0.5) in (r,g) vxorps(r,r); @@ -54,7 +53,14 @@ namespace { vxorps(db,db); vxorps(da,da); - for (int i = 0; i < n; i++) { + auto zero = ymm14, + one = ymm15; + vxorps(zero, zero); + vbroadcastss(one, ptr[rip + floatOneStorage]); + + L(start); + //trap(); + for (int i = 0; i < nstages; i++) { switch(stages[i].stage) { case SkRasterPipeline::load_f16: mov(rax, (size_t)stages[i].ctx); @@ -77,11 +83,9 @@ namespace { break; case SkRasterPipeline::unpremul: - vxorps(ymm8, ymm8); // ymm8: 0 - vcmpeqps(ymm10, ymm8, a); // ymm10: a == 0 - vbroadcastss(ymm9, ptr[rip + floatOneStorage]); // ymm9: 1.0f - vdivps(ymm11, ymm9, a); // ymm11: 1/a - vblendvps(ymm10, ymm10, ymm8, ymm11); // ymm10: (a==0) ? 0 : 1/a + vcmpeqps(ymm10, zero, a); // ymm10: a == 0 + vdivps(ymm11, one, a); // ymm11: 1/a + vblendvps(ymm10, ymm10, zero, ymm11); // ymm10: (a==0) ? 0 : 1/a vmulps(r, r, ymm10); vmulps(g, g, ymm10); vmulps(b, b, ymm10); @@ -112,6 +116,9 @@ namespace { return; } } + add(x, 8); + cmp(x, n); + jl(start); vzeroupper(); ret(); @@ -140,14 +147,7 @@ std::function SkRasterPipeline::jit() const { return [pipeline] (size_t x, size_t y, size_t n) { auto call = pipeline->getCode(); //printf("fn addr: %p\n", (void*)call); - while (n >= 8) { - call(x,y,0); - x += 8; - n -= 8; - } - if (n) { - call(x,y,n); - } + call(x,y,n); }; } #if 0