dither stage
author Mike Klein <mtklein@chromium.org>
Wed, 3 May 2017 17:05:13 +0000 (13:05 -0400)
committer Skia Commit-Bot <skia-commit-bot@chromium.org>
Wed, 3 May 2017 18:16:53 +0000 (18:16 +0000)
I think we can dither generically as a pipeline stage.

I'm not married to where the dither happens, or the implementation,
which is mostly cribbed from
https://en.wikipedia.org/wiki/Ordered_dithering.

BUG=skia:3302,skia:6224

Change-Id: If7f6b22a523ca0b34cb03c0aa97b6734c34e0133
Reviewed-on: https://skia-review.googlesource.com/15161
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Florin Malita <fmalita@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
src/core/SkRasterPipeline.h
src/core/SkRasterPipelineBlitter.cpp
src/jumper/SkJumper.h
src/jumper/SkJumper_generated.S
src/jumper/SkJumper_generated_win.S
src/jumper/SkJumper_stages.cpp

index e8dd950f796fd6dc61810c36a7ffcd26aa4a5a79..361e8336214d9f518e94d254bf953e2ca041d50f 100644 (file)
@@ -62,7 +62,7 @@
     M(unpremul) M(premul)                                        \
     M(set_rgb) M(swap_rb)                                        \
     M(from_srgb) M(to_srgb)                                      \
-    M(constant_color) M(seed_shader)                             \
+    M(constant_color) M(seed_shader) M(dither)                   \
     M(load_a8)   M(store_a8)                                     \
     M(load_g8)                                                   \
     M(load_565)  M(store_565)                                    \
index a926b43efa0e779303679797a54acd6165a87e0d..576ba4ba191b7b998d9f2c5e88ec3e655d5ddf7c 100644 (file)
@@ -16,7 +16,7 @@
 #include "SkRasterPipeline.h"
 #include "SkShader.h"
 #include "SkUtils.h"
-
+#include "../jumper/SkJumper.h"
 
 class SkRasterPipelineBlitter : public SkBlitter {
 public:
@@ -132,6 +132,30 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
         is_opaque = is_opaque && (colorFilter->getFlags() & SkColorFilter::kAlphaUnchanged_Flag);
     }
 
+    // TODO: Think more about under what conditions we dither:
+    //   - if we're drawing anything into 565 and the user has asked us to dither, or
+    //   - if we're drawing a gradient into 565 or 8888.
+    // TODO: move this later in the pipeline, perhaps the first thing we do in append_store()?
+    if ((paint.isDither() && dst.info().colorType() == kRGB_565_SkColorType) ||
+        (shader && shader->asAGradient(nullptr) >= SkShader::kLinear_GradientType)) {
+        float rate;
+        switch (dst.info().colorType()) {
+            case   kRGB_565_SkColorType:  rate =  1/63.0f; break;
+            case kBGRA_8888_SkColorType:
+            case kRGBA_8888_SkColorType:  rate = 1/255.0f; break;
+            default:                      rate =     0.0f; break;
+        }
+        if (rate) {
+            auto ctx = alloc->make<SkJumper_DitherCtx>();
+            ctx->y    = &blitter->fCurrentY;
+            ctx->rate = rate;
+            pipeline->append(SkRasterPipeline::dither, ctx);
+            pipeline->append(SkRasterPipeline::clamp_0);
+            pipeline->append(SkRasterPipeline::clamp_a);
+            is_constant = false;
+        }
+    }
+
     if (is_constant) {
         pipeline->append(SkRasterPipeline::store_f32, &paintColor);
         pipeline->run(0,1);
index cd164fe913b3fccff0ae36acbc1fbce750316001..9c805b3b8ee1b2b87cc6c6d928615fbec688fc46 100644 (file)
@@ -94,4 +94,9 @@ struct SkJumper_ParametricTransferFunction {
     float G, A,B,C,D,E,F;
 };
 
+struct SkJumper_DitherCtx {  // Context handed to the dither pipeline stage.
+    const int* y;            // Set by the blitter to &fCurrentY (presumably the row being drawn -- confirm).
+    float rate;              // Dither scale: the blitter uses 1/63 for 565 and 1/255 for 8888 destinations.
+};
+
 #endif//SkJumper_DEFINED
index 90b9e29ff24b0ec1eb3064ec4105e13e5d991ab6..71afcd85a94f7a2f7144a48da21f1cfc17fb17d2 100644 (file)
@@ -91,6 +91,53 @@ _sk_seed_shader_aarch64:
   .long  0x6f00e407                          // movi          v7.2d, #0x0
   .long  0xd61f0060                          // br            x3
 
+HIDDEN _sk_dither_aarch64
+.globl _sk_dither_aarch64
+FUNCTION(_sk_dither_aarch64)
+_sk_dither_aarch64:
+  .long  0xa8c10c28                          // ldp           x8, x3, [x1], #16
+  .long  0x3dc00051                          // ldr           q17, [x2]
+  .long  0x4e040c10                          // dup           v16.4s, w0
+  .long  0x4e21da10                          // scvtf         v16.4s, v16.4s
+  .long  0xf9400109                          // ldr           x9, [x8]
+  .long  0x4e31d610                          // fadd          v16.4s, v16.4s, v17.4s
+  .long  0x4f000432                          // movi          v18.4s, #0x1
+  .long  0x4f000454                          // movi          v20.4s, #0x2
+  .long  0x4d40c935                          // ld1r          {v21.4s}, [x9]
+  .long  0x4ea1ba10                          // fcvtzs        v16.4s, v16.4s
+  .long  0x4e321e11                          // and           v17.16b, v16.16b, v18.16b
+  .long  0x4e341e16                          // and           v22.16b, v16.16b, v20.16b
+  .long  0x4f000493                          // movi          v19.4s, #0x4
+  .long  0x4f245631                          // shl           v17.4s, v17.4s, #4
+  .long  0x4f2156d6                          // shl           v22.4s, v22.4s, #1
+  .long  0x4eb11ed1                          // orr           v17.16b, v22.16b, v17.16b
+  .long  0x4e331e16                          // and           v22.16b, v16.16b, v19.16b
+  .long  0x6e351e10                          // eor           v16.16b, v16.16b, v21.16b
+  .long  0x6f3e06d6                          // ushr          v22.4s, v22.4s, #2
+  .long  0x4e321e12                          // and           v18.16b, v16.16b, v18.16b
+  .long  0x4eb61e31                          // orr           v17.16b, v17.16b, v22.16b
+  .long  0x4e341e14                          // and           v20.16b, v16.16b, v20.16b
+  .long  0x4f255652                          // shl           v18.4s, v18.4s, #5
+  .long  0x4e331e10                          // and           v16.16b, v16.16b, v19.16b
+  .long  0x4f225694                          // shl           v20.4s, v20.4s, #2
+  .long  0x4eb21e31                          // orr           v17.16b, v17.16b, v18.16b
+  .long  0x52a79049                          // mov           w9, #0x3c820000
+  .long  0xbd400913                          // ldr           s19, [x8, #8]
+  .long  0x6f3f0610                          // ushr          v16.4s, v16.4s, #1
+  .long  0x4eb41e31                          // orr           v17.16b, v17.16b, v20.16b
+  .long  0x72810429                          // movk          w9, #0x821
+  .long  0x4eb01e30                          // orr           v16.16b, v17.16b, v16.16b
+  .long  0x4e040d36                          // dup           v22.4s, w9
+  .long  0x4f0567f5                          // movi          v21.4s, #0xbf, lsl #24
+  .long  0x4e21da10                          // scvtf         v16.4s, v16.4s
+  .long  0x4e30ced5                          // fmla          v21.4s, v22.4s, v16.4s
+  .long  0x4f9392b0                          // fmul          v16.4s, v21.4s, v19.s[0]
+  .long  0x6e23de10                          // fmul          v16.4s, v16.4s, v3.4s
+  .long  0x4e20d600                          // fadd          v0.4s, v16.4s, v0.4s
+  .long  0x4e21d601                          // fadd          v1.4s, v16.4s, v1.4s
+  .long  0x4e22d602                          // fadd          v2.4s, v16.4s, v2.4s
+  .long  0xd61f0060                          // br            x3
+
 HIDDEN _sk_constant_color_aarch64
 .globl _sk_constant_color_aarch64
 FUNCTION(_sk_constant_color_aarch64)
@@ -2204,9 +2251,9 @@ FUNCTION(_sk_gather_i8_aarch64)
 _sk_gather_i8_aarch64:
   .long  0xaa0103e8                          // mov           x8, x1
   .long  0xf8408429                          // ldr           x9, [x1], #8
-  .long  0xb4000069                          // cbz           x9, 1cf8 <sk_gather_i8_aarch64+0x14>
+  .long  0xb4000069                          // cbz           x9, 1da0 <sk_gather_i8_aarch64+0x14>
   .long  0xaa0903ea                          // mov           x10, x9
-  .long  0x14000003                          // b             1d00 <sk_gather_i8_aarch64+0x1c>
+  .long  0x14000003                          // b             1da8 <sk_gather_i8_aarch64+0x1c>
   .long  0xf940050a                          // ldr           x10, [x8, #8]
   .long  0x91004101                          // add           x1, x8, #0x10
   .long  0xf8410548                          // ldr           x8, [x10], #16
@@ -3055,7 +3102,7 @@ _sk_linear_gradient_aarch64:
   .long  0x4d40c902                          // ld1r          {v2.4s}, [x8]
   .long  0xf9400128                          // ldr           x8, [x9]
   .long  0x4d40c943                          // ld1r          {v3.4s}, [x10]
-  .long  0xb40006c8                          // cbz           x8, 28cc <sk_linear_gradient_aarch64+0x100>
+  .long  0xb40006c8                          // cbz           x8, 2974 <sk_linear_gradient_aarch64+0x100>
   .long  0x6dbf23e9                          // stp           d9, d8, [sp, #-16]!
   .long  0xf9400529                          // ldr           x9, [x9, #8]
   .long  0x6f00e413                          // movi          v19.2d, #0x0
@@ -3106,9 +3153,9 @@ _sk_linear_gradient_aarch64:
   .long  0xd1000508                          // sub           x8, x8, #0x1
   .long  0x6e771fd0                          // bsl           v16.16b, v30.16b, v23.16b
   .long  0x91009129                          // add           x9, x9, #0x24
-  .long  0xb5fffaa8                          // cbnz          x8, 2814 <sk_linear_gradient_aarch64+0x48>
+  .long  0xb5fffaa8                          // cbnz          x8, 28bc <sk_linear_gradient_aarch64+0x48>
   .long  0x6cc123e9                          // ldp           d9, d8, [sp], #16
-  .long  0x14000005                          // b             28dc <sk_linear_gradient_aarch64+0x110>
+  .long  0x14000005                          // b             2984 <sk_linear_gradient_aarch64+0x110>
   .long  0x6f00e414                          // movi          v20.2d, #0x0
   .long  0x6f00e412                          // movi          v18.2d, #0x0
   .long  0x6f00e411                          // movi          v17.2d, #0x0
@@ -3580,6 +3627,57 @@ _sk_seed_shader_vfp4:
   .long  0xf2807010                          // vmov.i32      d7, #0
   .long  0xe12fff1c                          // bx            ip
 
+HIDDEN _sk_dither_vfp4
+.globl _sk_dither_vfp4
+FUNCTION(_sk_dither_vfp4)
+_sk_dither_vfp4:
+  .long  0xe92d4800                          // push          {fp, lr}
+  .long  0xee800b90                          // vdup.32       d16, r0
+  .long  0xf2c02012                          // vmov.i32      d18, #2
+  .long  0xf3fb0620                          // vcvt.f32.s32  d16, d16
+  .long  0xedd21b00                          // vldr          d17, [r2]
+  .long  0xe8911008                          // ldm           r1, {r3, ip}
+  .long  0xf2c03014                          // vmov.i32      d19, #4
+  .long  0xe2811008                          // add           r1, r1, #8
+  .long  0xf2400da1                          // vadd.f32      d16, d16, d17
+  .long  0xe493e004                          // ldr           lr, [r3], #4
+  .long  0xf2c01011                          // vmov.i32      d17, #1
+  .long  0xf4ee5c9f                          // vld1.32       {d21[]}, [lr :32]
+  .long  0xf3fb0720                          // vcvt.s32.f32  d16, d16
+  .long  0xf24041b1                          // vand          d20, d16, d17
+  .long  0xf24061b2                          // vand          d22, d16, d18
+  .long  0xf34051b5                          // veor          d21, d16, d21
+  .long  0xf24001b3                          // vand          d16, d16, d19
+  .long  0xf2e44534                          // vshl.s32      d20, d20, #4
+  .long  0xf2e16536                          // vshl.s32      d22, d22, #1
+  .long  0xf24511b1                          // vand          d17, d21, d17
+  .long  0xf3fe0030                          // vshr.u32      d16, d16, #2
+  .long  0xf26641b4                          // vorr          d20, d22, d20
+  .long  0xf24521b2                          // vand          d18, d21, d18
+  .long  0xf2e51531                          // vshl.s32      d17, d17, #5
+  .long  0xf26401b0                          // vorr          d16, d20, d16
+  .long  0xf24531b3                          // vand          d19, d21, d19
+  .long  0xf26001b1                          // vorr          d16, d16, d17
+  .long  0xf2e22532                          // vshl.s32      d18, d18, #2
+  .long  0xf3ff1033                          // vshr.u32      d17, d19, #1
+  .long  0xf26001b2                          // vorr          d16, d16, d18
+  .long  0xf26001b1                          // vorr          d16, d16, d17
+  .long  0xeddf1b0b                          // vldr          d17, [pc, #44]
+  .long  0xf3fb0620                          // vcvt.f32.s32  d16, d16
+  .long  0xf3400db1                          // vmul.f32      d16, d16, d17
+  .long  0xf3c3161f                          // vmov.i32      d17, #-1090519040
+  .long  0xf2400da1                          // vadd.f32      d16, d16, d17
+  .long  0xf4e31c9f                          // vld1.32       {d17[]}, [r3 :32]
+  .long  0xf3410db0                          // vmul.f32      d16, d17, d16
+  .long  0xf3400d93                          // vmul.f32      d16, d16, d3
+  .long  0xf2000d80                          // vadd.f32      d0, d16, d0
+  .long  0xf2001d81                          // vadd.f32      d1, d16, d1
+  .long  0xf2002d82                          // vadd.f32      d2, d16, d2
+  .long  0xe8bd4800                          // pop           {fp, lr}
+  .long  0xe12fff1c                          // bx            ip
+  .long  0x3c820821                          // .word         0x3c820821
+  .long  0x3c820821                          // .word         0x3c820821
+
 HIDDEN _sk_constant_color_vfp4
 .globl _sk_constant_color_vfp4
 FUNCTION(_sk_constant_color_vfp4)
@@ -6849,7 +6947,7 @@ _sk_linear_gradient_vfp4:
   .long  0xe494c00c                          // ldr           ip, [r4], #12
   .long  0xf4a41c9f                          // vld1.32       {d1[]}, [r4 :32]
   .long  0xe35c0000                          // cmp           ip, #0
-  .long  0x0a000036                          // beq           2d80 <sk_linear_gradient_vfp4+0x110>
+  .long  0x0a000036                          // beq           2e38 <sk_linear_gradient_vfp4+0x110>
   .long  0xe59e3004                          // ldr           r3, [lr, #4]
   .long  0xf2c01010                          // vmov.i32      d17, #0
   .long  0xf2c07010                          // vmov.i32      d23, #0
@@ -6899,12 +6997,12 @@ _sk_linear_gradient_vfp4:
   .long  0xf26371b3                          // vorr          d23, d19, d19
   .long  0xf26481b4                          // vorr          d24, d20, d20
   .long  0xf26561b5                          // vorr          d22, d21, d21
-  .long  0x1affffd3                          // bne           2cbc <sk_linear_gradient_vfp4+0x4c>
+  .long  0x1affffd3                          // bne           2d74 <sk_linear_gradient_vfp4+0x4c>
   .long  0xf26c01bc                          // vorr          d16, d28, d28
   .long  0xf22b11bb                          // vorr          d1, d27, d27
   .long  0xf22a21ba                          // vorr          d2, d26, d26
   .long  0xf22931b9                          // vorr          d3, d25, d25
-  .long  0xea000003                          // b             2d90 <sk_linear_gradient_vfp4+0x120>
+  .long  0xea000003                          // b             2e48 <sk_linear_gradient_vfp4+0x120>
   .long  0xf2c05010                          // vmov.i32      d21, #0
   .long  0xf2c04010                          // vmov.i32      d20, #0
   .long  0xf2c03010                          // vmov.i32      d19, #0
@@ -7437,14 +7535,14 @@ _sk_seed_shader_hsw:
   .byte  197,249,110,199                     // vmovd         %edi,%xmm0
   .byte  196,226,125,88,192                  // vpbroadcastd  %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,217,60,0,0        // vbroadcastss  0x3cd9(%rip),%ymm1        # 3d9c <_sk_callback_hsw+0x127>
+  .byte  196,226,125,24,13,173,61,0,0        // vbroadcastss  0x3dad(%rip),%ymm1        # 3e70 <_sk_callback_hsw+0x127>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,88,2                        // vaddps        (%rdx),%ymm0,%ymm0
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  197,236,88,201                      // vaddps        %ymm1,%ymm2,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,21,189,60,0,0        // vbroadcastss  0x3cbd(%rip),%ymm2        # 3da0 <_sk_callback_hsw+0x12b>
+  .byte  196,226,125,24,21,145,61,0,0        // vbroadcastss  0x3d91(%rip),%ymm2        # 3e74 <_sk_callback_hsw+0x12b>
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
   .byte  197,220,87,228                      // vxorps        %ymm4,%ymm4,%ymm4
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
@@ -7452,6 +7550,52 @@ _sk_seed_shader_hsw:
   .byte  197,196,87,255                      // vxorps        %ymm7,%ymm7,%ymm7
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_dither_hsw
+.globl _sk_dither_hsw
+FUNCTION(_sk_dither_hsw)
+_sk_dither_hsw:
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  197,121,110,199                     // vmovd         %edi,%xmm8
+  .byte  196,66,125,88,192                   // vpbroadcastd  %xmm8,%ymm8
+  .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
+  .byte  197,60,88,2                         // vaddps        (%rdx),%ymm8,%ymm8
+  .byte  196,65,126,91,192                   // vcvttps2dq    %ymm8,%ymm8
+  .byte  76,139,0                            // mov           (%rax),%r8
+  .byte  196,66,125,88,8                     // vpbroadcastd  (%r8),%ymm9
+  .byte  196,65,61,239,201                   // vpxor         %ymm9,%ymm8,%ymm9
+  .byte  196,98,125,88,21,80,61,0,0          // vpbroadcastd  0x3d50(%rip),%ymm10        # 3e78 <_sk_callback_hsw+0x12f>
+  .byte  196,65,53,219,218                   // vpand         %ymm10,%ymm9,%ymm11
+  .byte  196,193,37,114,243,5                // vpslld        $0x5,%ymm11,%ymm11
+  .byte  196,65,61,219,210                   // vpand         %ymm10,%ymm8,%ymm10
+  .byte  196,193,45,114,242,4                // vpslld        $0x4,%ymm10,%ymm10
+  .byte  196,98,125,88,37,53,61,0,0          // vpbroadcastd  0x3d35(%rip),%ymm12        # 3e7c <_sk_callback_hsw+0x133>
+  .byte  196,98,125,88,45,48,61,0,0          // vpbroadcastd  0x3d30(%rip),%ymm13        # 3e80 <_sk_callback_hsw+0x137>
+  .byte  196,65,53,219,245                   // vpand         %ymm13,%ymm9,%ymm14
+  .byte  196,193,13,114,246,2                // vpslld        $0x2,%ymm14,%ymm14
+  .byte  196,65,61,219,237                   // vpand         %ymm13,%ymm8,%ymm13
+  .byte  196,65,21,254,237                   // vpaddd        %ymm13,%ymm13,%ymm13
+  .byte  196,65,53,219,204                   // vpand         %ymm12,%ymm9,%ymm9
+  .byte  196,193,53,114,209,1                // vpsrld        $0x1,%ymm9,%ymm9
+  .byte  196,65,61,219,196                   // vpand         %ymm12,%ymm8,%ymm8
+  .byte  196,193,61,114,208,2                // vpsrld        $0x2,%ymm8,%ymm8
+  .byte  196,65,21,235,210                   // vpor          %ymm10,%ymm13,%ymm10
+  .byte  196,65,45,235,192                   // vpor          %ymm8,%ymm10,%ymm8
+  .byte  196,65,37,235,214                   // vpor          %ymm14,%ymm11,%ymm10
+  .byte  196,65,61,235,194                   // vpor          %ymm10,%ymm8,%ymm8
+  .byte  196,65,61,235,193                   // vpor          %ymm9,%ymm8,%ymm8
+  .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
+  .byte  196,98,125,24,13,226,60,0,0         // vbroadcastss  0x3ce2(%rip),%ymm9        # 3e84 <_sk_callback_hsw+0x13b>
+  .byte  196,98,125,24,21,221,60,0,0         // vbroadcastss  0x3cdd(%rip),%ymm10        # 3e88 <_sk_callback_hsw+0x13f>
+  .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
+  .byte  196,98,125,24,64,8                  // vbroadcastss  0x8(%rax),%ymm8
+  .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
+  .byte  197,60,89,195                       // vmulps        %ymm3,%ymm8,%ymm8
+  .byte  197,188,88,192                      // vaddps        %ymm0,%ymm8,%ymm0
+  .byte  197,188,88,201                      // vaddps        %ymm1,%ymm8,%ymm1
+  .byte  197,188,88,210                      // vaddps        %ymm2,%ymm8,%ymm2
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_constant_color_hsw
 .globl _sk_constant_color_hsw
 FUNCTION(_sk_constant_color_hsw)
@@ -7480,7 +7624,7 @@ HIDDEN _sk_srcatop_hsw
 FUNCTION(_sk_srcatop_hsw)
 _sk_srcatop_hsw:
   .byte  197,252,89,199                      // vmulps        %ymm7,%ymm0,%ymm0
-  .byte  196,98,125,24,5,109,60,0,0          // vbroadcastss  0x3c6d(%rip),%ymm8        # 3da4 <_sk_callback_hsw+0x12f>
+  .byte  196,98,125,24,5,127,60,0,0          // vbroadcastss  0x3c7f(%rip),%ymm8        # 3e8c <_sk_callback_hsw+0x143>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,226,61,184,196                  // vfmadd231ps   %ymm4,%ymm8,%ymm0
   .byte  197,244,89,207                      // vmulps        %ymm7,%ymm1,%ymm1
@@ -7496,7 +7640,7 @@ HIDDEN _sk_dstatop_hsw
 .globl _sk_dstatop_hsw
 FUNCTION(_sk_dstatop_hsw)
 _sk_dstatop_hsw:
-  .byte  196,98,125,24,5,64,60,0,0           // vbroadcastss  0x3c40(%rip),%ymm8        # 3da8 <_sk_callback_hsw+0x133>
+  .byte  196,98,125,24,5,82,60,0,0           // vbroadcastss  0x3c52(%rip),%ymm8        # 3e90 <_sk_callback_hsw+0x147>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,226,101,184,196                 // vfmadd231ps   %ymm4,%ymm3,%ymm0
@@ -7535,7 +7679,7 @@ HIDDEN _sk_srcout_hsw
 .globl _sk_srcout_hsw
 FUNCTION(_sk_srcout_hsw)
 _sk_srcout_hsw:
-  .byte  196,98,125,24,5,231,59,0,0          // vbroadcastss  0x3be7(%rip),%ymm8        # 3dac <_sk_callback_hsw+0x137>
+  .byte  196,98,125,24,5,249,59,0,0          // vbroadcastss  0x3bf9(%rip),%ymm8        # 3e94 <_sk_callback_hsw+0x14b>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -7548,7 +7692,7 @@ HIDDEN _sk_dstout_hsw
 .globl _sk_dstout_hsw
 FUNCTION(_sk_dstout_hsw)
 _sk_dstout_hsw:
-  .byte  196,226,125,24,5,202,59,0,0         // vbroadcastss  0x3bca(%rip),%ymm0        # 3db0 <_sk_callback_hsw+0x13b>
+  .byte  196,226,125,24,5,220,59,0,0         // vbroadcastss  0x3bdc(%rip),%ymm0        # 3e98 <_sk_callback_hsw+0x14f>
   .byte  197,252,92,219                      // vsubps        %ymm3,%ymm0,%ymm3
   .byte  197,228,89,196                      // vmulps        %ymm4,%ymm3,%ymm0
   .byte  197,228,89,205                      // vmulps        %ymm5,%ymm3,%ymm1
@@ -7561,7 +7705,7 @@ HIDDEN _sk_srcover_hsw
 .globl _sk_srcover_hsw
 FUNCTION(_sk_srcover_hsw)
 _sk_srcover_hsw:
-  .byte  196,98,125,24,5,173,59,0,0          // vbroadcastss  0x3bad(%rip),%ymm8        # 3db4 <_sk_callback_hsw+0x13f>
+  .byte  196,98,125,24,5,191,59,0,0          // vbroadcastss  0x3bbf(%rip),%ymm8        # 3e9c <_sk_callback_hsw+0x153>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,93,184,192                  // vfmadd231ps   %ymm8,%ymm4,%ymm0
   .byte  196,194,85,184,200                  // vfmadd231ps   %ymm8,%ymm5,%ymm1
@@ -7574,7 +7718,7 @@ HIDDEN _sk_dstover_hsw
 .globl _sk_dstover_hsw
 FUNCTION(_sk_dstover_hsw)
 _sk_dstover_hsw:
-  .byte  196,98,125,24,5,140,59,0,0          // vbroadcastss  0x3b8c(%rip),%ymm8        # 3db8 <_sk_callback_hsw+0x143>
+  .byte  196,98,125,24,5,158,59,0,0          // vbroadcastss  0x3b9e(%rip),%ymm8        # 3ea0 <_sk_callback_hsw+0x157>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  196,226,61,168,196                  // vfmadd213ps   %ymm4,%ymm8,%ymm0
   .byte  196,226,61,168,205                  // vfmadd213ps   %ymm5,%ymm8,%ymm1
@@ -7598,7 +7742,7 @@ HIDDEN _sk_multiply_hsw
 .globl _sk_multiply_hsw
 FUNCTION(_sk_multiply_hsw)
 _sk_multiply_hsw:
-  .byte  196,98,125,24,5,87,59,0,0           // vbroadcastss  0x3b57(%rip),%ymm8        # 3dbc <_sk_callback_hsw+0x147>
+  .byte  196,98,125,24,5,105,59,0,0          // vbroadcastss  0x3b69(%rip),%ymm8        # 3ea4 <_sk_callback_hsw+0x15b>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,208                       // vmulps        %ymm0,%ymm9,%ymm10
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -7646,7 +7790,7 @@ HIDDEN _sk_xor__hsw
 .globl _sk_xor__hsw
 FUNCTION(_sk_xor__hsw)
 _sk_xor__hsw:
-  .byte  196,98,125,24,5,210,58,0,0          // vbroadcastss  0x3ad2(%rip),%ymm8        # 3dc0 <_sk_callback_hsw+0x14b>
+  .byte  196,98,125,24,5,228,58,0,0          // vbroadcastss  0x3ae4(%rip),%ymm8        # 3ea8 <_sk_callback_hsw+0x15f>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -7680,7 +7824,7 @@ _sk_darken_hsw:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,95,209                  // vmaxps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,90,58,0,0           // vbroadcastss  0x3a5a(%rip),%ymm8        # 3dc4 <_sk_callback_hsw+0x14f>
+  .byte  196,98,125,24,5,108,58,0,0          // vbroadcastss  0x3a6c(%rip),%ymm8        # 3eac <_sk_callback_hsw+0x163>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -7705,7 +7849,7 @@ _sk_lighten_hsw:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,9,58,0,0            // vbroadcastss  0x3a09(%rip),%ymm8        # 3dc8 <_sk_callback_hsw+0x153>
+  .byte  196,98,125,24,5,27,58,0,0           // vbroadcastss  0x3a1b(%rip),%ymm8        # 3eb0 <_sk_callback_hsw+0x167>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -7733,7 +7877,7 @@ _sk_difference_hsw:
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,172,57,0,0          // vbroadcastss  0x39ac(%rip),%ymm8        # 3dcc <_sk_callback_hsw+0x157>
+  .byte  196,98,125,24,5,190,57,0,0          // vbroadcastss  0x39be(%rip),%ymm8        # 3eb4 <_sk_callback_hsw+0x16b>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -7755,7 +7899,7 @@ _sk_exclusion_hsw:
   .byte  197,236,89,214                      // vmulps        %ymm6,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,106,57,0,0          // vbroadcastss  0x396a(%rip),%ymm8        # 3dd0 <_sk_callback_hsw+0x15b>
+  .byte  196,98,125,24,5,124,57,0,0          // vbroadcastss  0x397c(%rip),%ymm8        # 3eb8 <_sk_callback_hsw+0x16f>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -7765,7 +7909,7 @@ HIDDEN _sk_colorburn_hsw
 .globl _sk_colorburn_hsw
 FUNCTION(_sk_colorburn_hsw)
 _sk_colorburn_hsw:
-  .byte  196,98,125,24,5,88,57,0,0           // vbroadcastss  0x3958(%rip),%ymm8        # 3dd4 <_sk_callback_hsw+0x15f>
+  .byte  196,98,125,24,5,106,57,0,0          // vbroadcastss  0x396a(%rip),%ymm8        # 3ebc <_sk_callback_hsw+0x173>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,216                       // vmulps        %ymm0,%ymm9,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
@@ -7823,7 +7967,7 @@ HIDDEN _sk_colordodge_hsw
 FUNCTION(_sk_colordodge_hsw)
 _sk_colordodge_hsw:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,13,99,56,0,0          // vbroadcastss  0x3863(%rip),%ymm9        # 3dd8 <_sk_callback_hsw+0x163>
+  .byte  196,98,125,24,13,117,56,0,0         // vbroadcastss  0x3875(%rip),%ymm9        # 3ec0 <_sk_callback_hsw+0x177>
   .byte  197,52,92,215                       // vsubps        %ymm7,%ymm9,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,52,92,203                       // vsubps        %ymm3,%ymm9,%ymm9
@@ -7876,7 +8020,7 @@ HIDDEN _sk_hardlight_hsw
 .globl _sk_hardlight_hsw
 FUNCTION(_sk_hardlight_hsw)
 _sk_hardlight_hsw:
-  .byte  196,98,125,24,5,132,55,0,0          // vbroadcastss  0x3784(%rip),%ymm8        # 3ddc <_sk_callback_hsw+0x167>
+  .byte  196,98,125,24,5,150,55,0,0          // vbroadcastss  0x3796(%rip),%ymm8        # 3ec4 <_sk_callback_hsw+0x17b>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -7927,7 +8071,7 @@ HIDDEN _sk_overlay_hsw
 .globl _sk_overlay_hsw
 FUNCTION(_sk_overlay_hsw)
 _sk_overlay_hsw:
-  .byte  196,98,125,24,5,188,54,0,0          // vbroadcastss  0x36bc(%rip),%ymm8        # 3de0 <_sk_callback_hsw+0x16b>
+  .byte  196,98,125,24,5,206,54,0,0          // vbroadcastss  0x36ce(%rip),%ymm8        # 3ec8 <_sk_callback_hsw+0x17f>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -7988,10 +8132,10 @@ _sk_softlight_hsw:
   .byte  196,65,20,88,197                    // vaddps        %ymm13,%ymm13,%ymm8
   .byte  196,65,60,88,192                    // vaddps        %ymm8,%ymm8,%ymm8
   .byte  196,66,61,168,192                   // vfmadd213ps   %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,29,199,53,0,0         // vbroadcastss  0x35c7(%rip),%ymm11        # 3de8 <_sk_callback_hsw+0x173>
+  .byte  196,98,125,24,29,217,53,0,0         // vbroadcastss  0x35d9(%rip),%ymm11        # 3ed0 <_sk_callback_hsw+0x187>
   .byte  196,65,20,88,227                    // vaddps        %ymm11,%ymm13,%ymm12
   .byte  196,65,28,89,192                    // vmulps        %ymm8,%ymm12,%ymm8
-  .byte  196,98,125,24,37,184,53,0,0         // vbroadcastss  0x35b8(%rip),%ymm12        # 3dec <_sk_callback_hsw+0x177>
+  .byte  196,98,125,24,37,202,53,0,0         // vbroadcastss  0x35ca(%rip),%ymm12        # 3ed4 <_sk_callback_hsw+0x18b>
   .byte  196,66,21,184,196                   // vfmadd231ps   %ymm12,%ymm13,%ymm8
   .byte  196,65,124,82,245                   // vrsqrtps      %ymm13,%ymm14
   .byte  196,65,124,83,246                   // vrcpps        %ymm14,%ymm14
@@ -8001,7 +8145,7 @@ _sk_softlight_hsw:
   .byte  197,4,194,255,2                     // vcmpleps      %ymm7,%ymm15,%ymm15
   .byte  196,67,13,74,240,240                // vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   .byte  197,116,88,249                      // vaddps        %ymm1,%ymm1,%ymm15
-  .byte  196,98,125,24,5,123,53,0,0          // vbroadcastss  0x357b(%rip),%ymm8        # 3de4 <_sk_callback_hsw+0x16f>
+  .byte  196,98,125,24,5,141,53,0,0          // vbroadcastss  0x358d(%rip),%ymm8        # 3ecc <_sk_callback_hsw+0x183>
   .byte  196,65,60,92,237                    // vsubps        %ymm13,%ymm8,%ymm13
   .byte  197,132,92,195                      // vsubps        %ymm3,%ymm15,%ymm0
   .byte  196,98,125,168,235                  // vfmadd213ps   %ymm3,%ymm0,%ymm13
@@ -8094,7 +8238,7 @@ HIDDEN _sk_clamp_1_hsw
 .globl _sk_clamp_1_hsw
 FUNCTION(_sk_clamp_1_hsw)
 _sk_clamp_1_hsw:
-  .byte  196,98,125,24,5,0,52,0,0            // vbroadcastss  0x3400(%rip),%ymm8        # 3df0 <_sk_callback_hsw+0x17b>
+  .byte  196,98,125,24,5,18,52,0,0           // vbroadcastss  0x3412(%rip),%ymm8        # 3ed8 <_sk_callback_hsw+0x18f>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
@@ -8106,7 +8250,7 @@ HIDDEN _sk_clamp_a_hsw
 .globl _sk_clamp_a_hsw
 FUNCTION(_sk_clamp_a_hsw)
 _sk_clamp_a_hsw:
-  .byte  196,98,125,24,5,227,51,0,0          // vbroadcastss  0x33e3(%rip),%ymm8        # 3df4 <_sk_callback_hsw+0x17f>
+  .byte  196,98,125,24,5,245,51,0,0          // vbroadcastss  0x33f5(%rip),%ymm8        # 3edc <_sk_callback_hsw+0x193>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  197,252,93,195                      // vminps        %ymm3,%ymm0,%ymm0
   .byte  197,244,93,203                      // vminps        %ymm3,%ymm1,%ymm1
@@ -8192,7 +8336,7 @@ FUNCTION(_sk_unpremul_hsw)
 _sk_unpremul_hsw:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,65,100,194,200,0                // vcmpeqps      %ymm8,%ymm3,%ymm9
-  .byte  196,98,125,24,21,43,51,0,0          // vbroadcastss  0x332b(%rip),%ymm10        # 3df8 <_sk_callback_hsw+0x183>
+  .byte  196,98,125,24,21,61,51,0,0          // vbroadcastss  0x333d(%rip),%ymm10        # 3ee0 <_sk_callback_hsw+0x197>
   .byte  197,44,94,211                       // vdivps        %ymm3,%ymm10,%ymm10
   .byte  196,67,45,74,192,144                // vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
@@ -8205,16 +8349,16 @@ HIDDEN _sk_from_srgb_hsw
 .globl _sk_from_srgb_hsw
 FUNCTION(_sk_from_srgb_hsw)
 _sk_from_srgb_hsw:
-  .byte  196,98,125,24,5,12,51,0,0           // vbroadcastss  0x330c(%rip),%ymm8        # 3dfc <_sk_callback_hsw+0x187>
+  .byte  196,98,125,24,5,30,51,0,0           // vbroadcastss  0x331e(%rip),%ymm8        # 3ee4 <_sk_callback_hsw+0x19b>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  197,124,89,208                      // vmulps        %ymm0,%ymm0,%ymm10
-  .byte  196,98,125,24,29,254,50,0,0         // vbroadcastss  0x32fe(%rip),%ymm11        # 3e00 <_sk_callback_hsw+0x18b>
-  .byte  196,98,125,24,37,249,50,0,0         // vbroadcastss  0x32f9(%rip),%ymm12        # 3e04 <_sk_callback_hsw+0x18f>
+  .byte  196,98,125,24,29,16,51,0,0          // vbroadcastss  0x3310(%rip),%ymm11        # 3ee8 <_sk_callback_hsw+0x19f>
+  .byte  196,98,125,24,37,11,51,0,0          // vbroadcastss  0x330b(%rip),%ymm12        # 3eec <_sk_callback_hsw+0x1a3>
   .byte  196,65,124,40,236                   // vmovaps       %ymm12,%ymm13
   .byte  196,66,125,168,235                  // vfmadd213ps   %ymm11,%ymm0,%ymm13
-  .byte  196,98,125,24,53,234,50,0,0         // vbroadcastss  0x32ea(%rip),%ymm14        # 3e08 <_sk_callback_hsw+0x193>
+  .byte  196,98,125,24,53,252,50,0,0         // vbroadcastss  0x32fc(%rip),%ymm14        # 3ef0 <_sk_callback_hsw+0x1a7>
   .byte  196,66,45,168,238                   // vfmadd213ps   %ymm14,%ymm10,%ymm13
-  .byte  196,98,125,24,21,224,50,0,0         // vbroadcastss  0x32e0(%rip),%ymm10        # 3e0c <_sk_callback_hsw+0x197>
+  .byte  196,98,125,24,21,242,50,0,0         // vbroadcastss  0x32f2(%rip),%ymm10        # 3ef4 <_sk_callback_hsw+0x1ab>
   .byte  196,193,124,194,194,1               // vcmpltps      %ymm10,%ymm0,%ymm0
   .byte  196,195,21,74,193,0                 // vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
   .byte  196,65,116,89,200                   // vmulps        %ymm8,%ymm1,%ymm9
@@ -8240,16 +8384,16 @@ _sk_to_srgb_hsw:
   .byte  197,124,82,192                      // vrsqrtps      %ymm0,%ymm8
   .byte  196,65,124,83,200                   // vrcpps        %ymm8,%ymm9
   .byte  196,65,124,82,208                   // vrsqrtps      %ymm8,%ymm10
-  .byte  196,98,125,24,5,122,50,0,0          // vbroadcastss  0x327a(%rip),%ymm8        # 3e10 <_sk_callback_hsw+0x19b>
+  .byte  196,98,125,24,5,140,50,0,0          // vbroadcastss  0x328c(%rip),%ymm8        # 3ef8 <_sk_callback_hsw+0x1af>
   .byte  196,65,124,89,216                   // vmulps        %ymm8,%ymm0,%ymm11
-  .byte  196,98,125,24,37,112,50,0,0         // vbroadcastss  0x3270(%rip),%ymm12        # 3e14 <_sk_callback_hsw+0x19f>
-  .byte  196,98,125,24,45,107,50,0,0         // vbroadcastss  0x326b(%rip),%ymm13        # 3e18 <_sk_callback_hsw+0x1a3>
+  .byte  196,98,125,24,37,130,50,0,0         // vbroadcastss  0x3282(%rip),%ymm12        # 3efc <_sk_callback_hsw+0x1b3>
+  .byte  196,98,125,24,45,125,50,0,0         // vbroadcastss  0x327d(%rip),%ymm13        # 3f00 <_sk_callback_hsw+0x1b7>
   .byte  196,66,21,168,204                   // vfmadd213ps   %ymm12,%ymm13,%ymm9
-  .byte  196,98,125,24,53,97,50,0,0          // vbroadcastss  0x3261(%rip),%ymm14        # 3e1c <_sk_callback_hsw+0x1a7>
+  .byte  196,98,125,24,53,115,50,0,0         // vbroadcastss  0x3273(%rip),%ymm14        # 3f04 <_sk_callback_hsw+0x1bb>
   .byte  196,66,13,184,202                   // vfmadd231ps   %ymm10,%ymm14,%ymm9
-  .byte  196,98,125,24,21,87,50,0,0          // vbroadcastss  0x3257(%rip),%ymm10        # 3e20 <_sk_callback_hsw+0x1ab>
+  .byte  196,98,125,24,21,105,50,0,0         // vbroadcastss  0x3269(%rip),%ymm10        # 3f08 <_sk_callback_hsw+0x1bf>
   .byte  196,65,44,93,201                    // vminps        %ymm9,%ymm10,%ymm9
-  .byte  196,98,125,24,61,77,50,0,0          // vbroadcastss  0x324d(%rip),%ymm15        # 3e24 <_sk_callback_hsw+0x1af>
+  .byte  196,98,125,24,61,95,50,0,0          // vbroadcastss  0x325f(%rip),%ymm15        # 3f0c <_sk_callback_hsw+0x1c3>
   .byte  196,193,124,194,199,1               // vcmpltps      %ymm15,%ymm0,%ymm0
   .byte  196,195,53,74,195,0                 // vblendvps     %ymm0,%ymm11,%ymm9,%ymm0
   .byte  197,124,82,201                      // vrsqrtps      %ymm1,%ymm9
@@ -8282,26 +8426,26 @@ _sk_rgb_to_hsl_hsw:
   .byte  197,124,93,201                      // vminps        %ymm1,%ymm0,%ymm9
   .byte  197,52,93,202                       // vminps        %ymm2,%ymm9,%ymm9
   .byte  196,65,60,92,209                    // vsubps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,199,49,0,0         // vbroadcastss  0x31c7(%rip),%ymm11        # 3e28 <_sk_callback_hsw+0x1b3>
+  .byte  196,98,125,24,29,217,49,0,0         // vbroadcastss  0x31d9(%rip),%ymm11        # 3f10 <_sk_callback_hsw+0x1c7>
   .byte  196,65,36,94,218                    // vdivps        %ymm10,%ymm11,%ymm11
   .byte  197,116,92,226                      // vsubps        %ymm2,%ymm1,%ymm12
   .byte  197,116,194,234,1                   // vcmpltps      %ymm2,%ymm1,%ymm13
-  .byte  196,98,125,24,53,180,49,0,0         // vbroadcastss  0x31b4(%rip),%ymm14        # 3e2c <_sk_callback_hsw+0x1b7>
+  .byte  196,98,125,24,53,198,49,0,0         // vbroadcastss  0x31c6(%rip),%ymm14        # 3f14 <_sk_callback_hsw+0x1cb>
   .byte  196,65,4,87,255                     // vxorps        %ymm15,%ymm15,%ymm15
   .byte  196,67,5,74,238,208                 // vblendvps     %ymm13,%ymm14,%ymm15,%ymm13
   .byte  196,66,37,168,229                   // vfmadd213ps   %ymm13,%ymm11,%ymm12
   .byte  197,236,92,208                      // vsubps        %ymm0,%ymm2,%ymm2
   .byte  197,124,92,233                      // vsubps        %ymm1,%ymm0,%ymm13
-  .byte  196,98,125,24,53,155,49,0,0         // vbroadcastss  0x319b(%rip),%ymm14        # 3e34 <_sk_callback_hsw+0x1bf>
+  .byte  196,98,125,24,53,173,49,0,0         // vbroadcastss  0x31ad(%rip),%ymm14        # 3f1c <_sk_callback_hsw+0x1d3>
   .byte  196,66,37,168,238                   // vfmadd213ps   %ymm14,%ymm11,%ymm13
-  .byte  196,98,125,24,53,137,49,0,0         // vbroadcastss  0x3189(%rip),%ymm14        # 3e30 <_sk_callback_hsw+0x1bb>
+  .byte  196,98,125,24,53,155,49,0,0         // vbroadcastss  0x319b(%rip),%ymm14        # 3f18 <_sk_callback_hsw+0x1cf>
   .byte  196,194,37,168,214                  // vfmadd213ps   %ymm14,%ymm11,%ymm2
   .byte  197,188,194,201,0                   // vcmpeqps      %ymm1,%ymm8,%ymm1
   .byte  196,227,21,74,202,16                // vblendvps     %ymm1,%ymm2,%ymm13,%ymm1
   .byte  197,188,194,192,0                   // vcmpeqps      %ymm0,%ymm8,%ymm0
   .byte  196,195,117,74,196,0                // vblendvps     %ymm0,%ymm12,%ymm1,%ymm0
   .byte  196,193,60,88,201                   // vaddps        %ymm9,%ymm8,%ymm1
-  .byte  196,98,125,24,29,108,49,0,0         // vbroadcastss  0x316c(%rip),%ymm11        # 3e3c <_sk_callback_hsw+0x1c7>
+  .byte  196,98,125,24,29,126,49,0,0         // vbroadcastss  0x317e(%rip),%ymm11        # 3f24 <_sk_callback_hsw+0x1db>
   .byte  196,193,116,89,211                  // vmulps        %ymm11,%ymm1,%ymm2
   .byte  197,36,194,218,1                    // vcmpltps      %ymm2,%ymm11,%ymm11
   .byte  196,65,12,92,224                    // vsubps        %ymm8,%ymm14,%ymm12
@@ -8311,7 +8455,7 @@ _sk_rgb_to_hsl_hsw:
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  196,195,125,74,199,128              // vblendvps     %ymm8,%ymm15,%ymm0,%ymm0
   .byte  196,195,117,74,207,128              // vblendvps     %ymm8,%ymm15,%ymm1,%ymm1
-  .byte  196,98,125,24,5,47,49,0,0           // vbroadcastss  0x312f(%rip),%ymm8        # 3e38 <_sk_callback_hsw+0x1c3>
+  .byte  196,98,125,24,5,65,49,0,0           // vbroadcastss  0x3141(%rip),%ymm8        # 3f20 <_sk_callback_hsw+0x1d7>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -8328,30 +8472,30 @@ _sk_hsl_to_rgb_hsw:
   .byte  197,252,17,92,36,128                // vmovups       %ymm3,-0x80(%rsp)
   .byte  197,252,40,233                      // vmovaps       %ymm1,%ymm5
   .byte  197,252,40,224                      // vmovaps       %ymm0,%ymm4
-  .byte  196,98,125,24,5,252,48,0,0          // vbroadcastss  0x30fc(%rip),%ymm8        # 3e40 <_sk_callback_hsw+0x1cb>
+  .byte  196,98,125,24,5,14,49,0,0           // vbroadcastss  0x310e(%rip),%ymm8        # 3f28 <_sk_callback_hsw+0x1df>
   .byte  197,60,194,202,2                    // vcmpleps      %ymm2,%ymm8,%ymm9
   .byte  197,84,89,210                       // vmulps        %ymm2,%ymm5,%ymm10
   .byte  196,65,84,92,218                    // vsubps        %ymm10,%ymm5,%ymm11
   .byte  196,67,45,74,203,144                // vblendvps     %ymm9,%ymm11,%ymm10,%ymm9
   .byte  197,52,88,210                       // vaddps        %ymm2,%ymm9,%ymm10
-  .byte  196,98,125,24,13,223,48,0,0         // vbroadcastss  0x30df(%rip),%ymm9        # 3e44 <_sk_callback_hsw+0x1cf>
+  .byte  196,98,125,24,13,241,48,0,0         // vbroadcastss  0x30f1(%rip),%ymm9        # 3f2c <_sk_callback_hsw+0x1e3>
   .byte  196,66,109,170,202                  // vfmsub213ps   %ymm10,%ymm2,%ymm9
-  .byte  196,98,125,24,29,213,48,0,0         // vbroadcastss  0x30d5(%rip),%ymm11        # 3e48 <_sk_callback_hsw+0x1d3>
+  .byte  196,98,125,24,29,231,48,0,0         // vbroadcastss  0x30e7(%rip),%ymm11        # 3f30 <_sk_callback_hsw+0x1e7>
   .byte  196,65,92,88,219                    // vaddps        %ymm11,%ymm4,%ymm11
   .byte  196,67,125,8,227,1                  // vroundps      $0x1,%ymm11,%ymm12
   .byte  196,65,36,92,252                    // vsubps        %ymm12,%ymm11,%ymm15
   .byte  196,65,44,92,217                    // vsubps        %ymm9,%ymm10,%ymm11
-  .byte  196,98,125,24,45,191,48,0,0         // vbroadcastss  0x30bf(%rip),%ymm13        # 3e50 <_sk_callback_hsw+0x1db>
+  .byte  196,98,125,24,45,209,48,0,0         // vbroadcastss  0x30d1(%rip),%ymm13        # 3f38 <_sk_callback_hsw+0x1ef>
   .byte  196,193,4,89,197                    // vmulps        %ymm13,%ymm15,%ymm0
-  .byte  196,98,125,24,53,181,48,0,0         // vbroadcastss  0x30b5(%rip),%ymm14        # 3e54 <_sk_callback_hsw+0x1df>
+  .byte  196,98,125,24,53,199,48,0,0         // vbroadcastss  0x30c7(%rip),%ymm14        # 3f3c <_sk_callback_hsw+0x1f3>
   .byte  197,12,92,224                       // vsubps        %ymm0,%ymm14,%ymm12
   .byte  196,66,37,168,225                   // vfmadd213ps   %ymm9,%ymm11,%ymm12
-  .byte  196,226,125,24,29,155,48,0,0        // vbroadcastss  0x309b(%rip),%ymm3        # 3e4c <_sk_callback_hsw+0x1d7>
+  .byte  196,226,125,24,29,173,48,0,0        // vbroadcastss  0x30ad(%rip),%ymm3        # 3f34 <_sk_callback_hsw+0x1eb>
   .byte  196,193,100,194,255,2               // vcmpleps      %ymm15,%ymm3,%ymm7
   .byte  196,195,29,74,249,112               // vblendvps     %ymm7,%ymm9,%ymm12,%ymm7
   .byte  196,65,60,194,231,2                 // vcmpleps      %ymm15,%ymm8,%ymm12
   .byte  196,227,45,74,255,192               // vblendvps     %ymm12,%ymm7,%ymm10,%ymm7
-  .byte  196,98,125,24,37,134,48,0,0         // vbroadcastss  0x3086(%rip),%ymm12        # 3e58 <_sk_callback_hsw+0x1e3>
+  .byte  196,98,125,24,37,152,48,0,0         // vbroadcastss  0x3098(%rip),%ymm12        # 3f40 <_sk_callback_hsw+0x1f7>
   .byte  196,65,28,194,255,2                 // vcmpleps      %ymm15,%ymm12,%ymm15
   .byte  196,194,37,168,193                  // vfmadd213ps   %ymm9,%ymm11,%ymm0
   .byte  196,99,125,74,255,240               // vblendvps     %ymm15,%ymm7,%ymm0,%ymm15
@@ -8367,7 +8511,7 @@ _sk_hsl_to_rgb_hsw:
   .byte  197,156,194,192,2                   // vcmpleps      %ymm0,%ymm12,%ymm0
   .byte  196,194,37,168,249                  // vfmadd213ps   %ymm9,%ymm11,%ymm7
   .byte  196,227,69,74,201,0                 // vblendvps     %ymm0,%ymm1,%ymm7,%ymm1
-  .byte  196,226,125,24,5,50,48,0,0          // vbroadcastss  0x3032(%rip),%ymm0        # 3e5c <_sk_callback_hsw+0x1e7>
+  .byte  196,226,125,24,5,68,48,0,0          // vbroadcastss  0x3044(%rip),%ymm0        # 3f44 <_sk_callback_hsw+0x1fb>
   .byte  197,220,88,192                      // vaddps        %ymm0,%ymm4,%ymm0
   .byte  196,227,125,8,224,1                 // vroundps      $0x1,%ymm0,%ymm4
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
@@ -8417,11 +8561,11 @@ _sk_scale_u8_hsw:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,51                              // jne           f0a <_sk_scale_u8_hsw+0x43>
+  .byte  117,51                              // jne           fe0 <_sk_scale_u8_hsw+0x43>
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,125,49,192                   // vpmovzxbd     %xmm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,114,47,0,0         // vbroadcastss  0x2f72(%rip),%ymm9        # 3e60 <_sk_callback_hsw+0x1eb>
+  .byte  196,98,125,24,13,132,47,0,0         // vbroadcastss  0x2f84(%rip),%ymm9        # 3f48 <_sk_callback_hsw+0x1ff>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -8439,9 +8583,9 @@ _sk_scale_u8_hsw:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           f12 <_sk_scale_u8_hsw+0x4b>
+  .byte  117,234                             // jne           fe8 <_sk_scale_u8_hsw+0x4b>
   .byte  196,65,249,110,193                  // vmovq         %r9,%xmm8
-  .byte  235,172                             // jmp           edb <_sk_scale_u8_hsw+0x14>
+  .byte  235,172                             // jmp           fb1 <_sk_scale_u8_hsw+0x14>
 
 HIDDEN _sk_lerp_1_float_hsw
 .globl _sk_lerp_1_float_hsw
@@ -8469,11 +8613,11 @@ _sk_lerp_u8_hsw:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,71                              // jne           fb5 <_sk_lerp_u8_hsw+0x57>
+  .byte  117,71                              // jne           108b <_sk_lerp_u8_hsw+0x57>
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,125,49,192                   // vpmovzxbd     %xmm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,223,46,0,0         // vbroadcastss  0x2edf(%rip),%ymm9        # 3e64 <_sk_callback_hsw+0x1ef>
+  .byte  196,98,125,24,13,241,46,0,0         // vbroadcastss  0x2ef1(%rip),%ymm9        # 3f4c <_sk_callback_hsw+0x203>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,226,61,168,196                  // vfmadd213ps   %ymm4,%ymm8,%ymm0
@@ -8495,9 +8639,9 @@ _sk_lerp_u8_hsw:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           fbd <_sk_lerp_u8_hsw+0x5f>
+  .byte  117,234                             // jne           1093 <_sk_lerp_u8_hsw+0x5f>
   .byte  196,65,249,110,193                  // vmovq         %r9,%xmm8
-  .byte  235,152                             // jmp           f72 <_sk_lerp_u8_hsw+0x14>
+  .byte  235,152                             // jmp           1048 <_sk_lerp_u8_hsw+0x14>
 
 HIDDEN _sk_lerp_565_hsw
 .globl _sk_lerp_565_hsw
@@ -8506,23 +8650,23 @@ _sk_lerp_565_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,149,0,0,0                    // jne           107d <_sk_lerp_565_hsw+0xa3>
+  .byte  15,133,149,0,0,0                    // jne           1153 <_sk_lerp_565_hsw+0xa3>
   .byte  196,193,122,111,28,122              // vmovdqu       (%r10,%rdi,2),%xmm3
   .byte  196,226,125,51,219                  // vpmovzxwd     %xmm3,%ymm3
-  .byte  196,98,125,88,5,108,46,0,0          // vpbroadcastd  0x2e6c(%rip),%ymm8        # 3e68 <_sk_callback_hsw+0x1f3>
+  .byte  196,98,125,88,5,126,46,0,0          // vpbroadcastd  0x2e7e(%rip),%ymm8        # 3f50 <_sk_callback_hsw+0x207>
   .byte  196,65,101,219,192                  // vpand         %ymm8,%ymm3,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,93,46,0,0          // vbroadcastss  0x2e5d(%rip),%ymm9        # 3e6c <_sk_callback_hsw+0x1f7>
+  .byte  196,98,125,24,13,111,46,0,0         // vbroadcastss  0x2e6f(%rip),%ymm9        # 3f54 <_sk_callback_hsw+0x20b>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,88,13,83,46,0,0          // vpbroadcastd  0x2e53(%rip),%ymm9        # 3e70 <_sk_callback_hsw+0x1fb>
+  .byte  196,98,125,88,13,101,46,0,0         // vpbroadcastd  0x2e65(%rip),%ymm9        # 3f58 <_sk_callback_hsw+0x20f>
   .byte  196,65,101,219,201                  // vpand         %ymm9,%ymm3,%ymm9
   .byte  196,65,124,91,201                   // vcvtdq2ps     %ymm9,%ymm9
-  .byte  196,98,125,24,21,68,46,0,0          // vbroadcastss  0x2e44(%rip),%ymm10        # 3e74 <_sk_callback_hsw+0x1ff>
+  .byte  196,98,125,24,21,86,46,0,0          // vbroadcastss  0x2e56(%rip),%ymm10        # 3f5c <_sk_callback_hsw+0x213>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,88,21,58,46,0,0          // vpbroadcastd  0x2e3a(%rip),%ymm10        # 3e78 <_sk_callback_hsw+0x203>
+  .byte  196,98,125,88,21,76,46,0,0          // vpbroadcastd  0x2e4c(%rip),%ymm10        # 3f60 <_sk_callback_hsw+0x217>
   .byte  196,193,101,219,218                 // vpand         %ymm10,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,21,44,46,0,0          // vbroadcastss  0x2e2c(%rip),%ymm10        # 3e7c <_sk_callback_hsw+0x207>
+  .byte  196,98,125,24,21,62,46,0,0          // vbroadcastss  0x2e3e(%rip),%ymm10        # 3f64 <_sk_callback_hsw+0x21b>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,226,61,168,196                  // vfmadd213ps   %ymm4,%ymm8,%ymm0
@@ -8531,16 +8675,16 @@ _sk_lerp_565_hsw:
   .byte  197,236,92,214                      // vsubps        %ymm6,%ymm2,%ymm2
   .byte  196,226,101,168,214                 // vfmadd213ps   %ymm6,%ymm3,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,5,46,0,0          // vbroadcastss  0x2e05(%rip),%ymm3        # 3e80 <_sk_callback_hsw+0x20b>
+  .byte  196,226,125,24,29,23,46,0,0         // vbroadcastss  0x2e17(%rip),%ymm3        # 3f68 <_sk_callback_hsw+0x21f>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  197,225,239,219                     // vpxor         %xmm3,%xmm3,%xmm3
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,89,255,255,255               // ja            fee <_sk_lerp_565_hsw+0x14>
+  .byte  15,135,89,255,255,255               // ja            10c4 <_sk_lerp_565_hsw+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,76,0,0,0                  // lea           0x4c(%rip),%r9        # 10ec <_sk_lerp_565_hsw+0x112>
+  .byte  76,141,13,74,0,0,0                  // lea           0x4a(%rip),%r9        # 11c0 <_sk_lerp_565_hsw+0x110>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -8552,26 +8696,27 @@ _sk_lerp_565_hsw:
   .byte  196,193,97,196,92,122,4,2           // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
   .byte  196,193,97,196,92,122,2,1           // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
   .byte  196,193,97,196,28,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm3,%xmm3
-  .byte  233,5,255,255,255                   // jmpq          fee <_sk_lerp_565_hsw+0x14>
-  .byte  15,31,0                             // nopl          (%rax)
-  .byte  241                                 // icebp
+  .byte  233,5,255,255,255                   // jmpq          10c4 <_sk_lerp_565_hsw+0x14>
+  .byte  144                                 // nop
+  .byte  243,255                             // repz          (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
+  .byte  235,255                             // jmp           11c5 <_sk_lerp_565_hsw+0x115>
   .byte  255                                 // (bad)
-  .byte  233,255,255,255,225                 // jmpq          ffffffffe20010f4 <_sk_callback_hsw+0xffffffffe1ffd47f>
+  .byte  255,227                             // jmpq          *%rbx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  217,255                             // fcos
+  .byte  219,255                             // (bad)
   .byte  255                                 // (bad)
-  .byte  255,209                             // callq         *%rcx
+  .byte  255,211                             // callq         *%rbx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,201                             // dec           %ecx
+  .byte  255,203                             // dec           %ebx
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  189                                 // .byte         0xbd
+  .byte  191                                 // .byte         0xbf
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -8585,23 +8730,23 @@ _sk_load_tables_hsw:
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,105                             // jne           1186 <_sk_load_tables_hsw+0x7e>
+  .byte  117,105                             // jne           125a <_sk_load_tables_hsw+0x7e>
   .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
-  .byte  197,229,219,13,22,48,0,0            // vpand         0x3016(%rip),%ymm3,%ymm1        # 4140 <_sk_callback_hsw+0x4cb>
+  .byte  197,229,219,13,34,48,0,0            // vpand         0x3022(%rip),%ymm3,%ymm1        # 4220 <_sk_callback_hsw+0x4d7>
   .byte  196,65,61,118,192                   // vpcmpeqd      %ymm8,%ymm8,%ymm8
   .byte  72,139,72,8                         // mov           0x8(%rax),%rcx
   .byte  76,139,72,16                        // mov           0x10(%rax),%r9
   .byte  197,237,118,210                     // vpcmpeqd      %ymm2,%ymm2,%ymm2
   .byte  196,226,109,146,4,137               // vgatherdps    %ymm2,(%rcx,%ymm1,4),%ymm0
-  .byte  196,226,101,0,21,22,48,0,0          // vpshufb       0x3016(%rip),%ymm3,%ymm2        # 4160 <_sk_callback_hsw+0x4eb>
+  .byte  196,226,101,0,21,34,48,0,0          // vpshufb       0x3022(%rip),%ymm3,%ymm2        # 4240 <_sk_callback_hsw+0x4f7>
   .byte  196,65,53,118,201                   // vpcmpeqd      %ymm9,%ymm9,%ymm9
   .byte  196,194,53,146,12,145               // vgatherdps    %ymm9,(%r9,%ymm2,4),%ymm1
   .byte  72,139,64,24                        // mov           0x18(%rax),%rax
-  .byte  196,98,101,0,13,30,48,0,0           // vpshufb       0x301e(%rip),%ymm3,%ymm9        # 4180 <_sk_callback_hsw+0x50b>
+  .byte  196,98,101,0,13,42,48,0,0           // vpshufb       0x302a(%rip),%ymm3,%ymm9        # 4260 <_sk_callback_hsw+0x517>
   .byte  196,162,61,146,20,136               // vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,10,45,0,0           // vbroadcastss  0x2d0a(%rip),%ymm8        # 3e84 <_sk_callback_hsw+0x20f>
+  .byte  196,98,125,24,5,30,45,0,0           // vbroadcastss  0x2d1e(%rip),%ymm8        # 3f6c <_sk_callback_hsw+0x223>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,137,193                          // mov           %r8,%rcx
@@ -8614,7 +8759,7 @@ _sk_load_tables_hsw:
   .byte  196,193,249,110,194                 // vmovq         %r10,%xmm0
   .byte  196,226,125,33,192                  // vpmovsxbd     %xmm0,%ymm0
   .byte  196,194,125,140,25                  // vpmaskmovd    (%r9),%ymm0,%ymm3
-  .byte  233,115,255,255,255                 // jmpq          1122 <_sk_load_tables_hsw+0x1a>
+  .byte  233,115,255,255,255                 // jmpq          11f6 <_sk_load_tables_hsw+0x1a>
 
 HIDDEN _sk_load_tables_u16_be_hsw
 .globl _sk_load_tables_u16_be_hsw
@@ -8624,7 +8769,7 @@ _sk_load_tables_u16_be_hsw:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,201,0,0,0                    // jne           128e <_sk_load_tables_u16_be_hsw+0xdf>
+  .byte  15,133,201,0,0,0                    // jne           1362 <_sk_load_tables_u16_be_hsw+0xdf>
   .byte  196,1,121,16,4,72                   // vmovupd       (%r8,%r9,2),%xmm8
   .byte  196,129,121,16,84,72,16             // vmovupd       0x10(%r8,%r9,2),%xmm2
   .byte  196,129,121,16,92,72,32             // vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -8640,7 +8785,7 @@ _sk_load_tables_u16_be_hsw:
   .byte  197,185,108,200                     // vpunpcklqdq   %xmm0,%xmm8,%xmm1
   .byte  197,185,109,208                     // vpunpckhqdq   %xmm0,%xmm8,%xmm2
   .byte  197,49,108,195                      // vpunpcklqdq   %xmm3,%xmm9,%xmm8
-  .byte  197,121,111,21,170,48,0,0           // vmovdqa       0x30aa(%rip),%xmm10        # 42c0 <_sk_callback_hsw+0x64b>
+  .byte  197,121,111,21,182,48,0,0           // vmovdqa       0x30b6(%rip),%xmm10        # 43a0 <_sk_callback_hsw+0x657>
   .byte  196,193,113,219,194                 // vpand         %xmm10,%xmm1,%xmm0
   .byte  196,226,125,51,200                  // vpmovzxwd     %xmm0,%ymm1
   .byte  196,65,37,118,219                   // vpcmpeqd      %ymm11,%ymm11,%ymm11
@@ -8662,36 +8807,36 @@ _sk_load_tables_u16_be_hsw:
   .byte  197,185,235,219                     // vpor          %xmm3,%xmm8,%xmm3
   .byte  196,226,125,51,219                  // vpmovzxwd     %xmm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,3,44,0,0            // vbroadcastss  0x2c03(%rip),%ymm8        # 3e88 <_sk_callback_hsw+0x213>
+  .byte  196,98,125,24,5,23,44,0,0           // vbroadcastss  0x2c17(%rip),%ymm8        # 3f70 <_sk_callback_hsw+0x227>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
   .byte  196,1,123,16,4,72                   // vmovsd        (%r8,%r9,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            12f4 <_sk_load_tables_u16_be_hsw+0x145>
+  .byte  116,85                              // je            13c8 <_sk_load_tables_u16_be_hsw+0x145>
   .byte  196,1,57,22,68,72,8                 // vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            12f4 <_sk_load_tables_u16_be_hsw+0x145>
+  .byte  114,72                              // jb            13c8 <_sk_load_tables_u16_be_hsw+0x145>
   .byte  196,129,123,16,84,72,16             // vmovsd        0x10(%r8,%r9,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            1301 <_sk_load_tables_u16_be_hsw+0x152>
+  .byte  116,72                              // je            13d5 <_sk_load_tables_u16_be_hsw+0x152>
   .byte  196,129,105,22,84,72,24             // vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            1301 <_sk_load_tables_u16_be_hsw+0x152>
+  .byte  114,59                              // jb            13d5 <_sk_load_tables_u16_be_hsw+0x152>
   .byte  196,129,123,16,92,72,32             // vmovsd        0x20(%r8,%r9,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,9,255,255,255                // je            11e0 <_sk_load_tables_u16_be_hsw+0x31>
+  .byte  15,132,9,255,255,255                // je            12b4 <_sk_load_tables_u16_be_hsw+0x31>
   .byte  196,129,97,22,92,72,40              // vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,248,254,255,255              // jb            11e0 <_sk_load_tables_u16_be_hsw+0x31>
+  .byte  15,130,248,254,255,255              // jb            12b4 <_sk_load_tables_u16_be_hsw+0x31>
   .byte  196,1,122,126,76,72,48              // vmovq         0x30(%r8,%r9,2),%xmm9
-  .byte  233,236,254,255,255                 // jmpq          11e0 <_sk_load_tables_u16_be_hsw+0x31>
+  .byte  233,236,254,255,255                 // jmpq          12b4 <_sk_load_tables_u16_be_hsw+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,223,254,255,255                 // jmpq          11e0 <_sk_load_tables_u16_be_hsw+0x31>
+  .byte  233,223,254,255,255                 // jmpq          12b4 <_sk_load_tables_u16_be_hsw+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,214,254,255,255                 // jmpq          11e0 <_sk_load_tables_u16_be_hsw+0x31>
+  .byte  233,214,254,255,255                 // jmpq          12b4 <_sk_load_tables_u16_be_hsw+0x31>
 
 HIDDEN _sk_load_tables_rgb_u16_be_hsw
 .globl _sk_load_tables_rgb_u16_be_hsw
@@ -8701,7 +8846,7 @@ _sk_load_tables_rgb_u16_be_hsw:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,127                       // lea           (%rdi,%rdi,2),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,193,0,0,0                    // jne           13dd <_sk_load_tables_rgb_u16_be_hsw+0xd3>
+  .byte  15,133,193,0,0,0                    // jne           14b1 <_sk_load_tables_rgb_u16_be_hsw+0xd3>
   .byte  196,129,122,111,4,72                // vmovdqu       (%r8,%r9,2),%xmm0
   .byte  196,129,122,111,84,72,12            // vmovdqu       0xc(%r8,%r9,2),%xmm2
   .byte  196,129,122,111,76,72,24            // vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -8722,7 +8867,7 @@ _sk_load_tables_rgb_u16_be_hsw:
   .byte  197,185,108,218                     // vpunpcklqdq   %xmm2,%xmm8,%xmm3
   .byte  197,185,109,210                     // vpunpckhqdq   %xmm2,%xmm8,%xmm2
   .byte  197,121,108,193                     // vpunpcklqdq   %xmm1,%xmm0,%xmm8
-  .byte  197,121,111,13,74,47,0,0            // vmovdqa       0x2f4a(%rip),%xmm9        # 42d0 <_sk_callback_hsw+0x65b>
+  .byte  197,121,111,13,86,47,0,0            // vmovdqa       0x2f56(%rip),%xmm9        # 43b0 <_sk_callback_hsw+0x667>
   .byte  196,193,97,219,193                  // vpand         %xmm9,%xmm3,%xmm0
   .byte  196,226,125,51,200                  // vpmovzxwd     %xmm0,%ymm1
   .byte  197,229,118,219                     // vpcmpeqd      %ymm3,%ymm3,%ymm3
@@ -8739,41 +8884,41 @@ _sk_load_tables_rgb_u16_be_hsw:
   .byte  196,98,125,51,194                   // vpmovzxwd     %xmm2,%ymm8
   .byte  196,162,101,146,20,128              // vgatherdps    %ymm3,(%rax,%ymm8,4),%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,177,42,0,0        // vbroadcastss  0x2ab1(%rip),%ymm3        # 3e8c <_sk_callback_hsw+0x217>
+  .byte  196,226,125,24,29,197,42,0,0        // vbroadcastss  0x2ac5(%rip),%ymm3        # 3f74 <_sk_callback_hsw+0x22b>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,129,121,110,4,72                // vmovd         (%r8,%r9,2),%xmm0
   .byte  196,129,121,196,68,72,4,2           // vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           13f6 <_sk_load_tables_rgb_u16_be_hsw+0xec>
-  .byte  233,90,255,255,255                  // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  117,5                               // jne           14ca <_sk_load_tables_rgb_u16_be_hsw+0xec>
+  .byte  233,90,255,255,255                  // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
   .byte  196,129,121,110,76,72,6             // vmovd         0x6(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,68,72,10,2            // vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            1425 <_sk_load_tables_rgb_u16_be_hsw+0x11b>
+  .byte  114,26                              // jb            14f9 <_sk_load_tables_rgb_u16_be_hsw+0x11b>
   .byte  196,129,121,110,76,72,12            // vmovd         0xc(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,84,72,16,2          // vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           142a <_sk_load_tables_rgb_u16_be_hsw+0x120>
-  .byte  233,43,255,255,255                  // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
-  .byte  233,38,255,255,255                  // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  117,10                              // jne           14fe <_sk_load_tables_rgb_u16_be_hsw+0x120>
+  .byte  233,43,255,255,255                  // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  233,38,255,255,255                  // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
   .byte  196,129,121,110,76,72,18            // vmovd         0x12(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,76,72,22,2            // vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            1459 <_sk_load_tables_rgb_u16_be_hsw+0x14f>
+  .byte  114,26                              // jb            152d <_sk_load_tables_rgb_u16_be_hsw+0x14f>
   .byte  196,129,121,110,76,72,24            // vmovd         0x18(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,76,72,28,2          // vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           145e <_sk_load_tables_rgb_u16_be_hsw+0x154>
-  .byte  233,247,254,255,255                 // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
-  .byte  233,242,254,255,255                 // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  117,10                              // jne           1532 <_sk_load_tables_rgb_u16_be_hsw+0x154>
+  .byte  233,247,254,255,255                 // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  233,242,254,255,255                 // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
   .byte  196,129,121,110,92,72,30            // vmovd         0x1e(%r8,%r9,2),%xmm3
   .byte  196,1,97,196,92,72,34,2             // vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            1487 <_sk_load_tables_rgb_u16_be_hsw+0x17d>
+  .byte  114,20                              // jb            155b <_sk_load_tables_rgb_u16_be_hsw+0x17d>
   .byte  196,129,121,110,92,72,36            // vmovd         0x24(%r8,%r9,2),%xmm3
   .byte  196,129,97,196,92,72,40,2           // vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  .byte  233,201,254,255,255                 // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
-  .byte  233,196,254,255,255                 // jmpq          1350 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  233,201,254,255,255                 // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  .byte  233,196,254,255,255                 // jmpq          1424 <_sk_load_tables_rgb_u16_be_hsw+0x46>
 
 HIDDEN _sk_byte_tables_hsw
 .globl _sk_byte_tables_hsw
@@ -8786,7 +8931,7 @@ _sk_byte_tables_hsw:
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,239,41,0,0          // vbroadcastss  0x29ef(%rip),%ymm8        # 3e90 <_sk_callback_hsw+0x21b>
+  .byte  196,98,125,24,5,3,42,0,0            // vbroadcastss  0x2a03(%rip),%ymm8        # 3f78 <_sk_callback_hsw+0x22f>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
@@ -8823,7 +8968,7 @@ _sk_byte_tables_hsw:
   .byte  196,227,121,32,197,7                // vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,64,41,0,0          // vbroadcastss  0x2940(%rip),%ymm9        # 3e94 <_sk_callback_hsw+0x21f>
+  .byte  196,98,125,24,13,84,41,0,0          // vbroadcastss  0x2954(%rip),%ymm9        # 3f7c <_sk_callback_hsw+0x233>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -8984,7 +9129,7 @@ _sk_byte_tables_rgb_hsw:
   .byte  196,227,121,32,197,7                // vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,121,38,0,0         // vbroadcastss  0x2679(%rip),%ymm9        # 3e98 <_sk_callback_hsw+0x223>
+  .byte  196,98,125,24,13,141,38,0,0         // vbroadcastss  0x268d(%rip),%ymm9        # 3f80 <_sk_callback_hsw+0x237>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -9147,33 +9292,33 @@ _sk_parametric_r_hsw:
   .byte  196,66,125,168,211                  // vfmadd213ps   %ymm11,%ymm0,%ymm10
   .byte  196,226,125,24,0                    // vbroadcastss  (%rax),%ymm0
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,44,36,0,0          // vbroadcastss  0x242c(%rip),%ymm12        # 3e9c <_sk_callback_hsw+0x227>
-  .byte  196,98,125,24,45,39,36,0,0          // vbroadcastss  0x2427(%rip),%ymm13        # 3ea0 <_sk_callback_hsw+0x22b>
+  .byte  196,98,125,24,37,64,36,0,0          // vbroadcastss  0x2440(%rip),%ymm12        # 3f84 <_sk_callback_hsw+0x23b>
+  .byte  196,98,125,24,45,59,36,0,0          // vbroadcastss  0x243b(%rip),%ymm13        # 3f88 <_sk_callback_hsw+0x23f>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,29,36,0,0          // vbroadcastss  0x241d(%rip),%ymm13        # 3ea4 <_sk_callback_hsw+0x22f>
+  .byte  196,98,125,24,45,49,36,0,0          // vbroadcastss  0x2431(%rip),%ymm13        # 3f8c <_sk_callback_hsw+0x243>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,19,36,0,0          // vbroadcastss  0x2413(%rip),%ymm13        # 3ea8 <_sk_callback_hsw+0x233>
+  .byte  196,98,125,24,45,39,36,0,0          // vbroadcastss  0x2427(%rip),%ymm13        # 3f90 <_sk_callback_hsw+0x247>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,9,36,0,0           // vbroadcastss  0x2409(%rip),%ymm11        # 3eac <_sk_callback_hsw+0x237>
+  .byte  196,98,125,24,29,29,36,0,0          // vbroadcastss  0x241d(%rip),%ymm11        # 3f94 <_sk_callback_hsw+0x24b>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,255,35,0,0         // vbroadcastss  0x23ff(%rip),%ymm12        # 3eb0 <_sk_callback_hsw+0x23b>
+  .byte  196,98,125,24,37,19,36,0,0          // vbroadcastss  0x2413(%rip),%ymm12        # 3f98 <_sk_callback_hsw+0x24f>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,245,35,0,0         // vbroadcastss  0x23f5(%rip),%ymm12        # 3eb4 <_sk_callback_hsw+0x23f>
+  .byte  196,98,125,24,37,9,36,0,0           // vbroadcastss  0x2409(%rip),%ymm12        # 3f9c <_sk_callback_hsw+0x253>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  196,99,125,8,208,1                  // vroundps      $0x1,%ymm0,%ymm10
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,214,35,0,0         // vbroadcastss  0x23d6(%rip),%ymm11        # 3eb8 <_sk_callback_hsw+0x243>
+  .byte  196,98,125,24,29,234,35,0,0         // vbroadcastss  0x23ea(%rip),%ymm11        # 3fa0 <_sk_callback_hsw+0x257>
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,204,35,0,0         // vbroadcastss  0x23cc(%rip),%ymm11        # 3ebc <_sk_callback_hsw+0x247>
+  .byte  196,98,125,24,29,224,35,0,0         // vbroadcastss  0x23e0(%rip),%ymm11        # 3fa4 <_sk_callback_hsw+0x25b>
   .byte  196,98,45,172,216                   // vfnmadd213ps  %ymm0,%ymm10,%ymm11
-  .byte  196,226,125,24,5,194,35,0,0         // vbroadcastss  0x23c2(%rip),%ymm0        # 3ec0 <_sk_callback_hsw+0x24b>
+  .byte  196,226,125,24,5,214,35,0,0         // vbroadcastss  0x23d6(%rip),%ymm0        # 3fa8 <_sk_callback_hsw+0x25f>
   .byte  196,193,124,92,194                  // vsubps        %ymm10,%ymm0,%ymm0
-  .byte  196,98,125,24,21,184,35,0,0         // vbroadcastss  0x23b8(%rip),%ymm10        # 3ec4 <_sk_callback_hsw+0x24f>
+  .byte  196,98,125,24,21,204,35,0,0         // vbroadcastss  0x23cc(%rip),%ymm10        # 3fac <_sk_callback_hsw+0x263>
   .byte  197,172,94,192                      // vdivps        %ymm0,%ymm10,%ymm0
   .byte  197,164,88,192                      // vaddps        %ymm0,%ymm11,%ymm0
-  .byte  196,98,125,24,21,171,35,0,0         // vbroadcastss  0x23ab(%rip),%ymm10        # 3ec8 <_sk_callback_hsw+0x253>
+  .byte  196,98,125,24,21,191,35,0,0         // vbroadcastss  0x23bf(%rip),%ymm10        # 3fb0 <_sk_callback_hsw+0x267>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -9181,7 +9326,7 @@ _sk_parametric_r_hsw:
   .byte  196,195,125,74,193,128              // vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,130,35,0,0          // vbroadcastss  0x2382(%rip),%ymm8        # 3ecc <_sk_callback_hsw+0x257>
+  .byte  196,98,125,24,5,150,35,0,0          // vbroadcastss  0x2396(%rip),%ymm8        # 3fb4 <_sk_callback_hsw+0x26b>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9201,33 +9346,33 @@ _sk_parametric_g_hsw:
   .byte  196,66,117,168,211                  // vfmadd213ps   %ymm11,%ymm1,%ymm10
   .byte  196,226,125,24,8                    // vbroadcastss  (%rax),%ymm1
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,58,35,0,0          // vbroadcastss  0x233a(%rip),%ymm12        # 3ed0 <_sk_callback_hsw+0x25b>
-  .byte  196,98,125,24,45,53,35,0,0          // vbroadcastss  0x2335(%rip),%ymm13        # 3ed4 <_sk_callback_hsw+0x25f>
+  .byte  196,98,125,24,37,78,35,0,0          // vbroadcastss  0x234e(%rip),%ymm12        # 3fb8 <_sk_callback_hsw+0x26f>
+  .byte  196,98,125,24,45,73,35,0,0          // vbroadcastss  0x2349(%rip),%ymm13        # 3fbc <_sk_callback_hsw+0x273>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,43,35,0,0          // vbroadcastss  0x232b(%rip),%ymm13        # 3ed8 <_sk_callback_hsw+0x263>
+  .byte  196,98,125,24,45,63,35,0,0          // vbroadcastss  0x233f(%rip),%ymm13        # 3fc0 <_sk_callback_hsw+0x277>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,33,35,0,0          // vbroadcastss  0x2321(%rip),%ymm13        # 3edc <_sk_callback_hsw+0x267>
+  .byte  196,98,125,24,45,53,35,0,0          // vbroadcastss  0x2335(%rip),%ymm13        # 3fc4 <_sk_callback_hsw+0x27b>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,23,35,0,0          // vbroadcastss  0x2317(%rip),%ymm11        # 3ee0 <_sk_callback_hsw+0x26b>
+  .byte  196,98,125,24,29,43,35,0,0          // vbroadcastss  0x232b(%rip),%ymm11        # 3fc8 <_sk_callback_hsw+0x27f>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,13,35,0,0          // vbroadcastss  0x230d(%rip),%ymm12        # 3ee4 <_sk_callback_hsw+0x26f>
+  .byte  196,98,125,24,37,33,35,0,0          // vbroadcastss  0x2321(%rip),%ymm12        # 3fcc <_sk_callback_hsw+0x283>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,3,35,0,0           // vbroadcastss  0x2303(%rip),%ymm12        # 3ee8 <_sk_callback_hsw+0x273>
+  .byte  196,98,125,24,37,23,35,0,0          // vbroadcastss  0x2317(%rip),%ymm12        # 3fd0 <_sk_callback_hsw+0x287>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  196,99,125,8,209,1                  // vroundps      $0x1,%ymm1,%ymm10
   .byte  196,65,116,92,210                   // vsubps        %ymm10,%ymm1,%ymm10
-  .byte  196,98,125,24,29,228,34,0,0         // vbroadcastss  0x22e4(%rip),%ymm11        # 3eec <_sk_callback_hsw+0x277>
+  .byte  196,98,125,24,29,248,34,0,0         // vbroadcastss  0x22f8(%rip),%ymm11        # 3fd4 <_sk_callback_hsw+0x28b>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,218,34,0,0         // vbroadcastss  0x22da(%rip),%ymm11        # 3ef0 <_sk_callback_hsw+0x27b>
+  .byte  196,98,125,24,29,238,34,0,0         // vbroadcastss  0x22ee(%rip),%ymm11        # 3fd8 <_sk_callback_hsw+0x28f>
   .byte  196,98,45,172,217                   // vfnmadd213ps  %ymm1,%ymm10,%ymm11
-  .byte  196,226,125,24,13,208,34,0,0        // vbroadcastss  0x22d0(%rip),%ymm1        # 3ef4 <_sk_callback_hsw+0x27f>
+  .byte  196,226,125,24,13,228,34,0,0        // vbroadcastss  0x22e4(%rip),%ymm1        # 3fdc <_sk_callback_hsw+0x293>
   .byte  196,193,116,92,202                  // vsubps        %ymm10,%ymm1,%ymm1
-  .byte  196,98,125,24,21,198,34,0,0         // vbroadcastss  0x22c6(%rip),%ymm10        # 3ef8 <_sk_callback_hsw+0x283>
+  .byte  196,98,125,24,21,218,34,0,0         // vbroadcastss  0x22da(%rip),%ymm10        # 3fe0 <_sk_callback_hsw+0x297>
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  197,164,88,201                      // vaddps        %ymm1,%ymm11,%ymm1
-  .byte  196,98,125,24,21,185,34,0,0         // vbroadcastss  0x22b9(%rip),%ymm10        # 3efc <_sk_callback_hsw+0x287>
+  .byte  196,98,125,24,21,205,34,0,0         // vbroadcastss  0x22cd(%rip),%ymm10        # 3fe4 <_sk_callback_hsw+0x29b>
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -9235,7 +9380,7 @@ _sk_parametric_g_hsw:
   .byte  196,195,117,74,201,128              // vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,116,95,200                  // vmaxps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,5,144,34,0,0          // vbroadcastss  0x2290(%rip),%ymm8        # 3f00 <_sk_callback_hsw+0x28b>
+  .byte  196,98,125,24,5,164,34,0,0          // vbroadcastss  0x22a4(%rip),%ymm8        # 3fe8 <_sk_callback_hsw+0x29f>
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9255,33 +9400,33 @@ _sk_parametric_b_hsw:
   .byte  196,66,109,168,211                  // vfmadd213ps   %ymm11,%ymm2,%ymm10
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,72,34,0,0          // vbroadcastss  0x2248(%rip),%ymm12        # 3f04 <_sk_callback_hsw+0x28f>
-  .byte  196,98,125,24,45,67,34,0,0          // vbroadcastss  0x2243(%rip),%ymm13        # 3f08 <_sk_callback_hsw+0x293>
+  .byte  196,98,125,24,37,92,34,0,0          // vbroadcastss  0x225c(%rip),%ymm12        # 3fec <_sk_callback_hsw+0x2a3>
+  .byte  196,98,125,24,45,87,34,0,0          // vbroadcastss  0x2257(%rip),%ymm13        # 3ff0 <_sk_callback_hsw+0x2a7>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,57,34,0,0          // vbroadcastss  0x2239(%rip),%ymm13        # 3f0c <_sk_callback_hsw+0x297>
+  .byte  196,98,125,24,45,77,34,0,0          // vbroadcastss  0x224d(%rip),%ymm13        # 3ff4 <_sk_callback_hsw+0x2ab>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,47,34,0,0          // vbroadcastss  0x222f(%rip),%ymm13        # 3f10 <_sk_callback_hsw+0x29b>
+  .byte  196,98,125,24,45,67,34,0,0          // vbroadcastss  0x2243(%rip),%ymm13        # 3ff8 <_sk_callback_hsw+0x2af>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,37,34,0,0          // vbroadcastss  0x2225(%rip),%ymm11        # 3f14 <_sk_callback_hsw+0x29f>
+  .byte  196,98,125,24,29,57,34,0,0          // vbroadcastss  0x2239(%rip),%ymm11        # 3ffc <_sk_callback_hsw+0x2b3>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,27,34,0,0          // vbroadcastss  0x221b(%rip),%ymm12        # 3f18 <_sk_callback_hsw+0x2a3>
+  .byte  196,98,125,24,37,47,34,0,0          // vbroadcastss  0x222f(%rip),%ymm12        # 4000 <_sk_callback_hsw+0x2b7>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,17,34,0,0          // vbroadcastss  0x2211(%rip),%ymm12        # 3f1c <_sk_callback_hsw+0x2a7>
+  .byte  196,98,125,24,37,37,34,0,0          // vbroadcastss  0x2225(%rip),%ymm12        # 4004 <_sk_callback_hsw+0x2bb>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  196,99,125,8,210,1                  // vroundps      $0x1,%ymm2,%ymm10
   .byte  196,65,108,92,210                   // vsubps        %ymm10,%ymm2,%ymm10
-  .byte  196,98,125,24,29,242,33,0,0         // vbroadcastss  0x21f2(%rip),%ymm11        # 3f20 <_sk_callback_hsw+0x2ab>
+  .byte  196,98,125,24,29,6,34,0,0           // vbroadcastss  0x2206(%rip),%ymm11        # 4008 <_sk_callback_hsw+0x2bf>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,232,33,0,0         // vbroadcastss  0x21e8(%rip),%ymm11        # 3f24 <_sk_callback_hsw+0x2af>
+  .byte  196,98,125,24,29,252,33,0,0         // vbroadcastss  0x21fc(%rip),%ymm11        # 400c <_sk_callback_hsw+0x2c3>
   .byte  196,98,45,172,218                   // vfnmadd213ps  %ymm2,%ymm10,%ymm11
-  .byte  196,226,125,24,21,222,33,0,0        // vbroadcastss  0x21de(%rip),%ymm2        # 3f28 <_sk_callback_hsw+0x2b3>
+  .byte  196,226,125,24,21,242,33,0,0        // vbroadcastss  0x21f2(%rip),%ymm2        # 4010 <_sk_callback_hsw+0x2c7>
   .byte  196,193,108,92,210                  // vsubps        %ymm10,%ymm2,%ymm2
-  .byte  196,98,125,24,21,212,33,0,0         // vbroadcastss  0x21d4(%rip),%ymm10        # 3f2c <_sk_callback_hsw+0x2b7>
+  .byte  196,98,125,24,21,232,33,0,0         // vbroadcastss  0x21e8(%rip),%ymm10        # 4014 <_sk_callback_hsw+0x2cb>
   .byte  197,172,94,210                      // vdivps        %ymm2,%ymm10,%ymm2
   .byte  197,164,88,210                      // vaddps        %ymm2,%ymm11,%ymm2
-  .byte  196,98,125,24,21,199,33,0,0         // vbroadcastss  0x21c7(%rip),%ymm10        # 3f30 <_sk_callback_hsw+0x2bb>
+  .byte  196,98,125,24,21,219,33,0,0         // vbroadcastss  0x21db(%rip),%ymm10        # 4018 <_sk_callback_hsw+0x2cf>
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  197,253,91,210                      // vcvtps2dq     %ymm2,%ymm2
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -9289,7 +9434,7 @@ _sk_parametric_b_hsw:
   .byte  196,195,109,74,209,128              // vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,158,33,0,0          // vbroadcastss  0x219e(%rip),%ymm8        # 3f34 <_sk_callback_hsw+0x2bf>
+  .byte  196,98,125,24,5,178,33,0,0          // vbroadcastss  0x21b2(%rip),%ymm8        # 401c <_sk_callback_hsw+0x2d3>
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9309,33 +9454,33 @@ _sk_parametric_a_hsw:
   .byte  196,66,101,168,211                  // vfmadd213ps   %ymm11,%ymm3,%ymm10
   .byte  196,226,125,24,24                   // vbroadcastss  (%rax),%ymm3
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,86,33,0,0          // vbroadcastss  0x2156(%rip),%ymm12        # 3f38 <_sk_callback_hsw+0x2c3>
-  .byte  196,98,125,24,45,81,33,0,0          // vbroadcastss  0x2151(%rip),%ymm13        # 3f3c <_sk_callback_hsw+0x2c7>
+  .byte  196,98,125,24,37,106,33,0,0         // vbroadcastss  0x216a(%rip),%ymm12        # 4020 <_sk_callback_hsw+0x2d7>
+  .byte  196,98,125,24,45,101,33,0,0         // vbroadcastss  0x2165(%rip),%ymm13        # 4024 <_sk_callback_hsw+0x2db>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,71,33,0,0          // vbroadcastss  0x2147(%rip),%ymm13        # 3f40 <_sk_callback_hsw+0x2cb>
+  .byte  196,98,125,24,45,91,33,0,0          // vbroadcastss  0x215b(%rip),%ymm13        # 4028 <_sk_callback_hsw+0x2df>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,61,33,0,0          // vbroadcastss  0x213d(%rip),%ymm13        # 3f44 <_sk_callback_hsw+0x2cf>
+  .byte  196,98,125,24,45,81,33,0,0          // vbroadcastss  0x2151(%rip),%ymm13        # 402c <_sk_callback_hsw+0x2e3>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,51,33,0,0          // vbroadcastss  0x2133(%rip),%ymm11        # 3f48 <_sk_callback_hsw+0x2d3>
+  .byte  196,98,125,24,29,71,33,0,0          // vbroadcastss  0x2147(%rip),%ymm11        # 4030 <_sk_callback_hsw+0x2e7>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,41,33,0,0          // vbroadcastss  0x2129(%rip),%ymm12        # 3f4c <_sk_callback_hsw+0x2d7>
+  .byte  196,98,125,24,37,61,33,0,0          // vbroadcastss  0x213d(%rip),%ymm12        # 4034 <_sk_callback_hsw+0x2eb>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,31,33,0,0          // vbroadcastss  0x211f(%rip),%ymm12        # 3f50 <_sk_callback_hsw+0x2db>
+  .byte  196,98,125,24,37,51,33,0,0          // vbroadcastss  0x2133(%rip),%ymm12        # 4038 <_sk_callback_hsw+0x2ef>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  196,99,125,8,211,1                  // vroundps      $0x1,%ymm3,%ymm10
   .byte  196,65,100,92,210                   // vsubps        %ymm10,%ymm3,%ymm10
-  .byte  196,98,125,24,29,0,33,0,0           // vbroadcastss  0x2100(%rip),%ymm11        # 3f54 <_sk_callback_hsw+0x2df>
+  .byte  196,98,125,24,29,20,33,0,0          // vbroadcastss  0x2114(%rip),%ymm11        # 403c <_sk_callback_hsw+0x2f3>
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,246,32,0,0         // vbroadcastss  0x20f6(%rip),%ymm11        # 3f58 <_sk_callback_hsw+0x2e3>
+  .byte  196,98,125,24,29,10,33,0,0          // vbroadcastss  0x210a(%rip),%ymm11        # 4040 <_sk_callback_hsw+0x2f7>
   .byte  196,98,45,172,219                   // vfnmadd213ps  %ymm3,%ymm10,%ymm11
-  .byte  196,226,125,24,29,236,32,0,0        // vbroadcastss  0x20ec(%rip),%ymm3        # 3f5c <_sk_callback_hsw+0x2e7>
+  .byte  196,226,125,24,29,0,33,0,0          // vbroadcastss  0x2100(%rip),%ymm3        # 4044 <_sk_callback_hsw+0x2fb>
   .byte  196,193,100,92,218                  // vsubps        %ymm10,%ymm3,%ymm3
-  .byte  196,98,125,24,21,226,32,0,0         // vbroadcastss  0x20e2(%rip),%ymm10        # 3f60 <_sk_callback_hsw+0x2eb>
+  .byte  196,98,125,24,21,246,32,0,0         // vbroadcastss  0x20f6(%rip),%ymm10        # 4048 <_sk_callback_hsw+0x2ff>
   .byte  197,172,94,219                      // vdivps        %ymm3,%ymm10,%ymm3
   .byte  197,164,88,219                      // vaddps        %ymm3,%ymm11,%ymm3
-  .byte  196,98,125,24,21,213,32,0,0         // vbroadcastss  0x20d5(%rip),%ymm10        # 3f64 <_sk_callback_hsw+0x2ef>
+  .byte  196,98,125,24,21,233,32,0,0         // vbroadcastss  0x20e9(%rip),%ymm10        # 404c <_sk_callback_hsw+0x303>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,253,91,219                      // vcvtps2dq     %ymm3,%ymm3
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -9343,7 +9488,7 @@ _sk_parametric_a_hsw:
   .byte  196,195,101,74,217,128              // vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,100,95,216                  // vmaxps        %ymm8,%ymm3,%ymm3
-  .byte  196,98,125,24,5,172,32,0,0          // vbroadcastss  0x20ac(%rip),%ymm8        # 3f68 <_sk_callback_hsw+0x2f3>
+  .byte  196,98,125,24,5,192,32,0,0          // vbroadcastss  0x20c0(%rip),%ymm8        # 4050 <_sk_callback_hsw+0x307>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9352,26 +9497,26 @@ HIDDEN _sk_lab_to_xyz_hsw
 .globl _sk_lab_to_xyz_hsw
 FUNCTION(_sk_lab_to_xyz_hsw)
 _sk_lab_to_xyz_hsw:
-  .byte  196,98,125,24,5,158,32,0,0          // vbroadcastss  0x209e(%rip),%ymm8        # 3f6c <_sk_callback_hsw+0x2f7>
-  .byte  196,98,125,24,13,153,32,0,0         // vbroadcastss  0x2099(%rip),%ymm9        # 3f70 <_sk_callback_hsw+0x2fb>
-  .byte  196,98,125,24,21,148,32,0,0         // vbroadcastss  0x2094(%rip),%ymm10        # 3f74 <_sk_callback_hsw+0x2ff>
+  .byte  196,98,125,24,5,178,32,0,0          // vbroadcastss  0x20b2(%rip),%ymm8        # 4054 <_sk_callback_hsw+0x30b>
+  .byte  196,98,125,24,13,173,32,0,0         // vbroadcastss  0x20ad(%rip),%ymm9        # 4058 <_sk_callback_hsw+0x30f>
+  .byte  196,98,125,24,21,168,32,0,0         // vbroadcastss  0x20a8(%rip),%ymm10        # 405c <_sk_callback_hsw+0x313>
   .byte  196,194,53,168,202                  // vfmadd213ps   %ymm10,%ymm9,%ymm1
   .byte  196,194,53,168,210                  // vfmadd213ps   %ymm10,%ymm9,%ymm2
-  .byte  196,98,125,24,13,133,32,0,0         // vbroadcastss  0x2085(%rip),%ymm9        # 3f78 <_sk_callback_hsw+0x303>
+  .byte  196,98,125,24,13,153,32,0,0         // vbroadcastss  0x2099(%rip),%ymm9        # 4060 <_sk_callback_hsw+0x317>
   .byte  196,66,125,184,200                  // vfmadd231ps   %ymm8,%ymm0,%ymm9
-  .byte  196,226,125,24,5,123,32,0,0         // vbroadcastss  0x207b(%rip),%ymm0        # 3f7c <_sk_callback_hsw+0x307>
+  .byte  196,226,125,24,5,143,32,0,0         // vbroadcastss  0x208f(%rip),%ymm0        # 4064 <_sk_callback_hsw+0x31b>
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
-  .byte  196,98,125,24,5,114,32,0,0          // vbroadcastss  0x2072(%rip),%ymm8        # 3f80 <_sk_callback_hsw+0x30b>
+  .byte  196,98,125,24,5,134,32,0,0          // vbroadcastss  0x2086(%rip),%ymm8        # 4068 <_sk_callback_hsw+0x31f>
   .byte  196,98,117,168,192                  // vfmadd213ps   %ymm0,%ymm1,%ymm8
-  .byte  196,98,125,24,13,104,32,0,0         // vbroadcastss  0x2068(%rip),%ymm9        # 3f84 <_sk_callback_hsw+0x30f>
+  .byte  196,98,125,24,13,124,32,0,0         // vbroadcastss  0x207c(%rip),%ymm9        # 406c <_sk_callback_hsw+0x323>
   .byte  196,98,109,172,200                  // vfnmadd213ps  %ymm0,%ymm2,%ymm9
   .byte  196,193,60,89,200                   // vmulps        %ymm8,%ymm8,%ymm1
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
-  .byte  196,226,125,24,21,85,32,0,0         // vbroadcastss  0x2055(%rip),%ymm2        # 3f88 <_sk_callback_hsw+0x313>
+  .byte  196,226,125,24,21,105,32,0,0        // vbroadcastss  0x2069(%rip),%ymm2        # 4070 <_sk_callback_hsw+0x327>
   .byte  197,108,194,209,1                   // vcmpltps      %ymm1,%ymm2,%ymm10
-  .byte  196,98,125,24,29,75,32,0,0          // vbroadcastss  0x204b(%rip),%ymm11        # 3f8c <_sk_callback_hsw+0x317>
+  .byte  196,98,125,24,29,95,32,0,0          // vbroadcastss  0x205f(%rip),%ymm11        # 4074 <_sk_callback_hsw+0x32b>
   .byte  196,65,60,88,195                    // vaddps        %ymm11,%ymm8,%ymm8
-  .byte  196,98,125,24,37,65,32,0,0          // vbroadcastss  0x2041(%rip),%ymm12        # 3f90 <_sk_callback_hsw+0x31b>
+  .byte  196,98,125,24,37,85,32,0,0          // vbroadcastss  0x2055(%rip),%ymm12        # 4078 <_sk_callback_hsw+0x32f>
   .byte  196,65,60,89,196                    // vmulps        %ymm12,%ymm8,%ymm8
   .byte  196,99,61,74,193,160                // vblendvps     %ymm10,%ymm1,%ymm8,%ymm8
   .byte  197,252,89,200                      // vmulps        %ymm0,%ymm0,%ymm1
@@ -9386,9 +9531,9 @@ _sk_lab_to_xyz_hsw:
   .byte  196,65,52,88,203                    // vaddps        %ymm11,%ymm9,%ymm9
   .byte  196,65,52,89,204                    // vmulps        %ymm12,%ymm9,%ymm9
   .byte  196,227,53,74,208,32                // vblendvps     %ymm2,%ymm0,%ymm9,%ymm2
-  .byte  196,226,125,24,5,246,31,0,0         // vbroadcastss  0x1ff6(%rip),%ymm0        # 3f94 <_sk_callback_hsw+0x31f>
+  .byte  196,226,125,24,5,10,32,0,0          // vbroadcastss  0x200a(%rip),%ymm0        # 407c <_sk_callback_hsw+0x333>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,237,31,0,0          // vbroadcastss  0x1fed(%rip),%ymm8        # 3f98 <_sk_callback_hsw+0x323>
+  .byte  196,98,125,24,5,1,32,0,0            // vbroadcastss  0x2001(%rip),%ymm8        # 4080 <_sk_callback_hsw+0x337>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9402,11 +9547,11 @@ _sk_load_a8_hsw:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,45                              // jne           1ff1 <_sk_load_a8_hsw+0x3d>
+  .byte  117,45                              // jne           20c5 <_sk_load_a8_hsw+0x3d>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,194,31,0,0        // vbroadcastss  0x1fc2(%rip),%ymm1        # 3f9c <_sk_callback_hsw+0x327>
+  .byte  196,226,125,24,13,214,31,0,0        // vbroadcastss  0x1fd6(%rip),%ymm1        # 4084 <_sk_callback_hsw+0x33b>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -9423,9 +9568,9 @@ _sk_load_a8_hsw:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           1ff9 <_sk_load_a8_hsw+0x45>
+  .byte  117,234                             // jne           20cd <_sk_load_a8_hsw+0x45>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,178                             // jmp           1fc8 <_sk_load_a8_hsw+0x14>
+  .byte  235,178                             // jmp           209c <_sk_load_a8_hsw+0x14>
 
 HIDDEN _sk_gather_a8_hsw
 .globl _sk_gather_a8_hsw
@@ -9471,7 +9616,7 @@ _sk_gather_a8_hsw:
   .byte  196,227,121,32,192,7                // vpinsrb       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,205,30,0,0        // vbroadcastss  0x1ecd(%rip),%ymm1        # 3fa0 <_sk_callback_hsw+0x32b>
+  .byte  196,226,125,24,13,225,30,0,0        // vbroadcastss  0x1ee1(%rip),%ymm1        # 4088 <_sk_callback_hsw+0x33f>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -9489,14 +9634,14 @@ FUNCTION(_sk_store_a8_hsw)
 _sk_store_a8_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,168,30,0,0          // vbroadcastss  0x1ea8(%rip),%ymm8        # 3fa4 <_sk_callback_hsw+0x32f>
+  .byte  196,98,125,24,5,188,30,0,0          // vbroadcastss  0x1ebc(%rip),%ymm8        # 408c <_sk_callback_hsw+0x343>
   .byte  196,65,100,89,192                   // vmulps        %ymm8,%ymm3,%ymm8
   .byte  196,65,125,91,192                   // vcvtps2dq     %ymm8,%ymm8
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  196,65,57,103,192                   // vpackuswb     %xmm8,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           2125 <_sk_store_a8_hsw+0x37>
+  .byte  117,10                              // jne           21f9 <_sk_store_a8_hsw+0x37>
   .byte  196,65,123,17,4,58                  // vmovsd        %xmm8,(%r10,%rdi,1)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9504,10 +9649,10 @@ _sk_store_a8_hsw:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            2121 <_sk_store_a8_hsw+0x33>
+  .byte  119,236                             // ja            21f5 <_sk_store_a8_hsw+0x33>
   .byte  196,66,121,48,192                   // vpmovzxbw     %xmm8,%xmm8
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,67,0,0,0                  // lea           0x43(%rip),%r9        # 2188 <_sk_store_a8_hsw+0x9a>
+  .byte  76,141,13,67,0,0,0                  // lea           0x43(%rip),%r9        # 225c <_sk_store_a8_hsw+0x9a>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9518,7 +9663,7 @@ _sk_store_a8_hsw:
   .byte  196,67,121,20,68,58,2,4             // vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   .byte  196,67,121,20,68,58,1,2             // vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   .byte  196,67,121,20,4,58,0                // vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  .byte  235,154                             // jmp           2121 <_sk_store_a8_hsw+0x33>
+  .byte  235,154                             // jmp           21f5 <_sk_store_a8_hsw+0x33>
   .byte  144                                 // nop
   .byte  246,255                             // idiv          %bh
   .byte  255                                 // (bad)
@@ -9552,14 +9697,14 @@ _sk_load_g8_hsw:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,50                              // jne           21e6 <_sk_load_g8_hsw+0x42>
+  .byte  117,50                              // jne           22ba <_sk_load_g8_hsw+0x42>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,222,29,0,0        // vbroadcastss  0x1dde(%rip),%ymm1        # 3fa8 <_sk_callback_hsw+0x333>
+  .byte  196,226,125,24,13,242,29,0,0        // vbroadcastss  0x1df2(%rip),%ymm1        # 4090 <_sk_callback_hsw+0x347>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,211,29,0,0        // vbroadcastss  0x1dd3(%rip),%ymm3        # 3fac <_sk_callback_hsw+0x337>
+  .byte  196,226,125,24,29,231,29,0,0        // vbroadcastss  0x1de7(%rip),%ymm3        # 4094 <_sk_callback_hsw+0x34b>
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
@@ -9573,9 +9718,9 @@ _sk_load_g8_hsw:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           21ee <_sk_load_g8_hsw+0x4a>
+  .byte  117,234                             // jne           22c2 <_sk_load_g8_hsw+0x4a>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,173                             // jmp           21b8 <_sk_load_g8_hsw+0x14>
+  .byte  235,173                             // jmp           228c <_sk_load_g8_hsw+0x14>
 
 HIDDEN _sk_gather_g8_hsw
 .globl _sk_gather_g8_hsw
@@ -9621,10 +9766,10 @@ _sk_gather_g8_hsw:
   .byte  196,227,121,32,192,7                // vpinsrb       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,232,28,0,0        // vbroadcastss  0x1ce8(%rip),%ymm1        # 3fb0 <_sk_callback_hsw+0x33b>
+  .byte  196,226,125,24,13,252,28,0,0        // vbroadcastss  0x1cfc(%rip),%ymm1        # 4098 <_sk_callback_hsw+0x34f>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,221,28,0,0        // vbroadcastss  0x1cdd(%rip),%ymm3        # 3fb4 <_sk_callback_hsw+0x33f>
+  .byte  196,226,125,24,29,241,28,0,0        // vbroadcastss  0x1cf1(%rip),%ymm3        # 409c <_sk_callback_hsw+0x353>
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  91                                  // pop           %rbx
@@ -9640,9 +9785,9 @@ _sk_gather_i8_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  73,137,192                          // mov           %rax,%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  116,5                               // je            22f7 <_sk_gather_i8_hsw+0xf>
+  .byte  116,5                               // je            23cb <_sk_gather_i8_hsw+0xf>
   .byte  76,137,192                          // mov           %r8,%rax
-  .byte  235,2                               // jmp           22f9 <_sk_gather_i8_hsw+0x11>
+  .byte  235,2                               // jmp           23cd <_sk_gather_i8_hsw+0x11>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,87                               // push          %r15
   .byte  65,86                               // push          %r14
@@ -9680,14 +9825,14 @@ _sk_gather_i8_hsw:
   .byte  73,139,64,8                         // mov           0x8(%r8),%rax
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,226,117,144,28,128              // vpgatherdd    %ymm1,(%rax,%ymm0,4),%ymm3
-  .byte  197,229,219,5,249,29,0,0            // vpand         0x1df9(%rip),%ymm3,%ymm0        # 41a0 <_sk_callback_hsw+0x52b>
+  .byte  197,229,219,5,5,30,0,0              // vpand         0x1e05(%rip),%ymm3,%ymm0        # 4280 <_sk_callback_hsw+0x537>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,4,28,0,0            // vbroadcastss  0x1c04(%rip),%ymm8        # 3fb8 <_sk_callback_hsw+0x343>
+  .byte  196,98,125,24,5,24,28,0,0           // vbroadcastss  0x1c18(%rip),%ymm8        # 40a0 <_sk_callback_hsw+0x357>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,226,101,0,13,254,29,0,0         // vpshufb       0x1dfe(%rip),%ymm3,%ymm1        # 41c0 <_sk_callback_hsw+0x54b>
+  .byte  196,226,101,0,13,10,30,0,0          // vpshufb       0x1e0a(%rip),%ymm3,%ymm1        # 42a0 <_sk_callback_hsw+0x557>
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,226,101,0,21,12,30,0,0          // vpshufb       0x1e0c(%rip),%ymm3,%ymm2        # 41e0 <_sk_callback_hsw+0x56b>
+  .byte  196,226,101,0,21,24,30,0,0          // vpshufb       0x1e18(%rip),%ymm3,%ymm2        # 42c0 <_sk_callback_hsw+0x577>
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
@@ -9708,35 +9853,35 @@ _sk_load_565_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,114                             // jne           2474 <_sk_load_565_hsw+0x7c>
+  .byte  117,114                             // jne           2548 <_sk_load_565_hsw+0x7c>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  196,226,125,51,208                  // vpmovzxwd     %xmm0,%ymm2
-  .byte  196,226,125,88,5,166,27,0,0         // vpbroadcastd  0x1ba6(%rip),%ymm0        # 3fbc <_sk_callback_hsw+0x347>
+  .byte  196,226,125,88,5,186,27,0,0         // vpbroadcastd  0x1bba(%rip),%ymm0        # 40a4 <_sk_callback_hsw+0x35b>
   .byte  197,237,219,192                     // vpand         %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,153,27,0,0        // vbroadcastss  0x1b99(%rip),%ymm1        # 3fc0 <_sk_callback_hsw+0x34b>
+  .byte  196,226,125,24,13,173,27,0,0        // vbroadcastss  0x1bad(%rip),%ymm1        # 40a8 <_sk_callback_hsw+0x35f>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,144,27,0,0        // vpbroadcastd  0x1b90(%rip),%ymm1        # 3fc4 <_sk_callback_hsw+0x34f>
+  .byte  196,226,125,88,13,164,27,0,0        // vpbroadcastd  0x1ba4(%rip),%ymm1        # 40ac <_sk_callback_hsw+0x363>
   .byte  197,237,219,201                     // vpand         %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,131,27,0,0        // vbroadcastss  0x1b83(%rip),%ymm3        # 3fc8 <_sk_callback_hsw+0x353>
+  .byte  196,226,125,24,29,151,27,0,0        // vbroadcastss  0x1b97(%rip),%ymm3        # 40b0 <_sk_callback_hsw+0x367>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,88,29,122,27,0,0        // vpbroadcastd  0x1b7a(%rip),%ymm3        # 3fcc <_sk_callback_hsw+0x357>
+  .byte  196,226,125,88,29,142,27,0,0        // vpbroadcastd  0x1b8e(%rip),%ymm3        # 40b4 <_sk_callback_hsw+0x36b>
   .byte  197,237,219,211                     // vpand         %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,109,27,0,0        // vbroadcastss  0x1b6d(%rip),%ymm3        # 3fd0 <_sk_callback_hsw+0x35b>
+  .byte  196,226,125,24,29,129,27,0,0        // vbroadcastss  0x1b81(%rip),%ymm3        # 40b8 <_sk_callback_hsw+0x36f>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,98,27,0,0         // vbroadcastss  0x1b62(%rip),%ymm3        # 3fd4 <_sk_callback_hsw+0x35f>
+  .byte  196,226,125,24,29,118,27,0,0        // vbroadcastss  0x1b76(%rip),%ymm3        # 40bc <_sk_callback_hsw+0x373>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,128                             // ja            2408 <_sk_load_565_hsw+0x10>
+  .byte  119,128                             // ja            24dc <_sk_load_565_hsw+0x10>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 24dc <_sk_load_565_hsw+0xe4>
+  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 25b0 <_sk_load_565_hsw+0xe4>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9748,7 +9893,7 @@ _sk_load_565_hsw:
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,44,255,255,255                  // jmpq          2408 <_sk_load_565_hsw+0x10>
+  .byte  233,44,255,255,255                  // jmpq          24dc <_sk_load_565_hsw+0x10>
   .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -9818,23 +9963,23 @@ _sk_gather_565_hsw:
   .byte  65,15,183,4,88                      // movzwl        (%r8,%rbx,2),%eax
   .byte  197,249,196,192,7                   // vpinsrw       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,51,208                  // vpmovzxwd     %xmm0,%ymm2
-  .byte  196,226,125,88,5,37,26,0,0          // vpbroadcastd  0x1a25(%rip),%ymm0        # 3fd8 <_sk_callback_hsw+0x363>
+  .byte  196,226,125,88,5,57,26,0,0          // vpbroadcastd  0x1a39(%rip),%ymm0        # 40c0 <_sk_callback_hsw+0x377>
   .byte  197,237,219,192                     // vpand         %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,24,26,0,0         // vbroadcastss  0x1a18(%rip),%ymm1        # 3fdc <_sk_callback_hsw+0x367>
+  .byte  196,226,125,24,13,44,26,0,0         // vbroadcastss  0x1a2c(%rip),%ymm1        # 40c4 <_sk_callback_hsw+0x37b>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,15,26,0,0         // vpbroadcastd  0x1a0f(%rip),%ymm1        # 3fe0 <_sk_callback_hsw+0x36b>
+  .byte  196,226,125,88,13,35,26,0,0         // vpbroadcastd  0x1a23(%rip),%ymm1        # 40c8 <_sk_callback_hsw+0x37f>
   .byte  197,237,219,201                     // vpand         %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,2,26,0,0          // vbroadcastss  0x1a02(%rip),%ymm3        # 3fe4 <_sk_callback_hsw+0x36f>
+  .byte  196,226,125,24,29,22,26,0,0         // vbroadcastss  0x1a16(%rip),%ymm3        # 40cc <_sk_callback_hsw+0x383>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,88,29,249,25,0,0        // vpbroadcastd  0x19f9(%rip),%ymm3        # 3fe8 <_sk_callback_hsw+0x373>
+  .byte  196,226,125,88,29,13,26,0,0         // vpbroadcastd  0x1a0d(%rip),%ymm3        # 40d0 <_sk_callback_hsw+0x387>
   .byte  197,237,219,211                     // vpand         %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,236,25,0,0        // vbroadcastss  0x19ec(%rip),%ymm3        # 3fec <_sk_callback_hsw+0x377>
+  .byte  196,226,125,24,29,0,26,0,0          // vbroadcastss  0x1a00(%rip),%ymm3        # 40d4 <_sk_callback_hsw+0x38b>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,225,25,0,0        // vbroadcastss  0x19e1(%rip),%ymm3        # 3ff0 <_sk_callback_hsw+0x37b>
+  .byte  196,226,125,24,29,245,25,0,0        // vbroadcastss  0x19f5(%rip),%ymm3        # 40d8 <_sk_callback_hsw+0x38f>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,94                               // pop           %r14
@@ -9847,11 +9992,11 @@ FUNCTION(_sk_store_565_hsw)
 _sk_store_565_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,206,25,0,0          // vbroadcastss  0x19ce(%rip),%ymm8        # 3ff4 <_sk_callback_hsw+0x37f>
+  .byte  196,98,125,24,5,226,25,0,0          // vbroadcastss  0x19e2(%rip),%ymm8        # 40dc <_sk_callback_hsw+0x393>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,53,114,241,11               // vpslld        $0xb,%ymm9,%ymm9
-  .byte  196,98,125,24,21,185,25,0,0         // vbroadcastss  0x19b9(%rip),%ymm10        # 3ff8 <_sk_callback_hsw+0x383>
+  .byte  196,98,125,24,21,205,25,0,0         // vbroadcastss  0x19cd(%rip),%ymm10        # 40e0 <_sk_callback_hsw+0x397>
   .byte  196,65,116,89,210                   // vmulps        %ymm10,%ymm1,%ymm10
   .byte  196,65,125,91,210                   // vcvtps2dq     %ymm10,%ymm10
   .byte  196,193,45,114,242,5                // vpslld        $0x5,%ymm10,%ymm10
@@ -9862,7 +10007,7 @@ _sk_store_565_hsw:
   .byte  196,67,125,57,193,1                 // vextracti128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           267d <_sk_store_565_hsw+0x65>
+  .byte  117,10                              // jne           2751 <_sk_store_565_hsw+0x65>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9870,9 +10015,9 @@ _sk_store_565_hsw:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            2679 <_sk_store_565_hsw+0x61>
+  .byte  119,236                             // ja            274d <_sk_store_565_hsw+0x61>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 26dc <_sk_store_565_hsw+0xc4>
+  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 27b0 <_sk_store_565_hsw+0xc4>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9883,7 +10028,7 @@ _sk_store_565_hsw:
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           2679 <_sk_store_565_hsw+0x61>
+  .byte  235,159                             // jmp           274d <_sk_store_565_hsw+0x61>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  245                                 // cmc
   .byte  255                                 // (bad)
@@ -9916,28 +10061,28 @@ _sk_load_4444_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,138,0,0,0                    // jne           2790 <_sk_load_4444_hsw+0x98>
+  .byte  15,133,138,0,0,0                    // jne           2864 <_sk_load_4444_hsw+0x98>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  196,226,125,51,216                  // vpmovzxwd     %xmm0,%ymm3
-  .byte  196,226,125,88,5,226,24,0,0         // vpbroadcastd  0x18e2(%rip),%ymm0        # 3ffc <_sk_callback_hsw+0x387>
+  .byte  196,226,125,88,5,246,24,0,0         // vpbroadcastd  0x18f6(%rip),%ymm0        # 40e4 <_sk_callback_hsw+0x39b>
   .byte  197,229,219,192                     // vpand         %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,213,24,0,0        // vbroadcastss  0x18d5(%rip),%ymm1        # 4000 <_sk_callback_hsw+0x38b>
+  .byte  196,226,125,24,13,233,24,0,0        // vbroadcastss  0x18e9(%rip),%ymm1        # 40e8 <_sk_callback_hsw+0x39f>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,204,24,0,0        // vpbroadcastd  0x18cc(%rip),%ymm1        # 4004 <_sk_callback_hsw+0x38f>
+  .byte  196,226,125,88,13,224,24,0,0        // vpbroadcastd  0x18e0(%rip),%ymm1        # 40ec <_sk_callback_hsw+0x3a3>
   .byte  197,229,219,201                     // vpand         %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,191,24,0,0        // vbroadcastss  0x18bf(%rip),%ymm2        # 4008 <_sk_callback_hsw+0x393>
+  .byte  196,226,125,24,21,211,24,0,0        // vbroadcastss  0x18d3(%rip),%ymm2        # 40f0 <_sk_callback_hsw+0x3a7>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,88,21,182,24,0,0        // vpbroadcastd  0x18b6(%rip),%ymm2        # 400c <_sk_callback_hsw+0x397>
+  .byte  196,226,125,88,21,202,24,0,0        // vpbroadcastd  0x18ca(%rip),%ymm2        # 40f4 <_sk_callback_hsw+0x3ab>
   .byte  197,229,219,210                     // vpand         %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,169,24,0,0          // vbroadcastss  0x18a9(%rip),%ymm8        # 4010 <_sk_callback_hsw+0x39b>
+  .byte  196,98,125,24,5,189,24,0,0          // vbroadcastss  0x18bd(%rip),%ymm8        # 40f8 <_sk_callback_hsw+0x3af>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,88,5,159,24,0,0          // vpbroadcastd  0x189f(%rip),%ymm8        # 4014 <_sk_callback_hsw+0x39f>
+  .byte  196,98,125,88,5,179,24,0,0          // vpbroadcastd  0x18b3(%rip),%ymm8        # 40fc <_sk_callback_hsw+0x3b3>
   .byte  196,193,101,219,216                 // vpand         %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,145,24,0,0          // vbroadcastss  0x1891(%rip),%ymm8        # 4018 <_sk_callback_hsw+0x3a3>
+  .byte  196,98,125,24,5,165,24,0,0          // vbroadcastss  0x18a5(%rip),%ymm8        # 4100 <_sk_callback_hsw+0x3b7>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9946,9 +10091,9 @@ _sk_load_4444_hsw:
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,100,255,255,255              // ja            270c <_sk_load_4444_hsw+0x14>
+  .byte  15,135,100,255,255,255              // ja            27e0 <_sk_load_4444_hsw+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 27fc <_sk_load_4444_hsw+0x104>
+  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 28d0 <_sk_load_4444_hsw+0x104>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -9960,7 +10105,7 @@ _sk_load_4444_hsw:
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,16,255,255,255                  // jmpq          270c <_sk_load_4444_hsw+0x14>
+  .byte  233,16,255,255,255                  // jmpq          27e0 <_sk_load_4444_hsw+0x14>
   .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -10030,25 +10175,25 @@ _sk_gather_4444_hsw:
   .byte  65,15,183,4,88                      // movzwl        (%r8,%rbx,2),%eax
   .byte  197,249,196,192,7                   // vpinsrw       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,51,216                  // vpmovzxwd     %xmm0,%ymm3
-  .byte  196,226,125,88,5,73,23,0,0          // vpbroadcastd  0x1749(%rip),%ymm0        # 401c <_sk_callback_hsw+0x3a7>
+  .byte  196,226,125,88,5,93,23,0,0          // vpbroadcastd  0x175d(%rip),%ymm0        # 4104 <_sk_callback_hsw+0x3bb>
   .byte  197,229,219,192                     // vpand         %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,60,23,0,0         // vbroadcastss  0x173c(%rip),%ymm1        # 4020 <_sk_callback_hsw+0x3ab>
+  .byte  196,226,125,24,13,80,23,0,0         // vbroadcastss  0x1750(%rip),%ymm1        # 4108 <_sk_callback_hsw+0x3bf>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,51,23,0,0         // vpbroadcastd  0x1733(%rip),%ymm1        # 4024 <_sk_callback_hsw+0x3af>
+  .byte  196,226,125,88,13,71,23,0,0         // vpbroadcastd  0x1747(%rip),%ymm1        # 410c <_sk_callback_hsw+0x3c3>
   .byte  197,229,219,201                     // vpand         %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,38,23,0,0         // vbroadcastss  0x1726(%rip),%ymm2        # 4028 <_sk_callback_hsw+0x3b3>
+  .byte  196,226,125,24,21,58,23,0,0         // vbroadcastss  0x173a(%rip),%ymm2        # 4110 <_sk_callback_hsw+0x3c7>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,88,21,29,23,0,0         // vpbroadcastd  0x171d(%rip),%ymm2        # 402c <_sk_callback_hsw+0x3b7>
+  .byte  196,226,125,88,21,49,23,0,0         // vpbroadcastd  0x1731(%rip),%ymm2        # 4114 <_sk_callback_hsw+0x3cb>
   .byte  197,229,219,210                     // vpand         %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,16,23,0,0           // vbroadcastss  0x1710(%rip),%ymm8        # 4030 <_sk_callback_hsw+0x3bb>
+  .byte  196,98,125,24,5,36,23,0,0           // vbroadcastss  0x1724(%rip),%ymm8        # 4118 <_sk_callback_hsw+0x3cf>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,88,5,6,23,0,0            // vpbroadcastd  0x1706(%rip),%ymm8        # 4034 <_sk_callback_hsw+0x3bf>
+  .byte  196,98,125,88,5,26,23,0,0           // vpbroadcastd  0x171a(%rip),%ymm8        # 411c <_sk_callback_hsw+0x3d3>
   .byte  196,193,101,219,216                 // vpand         %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,248,22,0,0          // vbroadcastss  0x16f8(%rip),%ymm8        # 4038 <_sk_callback_hsw+0x3c3>
+  .byte  196,98,125,24,5,12,23,0,0           // vbroadcastss  0x170c(%rip),%ymm8        # 4120 <_sk_callback_hsw+0x3d7>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -10063,7 +10208,7 @@ FUNCTION(_sk_store_4444_hsw)
 _sk_store_4444_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,222,22,0,0          // vbroadcastss  0x16de(%rip),%ymm8        # 403c <_sk_callback_hsw+0x3c7>
+  .byte  196,98,125,24,5,242,22,0,0          // vbroadcastss  0x16f2(%rip),%ymm8        # 4124 <_sk_callback_hsw+0x3db>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,53,114,241,12               // vpslld        $0xc,%ymm9,%ymm9
@@ -10081,7 +10226,7 @@ _sk_store_4444_hsw:
   .byte  196,67,125,57,193,1                 // vextracti128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           29c1 <_sk_store_4444_hsw+0x71>
+  .byte  117,10                              // jne           2a95 <_sk_store_4444_hsw+0x71>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -10089,9 +10234,9 @@ _sk_store_4444_hsw:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            29bd <_sk_store_4444_hsw+0x6d>
+  .byte  119,236                             // ja            2a91 <_sk_store_4444_hsw+0x6d>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 2a20 <_sk_store_4444_hsw+0xd0>
+  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 2af4 <_sk_store_4444_hsw+0xd0>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -10102,7 +10247,7 @@ _sk_store_4444_hsw:
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           29bd <_sk_store_4444_hsw+0x6d>
+  .byte  235,159                             // jmp           2a91 <_sk_store_4444_hsw+0x6d>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  245                                 // cmc
   .byte  255                                 // (bad)
@@ -10137,16 +10282,16 @@ _sk_load_8888_hsw:
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,88                              // jne           2aa9 <_sk_load_8888_hsw+0x6d>
+  .byte  117,88                              // jne           2b7d <_sk_load_8888_hsw+0x6d>
   .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
-  .byte  197,229,219,5,162,23,0,0            // vpand         0x17a2(%rip),%ymm3,%ymm0        # 4200 <_sk_callback_hsw+0x58b>
+  .byte  197,229,219,5,174,23,0,0            // vpand         0x17ae(%rip),%ymm3,%ymm0        # 42e0 <_sk_callback_hsw+0x597>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,213,21,0,0          // vbroadcastss  0x15d5(%rip),%ymm8        # 4040 <_sk_callback_hsw+0x3cb>
+  .byte  196,98,125,24,5,233,21,0,0          // vbroadcastss  0x15e9(%rip),%ymm8        # 4128 <_sk_callback_hsw+0x3df>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,226,101,0,13,167,23,0,0         // vpshufb       0x17a7(%rip),%ymm3,%ymm1        # 4220 <_sk_callback_hsw+0x5ab>
+  .byte  196,226,101,0,13,179,23,0,0         // vpshufb       0x17b3(%rip),%ymm3,%ymm1        # 4300 <_sk_callback_hsw+0x5b7>
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,226,101,0,21,181,23,0,0         // vpshufb       0x17b5(%rip),%ymm3,%ymm2        # 4240 <_sk_callback_hsw+0x5cb>
+  .byte  196,226,101,0,21,193,23,0,0         // vpshufb       0x17c1(%rip),%ymm3,%ymm2        # 4320 <_sk_callback_hsw+0x5d7>
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
@@ -10163,7 +10308,7 @@ _sk_load_8888_hsw:
   .byte  196,225,249,110,192                 // vmovq         %rax,%xmm0
   .byte  196,226,125,33,192                  // vpmovsxbd     %xmm0,%ymm0
   .byte  196,194,125,140,25                  // vpmaskmovd    (%r9),%ymm0,%ymm3
-  .byte  235,135                             // jmp           2a56 <_sk_load_8888_hsw+0x1a>
+  .byte  235,135                             // jmp           2b2a <_sk_load_8888_hsw+0x1a>
 
 HIDDEN _sk_gather_8888_hsw
 .globl _sk_gather_8888_hsw
@@ -10178,14 +10323,14 @@ _sk_gather_8888_hsw:
   .byte  197,245,254,192                     // vpaddd        %ymm0,%ymm1,%ymm0
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,194,117,144,28,128              // vpgatherdd    %ymm1,(%r8,%ymm0,4),%ymm3
-  .byte  197,229,219,5,99,23,0,0             // vpand         0x1763(%rip),%ymm3,%ymm0        # 4260 <_sk_callback_hsw+0x5eb>
+  .byte  197,229,219,5,111,23,0,0            // vpand         0x176f(%rip),%ymm3,%ymm0        # 4340 <_sk_callback_hsw+0x5f7>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,58,21,0,0           // vbroadcastss  0x153a(%rip),%ymm8        # 4044 <_sk_callback_hsw+0x3cf>
+  .byte  196,98,125,24,5,78,21,0,0           // vbroadcastss  0x154e(%rip),%ymm8        # 412c <_sk_callback_hsw+0x3e3>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,226,101,0,13,104,23,0,0         // vpshufb       0x1768(%rip),%ymm3,%ymm1        # 4280 <_sk_callback_hsw+0x60b>
+  .byte  196,226,101,0,13,116,23,0,0         // vpshufb       0x1774(%rip),%ymm3,%ymm1        # 4360 <_sk_callback_hsw+0x617>
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,226,101,0,21,118,23,0,0         // vpshufb       0x1776(%rip),%ymm3,%ymm2        # 42a0 <_sk_callback_hsw+0x62b>
+  .byte  196,226,101,0,21,130,23,0,0         // vpshufb       0x1782(%rip),%ymm3,%ymm2        # 4380 <_sk_callback_hsw+0x637>
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
@@ -10202,7 +10347,7 @@ _sk_store_8888_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  76,3,8                              // add           (%rax),%r9
-  .byte  196,98,125,24,5,234,20,0,0          // vbroadcastss  0x14ea(%rip),%ymm8        # 4048 <_sk_callback_hsw+0x3d3>
+  .byte  196,98,125,24,5,254,20,0,0          // vbroadcastss  0x14fe(%rip),%ymm8        # 4130 <_sk_callback_hsw+0x3e7>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,65,116,89,208                   // vmulps        %ymm8,%ymm1,%ymm10
@@ -10218,7 +10363,7 @@ _sk_store_8888_hsw:
   .byte  196,65,45,235,192                   // vpor          %ymm8,%ymm10,%ymm8
   .byte  196,65,53,235,192                   // vpor          %ymm8,%ymm9,%ymm8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,12                              // jne           2bb8 <_sk_store_8888_hsw+0x73>
+  .byte  117,12                              // jne           2c8c <_sk_store_8888_hsw+0x73>
   .byte  196,65,126,127,1                    // vmovdqu       %ymm8,(%r9)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,137,193                          // mov           %r8,%rcx
@@ -10231,7 +10376,7 @@ _sk_store_8888_hsw:
   .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
   .byte  196,66,125,33,201                   // vpmovsxbd     %xmm9,%ymm9
   .byte  196,66,53,142,1                     // vpmaskmovd    %ymm8,%ymm9,(%r9)
-  .byte  235,211                             // jmp           2bb1 <_sk_store_8888_hsw+0x6c>
+  .byte  235,211                             // jmp           2c85 <_sk_store_8888_hsw+0x6c>
 
 HIDDEN _sk_load_f16_hsw
 .globl _sk_load_f16_hsw
@@ -10240,7 +10385,7 @@ _sk_load_f16_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,97                              // jne           2c49 <_sk_load_f16_hsw+0x6b>
+  .byte  117,97                              // jne           2d1d <_sk_load_f16_hsw+0x6b>
   .byte  197,121,16,4,248                    // vmovupd       (%rax,%rdi,8),%xmm8
   .byte  197,249,16,84,248,16                // vmovupd       0x10(%rax,%rdi,8),%xmm2
   .byte  197,249,16,92,248,32                // vmovupd       0x20(%rax,%rdi,8),%xmm3
@@ -10266,29 +10411,29 @@ _sk_load_f16_hsw:
   .byte  197,123,16,4,248                    // vmovsd        (%rax,%rdi,8),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,79                              // je            2ca8 <_sk_load_f16_hsw+0xca>
+  .byte  116,79                              // je            2d7c <_sk_load_f16_hsw+0xca>
   .byte  197,57,22,68,248,8                  // vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,67                              // jb            2ca8 <_sk_load_f16_hsw+0xca>
+  .byte  114,67                              // jb            2d7c <_sk_load_f16_hsw+0xca>
   .byte  197,251,16,84,248,16                // vmovsd        0x10(%rax,%rdi,8),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,68                              // je            2cb5 <_sk_load_f16_hsw+0xd7>
+  .byte  116,68                              // je            2d89 <_sk_load_f16_hsw+0xd7>
   .byte  197,233,22,84,248,24                // vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,56                              // jb            2cb5 <_sk_load_f16_hsw+0xd7>
+  .byte  114,56                              // jb            2d89 <_sk_load_f16_hsw+0xd7>
   .byte  197,251,16,92,248,32                // vmovsd        0x20(%rax,%rdi,8),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,114,255,255,255              // je            2bff <_sk_load_f16_hsw+0x21>
+  .byte  15,132,114,255,255,255              // je            2cd3 <_sk_load_f16_hsw+0x21>
   .byte  197,225,22,92,248,40                // vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,98,255,255,255               // jb            2bff <_sk_load_f16_hsw+0x21>
+  .byte  15,130,98,255,255,255               // jb            2cd3 <_sk_load_f16_hsw+0x21>
   .byte  197,122,126,76,248,48               // vmovq         0x30(%rax,%rdi,8),%xmm9
-  .byte  233,87,255,255,255                  // jmpq          2bff <_sk_load_f16_hsw+0x21>
+  .byte  233,87,255,255,255                  // jmpq          2cd3 <_sk_load_f16_hsw+0x21>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,74,255,255,255                  // jmpq          2bff <_sk_load_f16_hsw+0x21>
+  .byte  233,74,255,255,255                  // jmpq          2cd3 <_sk_load_f16_hsw+0x21>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,65,255,255,255                  // jmpq          2bff <_sk_load_f16_hsw+0x21>
+  .byte  233,65,255,255,255                  // jmpq          2cd3 <_sk_load_f16_hsw+0x21>
 
 HIDDEN _sk_gather_f16_hsw
 .globl _sk_gather_f16_hsw
@@ -10346,7 +10491,7 @@ _sk_store_f16_hsw:
   .byte  196,65,57,98,205                    // vpunpckldq    %xmm13,%xmm8,%xmm9
   .byte  196,65,57,106,197                   // vpunpckhdq    %xmm13,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,27                              // jne           2dad <_sk_store_f16_hsw+0x65>
+  .byte  117,27                              // jne           2e81 <_sk_store_f16_hsw+0x65>
   .byte  197,120,17,28,248                   // vmovups       %xmm11,(%rax,%rdi,8)
   .byte  197,120,17,84,248,16                // vmovups       %xmm10,0x10(%rax,%rdi,8)
   .byte  197,120,17,76,248,32                // vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -10355,22 +10500,22 @@ _sk_store_f16_hsw:
   .byte  255,224                             // jmpq          *%rax
   .byte  197,121,214,28,248                  // vmovq         %xmm11,(%rax,%rdi,8)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,241                             // je            2da9 <_sk_store_f16_hsw+0x61>
+  .byte  116,241                             // je            2e7d <_sk_store_f16_hsw+0x61>
   .byte  197,121,23,92,248,8                 // vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,229                             // jb            2da9 <_sk_store_f16_hsw+0x61>
+  .byte  114,229                             // jb            2e7d <_sk_store_f16_hsw+0x61>
   .byte  197,121,214,84,248,16               // vmovq         %xmm10,0x10(%rax,%rdi,8)
-  .byte  116,221                             // je            2da9 <_sk_store_f16_hsw+0x61>
+  .byte  116,221                             // je            2e7d <_sk_store_f16_hsw+0x61>
   .byte  197,121,23,84,248,24                // vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,209                             // jb            2da9 <_sk_store_f16_hsw+0x61>
+  .byte  114,209                             // jb            2e7d <_sk_store_f16_hsw+0x61>
   .byte  197,121,214,76,248,32               // vmovq         %xmm9,0x20(%rax,%rdi,8)
-  .byte  116,201                             // je            2da9 <_sk_store_f16_hsw+0x61>
+  .byte  116,201                             // je            2e7d <_sk_store_f16_hsw+0x61>
   .byte  197,121,23,76,248,40                // vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,189                             // jb            2da9 <_sk_store_f16_hsw+0x61>
+  .byte  114,189                             // jb            2e7d <_sk_store_f16_hsw+0x61>
   .byte  197,121,214,68,248,48               // vmovq         %xmm8,0x30(%rax,%rdi,8)
-  .byte  235,181                             // jmp           2da9 <_sk_store_f16_hsw+0x61>
+  .byte  235,181                             // jmp           2e7d <_sk_store_f16_hsw+0x61>
 
 HIDDEN _sk_load_u16_be_hsw
 .globl _sk_load_u16_be_hsw
@@ -10380,7 +10525,7 @@ _sk_load_u16_be_hsw:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,204,0,0,0                    // jne           2ed6 <_sk_load_u16_be_hsw+0xe2>
+  .byte  15,133,204,0,0,0                    // jne           2faa <_sk_load_u16_be_hsw+0xe2>
   .byte  196,65,121,16,4,64                  // vmovupd       (%r8,%rax,2),%xmm8
   .byte  196,193,121,16,84,64,16             // vmovupd       0x10(%r8,%rax,2),%xmm2
   .byte  196,193,121,16,92,64,32             // vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -10399,7 +10544,7 @@ _sk_load_u16_be_hsw:
   .byte  197,241,235,192                     // vpor          %xmm0,%xmm1,%xmm0
   .byte  196,226,125,51,192                  // vpmovzxwd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,21,225,17,0,0         // vbroadcastss  0x11e1(%rip),%ymm10        # 404c <_sk_callback_hsw+0x3d7>
+  .byte  196,98,125,24,21,245,17,0,0         // vbroadcastss  0x11f5(%rip),%ymm10        # 4134 <_sk_callback_hsw+0x3eb>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -10427,29 +10572,29 @@ _sk_load_u16_be_hsw:
   .byte  196,65,123,16,4,64                  // vmovsd        (%r8,%rax,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            2f3c <_sk_load_u16_be_hsw+0x148>
+  .byte  116,85                              // je            3010 <_sk_load_u16_be_hsw+0x148>
   .byte  196,65,57,22,68,64,8                // vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            2f3c <_sk_load_u16_be_hsw+0x148>
+  .byte  114,72                              // jb            3010 <_sk_load_u16_be_hsw+0x148>
   .byte  196,193,123,16,84,64,16             // vmovsd        0x10(%r8,%rax,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            2f49 <_sk_load_u16_be_hsw+0x155>
+  .byte  116,72                              // je            301d <_sk_load_u16_be_hsw+0x155>
   .byte  196,193,105,22,84,64,24             // vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            2f49 <_sk_load_u16_be_hsw+0x155>
+  .byte  114,59                              // jb            301d <_sk_load_u16_be_hsw+0x155>
   .byte  196,193,123,16,92,64,32             // vmovsd        0x20(%r8,%rax,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,6,255,255,255                // je            2e25 <_sk_load_u16_be_hsw+0x31>
+  .byte  15,132,6,255,255,255                // je            2ef9 <_sk_load_u16_be_hsw+0x31>
   .byte  196,193,97,22,92,64,40              // vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,245,254,255,255              // jb            2e25 <_sk_load_u16_be_hsw+0x31>
+  .byte  15,130,245,254,255,255              // jb            2ef9 <_sk_load_u16_be_hsw+0x31>
   .byte  196,65,122,126,76,64,48             // vmovq         0x30(%r8,%rax,2),%xmm9
-  .byte  233,233,254,255,255                 // jmpq          2e25 <_sk_load_u16_be_hsw+0x31>
+  .byte  233,233,254,255,255                 // jmpq          2ef9 <_sk_load_u16_be_hsw+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,220,254,255,255                 // jmpq          2e25 <_sk_load_u16_be_hsw+0x31>
+  .byte  233,220,254,255,255                 // jmpq          2ef9 <_sk_load_u16_be_hsw+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,211,254,255,255                 // jmpq          2e25 <_sk_load_u16_be_hsw+0x31>
+  .byte  233,211,254,255,255                 // jmpq          2ef9 <_sk_load_u16_be_hsw+0x31>
 
 HIDDEN _sk_load_rgb_u16_be_hsw
 .globl _sk_load_rgb_u16_be_hsw
@@ -10459,7 +10604,7 @@ _sk_load_rgb_u16_be_hsw:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,127                        // lea           (%rdi,%rdi,2),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,204,0,0,0                    // jne           3030 <_sk_load_rgb_u16_be_hsw+0xde>
+  .byte  15,133,204,0,0,0                    // jne           3104 <_sk_load_rgb_u16_be_hsw+0xde>
   .byte  196,193,122,111,4,64                // vmovdqu       (%r8,%rax,2),%xmm0
   .byte  196,193,122,111,84,64,12            // vmovdqu       0xc(%r8,%rax,2),%xmm2
   .byte  196,193,122,111,76,64,24            // vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -10483,7 +10628,7 @@ _sk_load_rgb_u16_be_hsw:
   .byte  197,241,235,192                     // vpor          %xmm0,%xmm1,%xmm0
   .byte  196,226,125,51,192                  // vpmovzxwd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,21,114,16,0,0         // vbroadcastss  0x1072(%rip),%ymm10        # 4050 <_sk_callback_hsw+0x3db>
+  .byte  196,98,125,24,21,134,16,0,0         // vbroadcastss  0x1086(%rip),%ymm10        # 4138 <_sk_callback_hsw+0x3ef>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -10500,41 +10645,41 @@ _sk_load_rgb_u16_be_hsw:
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,38,16,0,0         // vbroadcastss  0x1026(%rip),%ymm3        # 4054 <_sk_callback_hsw+0x3df>
+  .byte  196,226,125,24,29,58,16,0,0         // vbroadcastss  0x103a(%rip),%ymm3        # 413c <_sk_callback_hsw+0x3f3>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,193,121,110,4,64                // vmovd         (%r8,%rax,2),%xmm0
   .byte  196,193,121,196,68,64,4,2           // vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           3049 <_sk_load_rgb_u16_be_hsw+0xf7>
-  .byte  233,79,255,255,255                  // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  117,5                               // jne           311d <_sk_load_rgb_u16_be_hsw+0xf7>
+  .byte  233,79,255,255,255                  // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
   .byte  196,193,121,110,76,64,6             // vmovd         0x6(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,68,64,10,2           // vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            3078 <_sk_load_rgb_u16_be_hsw+0x126>
+  .byte  114,26                              // jb            314c <_sk_load_rgb_u16_be_hsw+0x126>
   .byte  196,193,121,110,76,64,12            // vmovd         0xc(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,84,64,16,2          // vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           307d <_sk_load_rgb_u16_be_hsw+0x12b>
-  .byte  233,32,255,255,255                  // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
-  .byte  233,27,255,255,255                  // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  117,10                              // jne           3151 <_sk_load_rgb_u16_be_hsw+0x12b>
+  .byte  233,32,255,255,255                  // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  233,27,255,255,255                  // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
   .byte  196,193,121,110,76,64,18            // vmovd         0x12(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,76,64,22,2           // vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            30ac <_sk_load_rgb_u16_be_hsw+0x15a>
+  .byte  114,26                              // jb            3180 <_sk_load_rgb_u16_be_hsw+0x15a>
   .byte  196,193,121,110,76,64,24            // vmovd         0x18(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,76,64,28,2          // vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           30b1 <_sk_load_rgb_u16_be_hsw+0x15f>
-  .byte  233,236,254,255,255                 // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
-  .byte  233,231,254,255,255                 // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  117,10                              // jne           3185 <_sk_load_rgb_u16_be_hsw+0x15f>
+  .byte  233,236,254,255,255                 // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  233,231,254,255,255                 // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
   .byte  196,193,121,110,92,64,30            // vmovd         0x1e(%r8,%rax,2),%xmm3
   .byte  196,65,97,196,92,64,34,2            // vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            30da <_sk_load_rgb_u16_be_hsw+0x188>
+  .byte  114,20                              // jb            31ae <_sk_load_rgb_u16_be_hsw+0x188>
   .byte  196,193,121,110,92,64,36            // vmovd         0x24(%r8,%rax,2),%xmm3
   .byte  196,193,97,196,92,64,40,2           // vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  .byte  233,190,254,255,255                 // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
-  .byte  233,185,254,255,255                 // jmpq          2f98 <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  233,190,254,255,255                 // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
+  .byte  233,185,254,255,255                 // jmpq          306c <_sk_load_rgb_u16_be_hsw+0x46>
 
 HIDDEN _sk_store_u16_be_hsw
 .globl _sk_store_u16_be_hsw
@@ -10543,7 +10688,7 @@ _sk_store_u16_be_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
-  .byte  196,98,125,24,5,99,15,0,0           // vbroadcastss  0xf63(%rip),%ymm8        # 4058 <_sk_callback_hsw+0x3e3>
+  .byte  196,98,125,24,5,119,15,0,0          // vbroadcastss  0xf77(%rip),%ymm8        # 4140 <_sk_callback_hsw+0x3f7>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,67,125,25,202,1                 // vextractf128  $0x1,%ymm9,%xmm10
@@ -10581,7 +10726,7 @@ _sk_store_u16_be_hsw:
   .byte  196,65,17,98,200                    // vpunpckldq    %xmm8,%xmm13,%xmm9
   .byte  196,65,17,106,192                   // vpunpckhdq    %xmm8,%xmm13,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,31                              // jne           31d9 <_sk_store_u16_be_hsw+0xfa>
+  .byte  117,31                              // jne           32ad <_sk_store_u16_be_hsw+0xfa>
   .byte  196,65,120,17,28,64                 // vmovups       %xmm11,(%r8,%rax,2)
   .byte  196,65,120,17,84,64,16              // vmovups       %xmm10,0x10(%r8,%rax,2)
   .byte  196,65,120,17,76,64,32              // vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -10590,22 +10735,22 @@ _sk_store_u16_be_hsw:
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,214,28,64                // vmovq         %xmm11,(%r8,%rax,2)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  116,240                             // je            32a9 <_sk_store_u16_be_hsw+0xf6>
   .byte  196,65,121,23,92,64,8               // vmovhpd       %xmm11,0x8(%r8,%rax,2)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  114,227                             // jb            32a9 <_sk_store_u16_be_hsw+0xf6>
   .byte  196,65,121,214,84,64,16             // vmovq         %xmm10,0x10(%r8,%rax,2)
-  .byte  116,218                             // je            31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  116,218                             // je            32a9 <_sk_store_u16_be_hsw+0xf6>
   .byte  196,65,121,23,84,64,24              // vmovhpd       %xmm10,0x18(%r8,%rax,2)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  114,205                             // jb            32a9 <_sk_store_u16_be_hsw+0xf6>
   .byte  196,65,121,214,76,64,32             // vmovq         %xmm9,0x20(%r8,%rax,2)
-  .byte  116,196                             // je            31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  116,196                             // je            32a9 <_sk_store_u16_be_hsw+0xf6>
   .byte  196,65,121,23,76,64,40              // vmovhpd       %xmm9,0x28(%r8,%rax,2)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,183                             // jb            31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  114,183                             // jb            32a9 <_sk_store_u16_be_hsw+0xf6>
   .byte  196,65,121,214,68,64,48             // vmovq         %xmm8,0x30(%r8,%rax,2)
-  .byte  235,174                             // jmp           31d5 <_sk_store_u16_be_hsw+0xf6>
+  .byte  235,174                             // jmp           32a9 <_sk_store_u16_be_hsw+0xf6>
 
 HIDDEN _sk_load_f32_hsw
 .globl _sk_load_f32_hsw
@@ -10613,10 +10758,10 @@ FUNCTION(_sk_load_f32_hsw)
 _sk_load_f32_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  119,110                             // ja            329d <_sk_load_f32_hsw+0x76>
+  .byte  119,110                             // ja            3371 <_sk_load_f32_hsw+0x76>
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,141,21,135,0,0,0                 // lea           0x87(%rip),%r10        # 32c8 <_sk_load_f32_hsw+0xa1>
+  .byte  76,141,21,135,0,0,0                 // lea           0x87(%rip),%r10        # 339c <_sk_load_f32_hsw+0xa1>
   .byte  73,99,4,138                         // movslq        (%r10,%rcx,4),%rax
   .byte  76,1,208                            // add           %r10,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -10677,7 +10822,7 @@ _sk_store_f32_hsw:
   .byte  196,65,37,20,196                    // vunpcklpd     %ymm12,%ymm11,%ymm8
   .byte  196,65,37,21,220                    // vunpckhpd     %ymm12,%ymm11,%ymm11
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,55                              // jne           3355 <_sk_store_f32_hsw+0x6d>
+  .byte  117,55                              // jne           3429 <_sk_store_f32_hsw+0x6d>
   .byte  196,67,45,24,225,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   .byte  196,67,61,24,235,1                  // vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   .byte  196,67,45,6,201,49                  // vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -10690,22 +10835,22 @@ _sk_store_f32_hsw:
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,17,20,128                // vmovupd       %xmm10,(%r8,%rax,4)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            3351 <_sk_store_f32_hsw+0x69>
+  .byte  116,240                             // je            3425 <_sk_store_f32_hsw+0x69>
   .byte  196,65,121,17,76,128,16             // vmovupd       %xmm9,0x10(%r8,%rax,4)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            3351 <_sk_store_f32_hsw+0x69>
+  .byte  114,227                             // jb            3425 <_sk_store_f32_hsw+0x69>
   .byte  196,65,121,17,68,128,32             // vmovupd       %xmm8,0x20(%r8,%rax,4)
-  .byte  116,218                             // je            3351 <_sk_store_f32_hsw+0x69>
+  .byte  116,218                             // je            3425 <_sk_store_f32_hsw+0x69>
   .byte  196,65,121,17,92,128,48             // vmovupd       %xmm11,0x30(%r8,%rax,4)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            3351 <_sk_store_f32_hsw+0x69>
+  .byte  114,205                             // jb            3425 <_sk_store_f32_hsw+0x69>
   .byte  196,67,125,25,84,128,64,1           // vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  .byte  116,195                             // je            3351 <_sk_store_f32_hsw+0x69>
+  .byte  116,195                             // je            3425 <_sk_store_f32_hsw+0x69>
   .byte  196,67,125,25,76,128,80,1           // vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,181                             // jb            3351 <_sk_store_f32_hsw+0x69>
+  .byte  114,181                             // jb            3425 <_sk_store_f32_hsw+0x69>
   .byte  196,67,125,25,68,128,96,1           // vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  .byte  235,171                             // jmp           3351 <_sk_store_f32_hsw+0x69>
+  .byte  235,171                             // jmp           3425 <_sk_store_f32_hsw+0x69>
 
 HIDDEN _sk_clamp_x_hsw
 .globl _sk_clamp_x_hsw
@@ -10815,11 +10960,11 @@ HIDDEN _sk_luminance_to_alpha_hsw
 .globl _sk_luminance_to_alpha_hsw
 FUNCTION(_sk_luminance_to_alpha_hsw)
 _sk_luminance_to_alpha_hsw:
-  .byte  196,226,125,24,29,125,11,0,0        // vbroadcastss  0xb7d(%rip),%ymm3        # 405c <_sk_callback_hsw+0x3e7>
-  .byte  196,98,125,24,5,120,11,0,0          // vbroadcastss  0xb78(%rip),%ymm8        # 4060 <_sk_callback_hsw+0x3eb>
+  .byte  196,226,125,24,29,145,11,0,0        // vbroadcastss  0xb91(%rip),%ymm3        # 4144 <_sk_callback_hsw+0x3fb>
+  .byte  196,98,125,24,5,140,11,0,0          // vbroadcastss  0xb8c(%rip),%ymm8        # 4148 <_sk_callback_hsw+0x3ff>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  196,226,125,184,203                 // vfmadd231ps   %ymm3,%ymm0,%ymm1
-  .byte  196,226,125,24,29,105,11,0,0        // vbroadcastss  0xb69(%rip),%ymm3        # 4064 <_sk_callback_hsw+0x3ef>
+  .byte  196,226,125,24,29,125,11,0,0        // vbroadcastss  0xb7d(%rip),%ymm3        # 414c <_sk_callback_hsw+0x403>
   .byte  196,226,109,168,217                 // vfmadd213ps   %ymm1,%ymm2,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -10964,7 +11109,7 @@ _sk_linear_gradient_hsw:
   .byte  196,98,125,24,72,28                 // vbroadcastss  0x1c(%rax),%ymm9
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  15,132,143,0,0,0                    // je            37d3 <_sk_linear_gradient_hsw+0xb5>
+  .byte  15,132,143,0,0,0                    // je            38a7 <_sk_linear_gradient_hsw+0xb5>
   .byte  72,139,64,8                         // mov           0x8(%rax),%rax
   .byte  72,131,192,32                       // add           $0x20,%rax
   .byte  196,65,28,87,228                    // vxorps        %ymm12,%ymm12,%ymm12
@@ -10991,8 +11136,8 @@ _sk_linear_gradient_hsw:
   .byte  196,67,13,74,201,208                // vblendvps     %ymm13,%ymm9,%ymm14,%ymm9
   .byte  72,131,192,36                       // add           $0x24,%rax
   .byte  73,255,200                          // dec           %r8
-  .byte  117,140                             // jne           375d <_sk_linear_gradient_hsw+0x3f>
-  .byte  235,17                              // jmp           37e4 <_sk_linear_gradient_hsw+0xc6>
+  .byte  117,140                             // jne           3831 <_sk_linear_gradient_hsw+0x3f>
+  .byte  235,17                              // jmp           38b8 <_sk_linear_gradient_hsw+0xc6>
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  197,236,87,210                      // vxorps        %ymm2,%ymm2,%ymm2
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
@@ -11039,24 +11184,24 @@ _sk_xy_to_polar_unit_hsw:
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,233,7,0,0          // vbroadcastss  0x7e9(%rip),%ymm13        # 4068 <_sk_callback_hsw+0x3f3>
-  .byte  196,98,125,24,53,228,7,0,0          // vbroadcastss  0x7e4(%rip),%ymm14        # 406c <_sk_callback_hsw+0x3f7>
+  .byte  196,98,125,24,45,253,7,0,0          // vbroadcastss  0x7fd(%rip),%ymm13        # 4150 <_sk_callback_hsw+0x407>
+  .byte  196,98,125,24,53,248,7,0,0          // vbroadcastss  0x7f8(%rip),%ymm14        # 4154 <_sk_callback_hsw+0x40b>
   .byte  196,66,29,184,245                   // vfmadd231ps   %ymm13,%ymm12,%ymm14
-  .byte  196,98,125,24,45,218,7,0,0          // vbroadcastss  0x7da(%rip),%ymm13        # 4070 <_sk_callback_hsw+0x3fb>
+  .byte  196,98,125,24,45,238,7,0,0          // vbroadcastss  0x7ee(%rip),%ymm13        # 4158 <_sk_callback_hsw+0x40f>
   .byte  196,66,29,184,238                   // vfmadd231ps   %ymm14,%ymm12,%ymm13
-  .byte  196,98,125,24,53,208,7,0,0          // vbroadcastss  0x7d0(%rip),%ymm14        # 4074 <_sk_callback_hsw+0x3ff>
+  .byte  196,98,125,24,53,228,7,0,0          // vbroadcastss  0x7e4(%rip),%ymm14        # 415c <_sk_callback_hsw+0x413>
   .byte  196,66,29,184,245                   // vfmadd231ps   %ymm13,%ymm12,%ymm14
   .byte  196,65,36,89,222                    // vmulps        %ymm14,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,187,7,0,0          // vbroadcastss  0x7bb(%rip),%ymm10        # 4078 <_sk_callback_hsw+0x403>
+  .byte  196,98,125,24,21,207,7,0,0          // vbroadcastss  0x7cf(%rip),%ymm10        # 4160 <_sk_callback_hsw+0x417>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,165,7,0,0          // vbroadcastss  0x7a5(%rip),%ymm10        # 407c <_sk_callback_hsw+0x407>
+  .byte  196,98,125,24,21,185,7,0,0          // vbroadcastss  0x7b9(%rip),%ymm10        # 4164 <_sk_callback_hsw+0x41b>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,143,7,0,0          // vbroadcastss  0x78f(%rip),%ymm10        # 4080 <_sk_callback_hsw+0x40b>
+  .byte  196,98,125,24,21,163,7,0,0          // vbroadcastss  0x7a3(%rip),%ymm10        # 4168 <_sk_callback_hsw+0x41f>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -11069,7 +11214,7 @@ HIDDEN _sk_save_xy_hsw
 FUNCTION(_sk_save_xy_hsw)
 _sk_save_xy_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,110,7,0,0           // vbroadcastss  0x76e(%rip),%ymm8        # 4084 <_sk_callback_hsw+0x40f>
+  .byte  196,98,125,24,5,130,7,0,0           // vbroadcastss  0x782(%rip),%ymm8        # 416c <_sk_callback_hsw+0x423>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -11103,9 +11248,9 @@ HIDDEN _sk_bilinear_nx_hsw
 FUNCTION(_sk_bilinear_nx_hsw)
 _sk_bilinear_nx_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,2,7,0,0            // vbroadcastss  0x702(%rip),%ymm0        # 4088 <_sk_callback_hsw+0x413>
+  .byte  196,226,125,24,5,22,7,0,0           // vbroadcastss  0x716(%rip),%ymm0        # 4170 <_sk_callback_hsw+0x427>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,249,6,0,0           // vbroadcastss  0x6f9(%rip),%ymm8        # 408c <_sk_callback_hsw+0x417>
+  .byte  196,98,125,24,5,13,7,0,0            // vbroadcastss  0x70d(%rip),%ymm8        # 4174 <_sk_callback_hsw+0x42b>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -11116,7 +11261,7 @@ HIDDEN _sk_bilinear_px_hsw
 FUNCTION(_sk_bilinear_px_hsw)
 _sk_bilinear_px_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,225,6,0,0          // vbroadcastss  0x6e1(%rip),%ymm0        # 4090 <_sk_callback_hsw+0x41b>
+  .byte  196,226,125,24,5,245,6,0,0          // vbroadcastss  0x6f5(%rip),%ymm0        # 4178 <_sk_callback_hsw+0x42f>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -11128,9 +11273,9 @@ HIDDEN _sk_bilinear_ny_hsw
 FUNCTION(_sk_bilinear_ny_hsw)
 _sk_bilinear_ny_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,197,6,0,0         // vbroadcastss  0x6c5(%rip),%ymm1        # 4094 <_sk_callback_hsw+0x41f>
+  .byte  196,226,125,24,13,217,6,0,0         // vbroadcastss  0x6d9(%rip),%ymm1        # 417c <_sk_callback_hsw+0x433>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,187,6,0,0           // vbroadcastss  0x6bb(%rip),%ymm8        # 4098 <_sk_callback_hsw+0x423>
+  .byte  196,98,125,24,5,207,6,0,0           // vbroadcastss  0x6cf(%rip),%ymm8        # 4180 <_sk_callback_hsw+0x437>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -11141,7 +11286,7 @@ HIDDEN _sk_bilinear_py_hsw
 FUNCTION(_sk_bilinear_py_hsw)
 _sk_bilinear_py_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,163,6,0,0         // vbroadcastss  0x6a3(%rip),%ymm1        # 409c <_sk_callback_hsw+0x427>
+  .byte  196,226,125,24,13,183,6,0,0         // vbroadcastss  0x6b7(%rip),%ymm1        # 4184 <_sk_callback_hsw+0x43b>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -11153,13 +11298,13 @@ HIDDEN _sk_bicubic_n3x_hsw
 FUNCTION(_sk_bicubic_n3x_hsw)
 _sk_bicubic_n3x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,134,6,0,0          // vbroadcastss  0x686(%rip),%ymm0        # 40a0 <_sk_callback_hsw+0x42b>
+  .byte  196,226,125,24,5,154,6,0,0          // vbroadcastss  0x69a(%rip),%ymm0        # 4188 <_sk_callback_hsw+0x43f>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,125,6,0,0           // vbroadcastss  0x67d(%rip),%ymm8        # 40a4 <_sk_callback_hsw+0x42f>
+  .byte  196,98,125,24,5,145,6,0,0           // vbroadcastss  0x691(%rip),%ymm8        # 418c <_sk_callback_hsw+0x443>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,110,6,0,0          // vbroadcastss  0x66e(%rip),%ymm10        # 40a8 <_sk_callback_hsw+0x433>
-  .byte  196,98,125,24,29,105,6,0,0          // vbroadcastss  0x669(%rip),%ymm11        # 40ac <_sk_callback_hsw+0x437>
+  .byte  196,98,125,24,21,130,6,0,0          // vbroadcastss  0x682(%rip),%ymm10        # 4190 <_sk_callback_hsw+0x447>
+  .byte  196,98,125,24,29,125,6,0,0          // vbroadcastss  0x67d(%rip),%ymm11        # 4194 <_sk_callback_hsw+0x44b>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,36,89,193                    // vmulps        %ymm9,%ymm11,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -11171,16 +11316,16 @@ HIDDEN _sk_bicubic_n1x_hsw
 FUNCTION(_sk_bicubic_n1x_hsw)
 _sk_bicubic_n1x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,76,6,0,0           // vbroadcastss  0x64c(%rip),%ymm0        # 40b0 <_sk_callback_hsw+0x43b>
+  .byte  196,226,125,24,5,96,6,0,0           // vbroadcastss  0x660(%rip),%ymm0        # 4198 <_sk_callback_hsw+0x44f>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,67,6,0,0            // vbroadcastss  0x643(%rip),%ymm8        # 40b4 <_sk_callback_hsw+0x43f>
+  .byte  196,98,125,24,5,87,6,0,0            // vbroadcastss  0x657(%rip),%ymm8        # 419c <_sk_callback_hsw+0x453>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,57,6,0,0           // vbroadcastss  0x639(%rip),%ymm9        # 40b8 <_sk_callback_hsw+0x443>
-  .byte  196,98,125,24,21,52,6,0,0           // vbroadcastss  0x634(%rip),%ymm10        # 40bc <_sk_callback_hsw+0x447>
+  .byte  196,98,125,24,13,77,6,0,0           // vbroadcastss  0x64d(%rip),%ymm9        # 41a0 <_sk_callback_hsw+0x457>
+  .byte  196,98,125,24,21,72,6,0,0           // vbroadcastss  0x648(%rip),%ymm10        # 41a4 <_sk_callback_hsw+0x45b>
   .byte  196,66,61,168,209                   // vfmadd213ps   %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,13,42,6,0,0           // vbroadcastss  0x62a(%rip),%ymm9        # 40c0 <_sk_callback_hsw+0x44b>
+  .byte  196,98,125,24,13,62,6,0,0           // vbroadcastss  0x63e(%rip),%ymm9        # 41a8 <_sk_callback_hsw+0x45f>
   .byte  196,66,61,184,202                   // vfmadd231ps   %ymm10,%ymm8,%ymm9
-  .byte  196,98,125,24,21,32,6,0,0           // vbroadcastss  0x620(%rip),%ymm10        # 40c4 <_sk_callback_hsw+0x44f>
+  .byte  196,98,125,24,21,52,6,0,0           // vbroadcastss  0x634(%rip),%ymm10        # 41ac <_sk_callback_hsw+0x463>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  197,124,17,144,128,0,0,0            // vmovups       %ymm10,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -11191,14 +11336,14 @@ HIDDEN _sk_bicubic_p1x_hsw
 FUNCTION(_sk_bicubic_p1x_hsw)
 _sk_bicubic_p1x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,8,6,0,0             // vbroadcastss  0x608(%rip),%ymm8        # 40c8 <_sk_callback_hsw+0x453>
+  .byte  196,98,125,24,5,28,6,0,0            // vbroadcastss  0x61c(%rip),%ymm8        # 41b0 <_sk_callback_hsw+0x467>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,72,64                    // vmovups       0x40(%rax),%ymm9
-  .byte  196,98,125,24,21,250,5,0,0          // vbroadcastss  0x5fa(%rip),%ymm10        # 40cc <_sk_callback_hsw+0x457>
-  .byte  196,98,125,24,29,245,5,0,0          // vbroadcastss  0x5f5(%rip),%ymm11        # 40d0 <_sk_callback_hsw+0x45b>
+  .byte  196,98,125,24,21,14,6,0,0           // vbroadcastss  0x60e(%rip),%ymm10        # 41b4 <_sk_callback_hsw+0x46b>
+  .byte  196,98,125,24,29,9,6,0,0            // vbroadcastss  0x609(%rip),%ymm11        # 41b8 <_sk_callback_hsw+0x46f>
   .byte  196,66,53,168,218                   // vfmadd213ps   %ymm10,%ymm9,%ymm11
   .byte  196,66,53,168,216                   // vfmadd213ps   %ymm8,%ymm9,%ymm11
-  .byte  196,98,125,24,5,230,5,0,0           // vbroadcastss  0x5e6(%rip),%ymm8        # 40d4 <_sk_callback_hsw+0x45f>
+  .byte  196,98,125,24,5,250,5,0,0           // vbroadcastss  0x5fa(%rip),%ymm8        # 41bc <_sk_callback_hsw+0x473>
   .byte  196,66,53,184,195                   // vfmadd231ps   %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -11209,12 +11354,12 @@ HIDDEN _sk_bicubic_p3x_hsw
 FUNCTION(_sk_bicubic_p3x_hsw)
 _sk_bicubic_p3x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,206,5,0,0          // vbroadcastss  0x5ce(%rip),%ymm0        # 40d8 <_sk_callback_hsw+0x463>
+  .byte  196,226,125,24,5,226,5,0,0          // vbroadcastss  0x5e2(%rip),%ymm0        # 41c0 <_sk_callback_hsw+0x477>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,187,5,0,0          // vbroadcastss  0x5bb(%rip),%ymm10        # 40dc <_sk_callback_hsw+0x467>
-  .byte  196,98,125,24,29,182,5,0,0          // vbroadcastss  0x5b6(%rip),%ymm11        # 40e0 <_sk_callback_hsw+0x46b>
+  .byte  196,98,125,24,21,207,5,0,0          // vbroadcastss  0x5cf(%rip),%ymm10        # 41c4 <_sk_callback_hsw+0x47b>
+  .byte  196,98,125,24,29,202,5,0,0          // vbroadcastss  0x5ca(%rip),%ymm11        # 41c8 <_sk_callback_hsw+0x47f>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,52,89,195                    // vmulps        %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -11226,13 +11371,13 @@ HIDDEN _sk_bicubic_n3y_hsw
 FUNCTION(_sk_bicubic_n3y_hsw)
 _sk_bicubic_n3y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,153,5,0,0         // vbroadcastss  0x599(%rip),%ymm1        # 40e4 <_sk_callback_hsw+0x46f>
+  .byte  196,226,125,24,13,173,5,0,0         // vbroadcastss  0x5ad(%rip),%ymm1        # 41cc <_sk_callback_hsw+0x483>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,143,5,0,0           // vbroadcastss  0x58f(%rip),%ymm8        # 40e8 <_sk_callback_hsw+0x473>
+  .byte  196,98,125,24,5,163,5,0,0           // vbroadcastss  0x5a3(%rip),%ymm8        # 41d0 <_sk_callback_hsw+0x487>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,128,5,0,0          // vbroadcastss  0x580(%rip),%ymm10        # 40ec <_sk_callback_hsw+0x477>
-  .byte  196,98,125,24,29,123,5,0,0          // vbroadcastss  0x57b(%rip),%ymm11        # 40f0 <_sk_callback_hsw+0x47b>
+  .byte  196,98,125,24,21,148,5,0,0          // vbroadcastss  0x594(%rip),%ymm10        # 41d4 <_sk_callback_hsw+0x48b>
+  .byte  196,98,125,24,29,143,5,0,0          // vbroadcastss  0x58f(%rip),%ymm11        # 41d8 <_sk_callback_hsw+0x48f>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,36,89,193                    // vmulps        %ymm9,%ymm11,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -11244,16 +11389,16 @@ HIDDEN _sk_bicubic_n1y_hsw
 FUNCTION(_sk_bicubic_n1y_hsw)
 _sk_bicubic_n1y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,94,5,0,0          // vbroadcastss  0x55e(%rip),%ymm1        # 40f4 <_sk_callback_hsw+0x47f>
+  .byte  196,226,125,24,13,114,5,0,0         // vbroadcastss  0x572(%rip),%ymm1        # 41dc <_sk_callback_hsw+0x493>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,84,5,0,0            // vbroadcastss  0x554(%rip),%ymm8        # 40f8 <_sk_callback_hsw+0x483>
+  .byte  196,98,125,24,5,104,5,0,0           // vbroadcastss  0x568(%rip),%ymm8        # 41e0 <_sk_callback_hsw+0x497>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,74,5,0,0           // vbroadcastss  0x54a(%rip),%ymm9        # 40fc <_sk_callback_hsw+0x487>
-  .byte  196,98,125,24,21,69,5,0,0           // vbroadcastss  0x545(%rip),%ymm10        # 4100 <_sk_callback_hsw+0x48b>
+  .byte  196,98,125,24,13,94,5,0,0           // vbroadcastss  0x55e(%rip),%ymm9        # 41e4 <_sk_callback_hsw+0x49b>
+  .byte  196,98,125,24,21,89,5,0,0           // vbroadcastss  0x559(%rip),%ymm10        # 41e8 <_sk_callback_hsw+0x49f>
   .byte  196,66,61,168,209                   // vfmadd213ps   %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,13,59,5,0,0           // vbroadcastss  0x53b(%rip),%ymm9        # 4104 <_sk_callback_hsw+0x48f>
+  .byte  196,98,125,24,13,79,5,0,0           // vbroadcastss  0x54f(%rip),%ymm9        # 41ec <_sk_callback_hsw+0x4a3>
   .byte  196,66,61,184,202                   // vfmadd231ps   %ymm10,%ymm8,%ymm9
-  .byte  196,98,125,24,21,49,5,0,0           // vbroadcastss  0x531(%rip),%ymm10        # 4108 <_sk_callback_hsw+0x493>
+  .byte  196,98,125,24,21,69,5,0,0           // vbroadcastss  0x545(%rip),%ymm10        # 41f0 <_sk_callback_hsw+0x4a7>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  197,124,17,144,160,0,0,0            // vmovups       %ymm10,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -11264,14 +11409,14 @@ HIDDEN _sk_bicubic_p1y_hsw
 FUNCTION(_sk_bicubic_p1y_hsw)
 _sk_bicubic_p1y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,25,5,0,0            // vbroadcastss  0x519(%rip),%ymm8        # 410c <_sk_callback_hsw+0x497>
+  .byte  196,98,125,24,5,45,5,0,0            // vbroadcastss  0x52d(%rip),%ymm8        # 41f4 <_sk_callback_hsw+0x4ab>
   .byte  197,188,88,72,32                    // vaddps        0x20(%rax),%ymm8,%ymm1
   .byte  197,124,16,72,96                    // vmovups       0x60(%rax),%ymm9
-  .byte  196,98,125,24,21,10,5,0,0           // vbroadcastss  0x50a(%rip),%ymm10        # 4110 <_sk_callback_hsw+0x49b>
-  .byte  196,98,125,24,29,5,5,0,0            // vbroadcastss  0x505(%rip),%ymm11        # 4114 <_sk_callback_hsw+0x49f>
+  .byte  196,98,125,24,21,30,5,0,0           // vbroadcastss  0x51e(%rip),%ymm10        # 41f8 <_sk_callback_hsw+0x4af>
+  .byte  196,98,125,24,29,25,5,0,0           // vbroadcastss  0x519(%rip),%ymm11        # 41fc <_sk_callback_hsw+0x4b3>
   .byte  196,66,53,168,218                   // vfmadd213ps   %ymm10,%ymm9,%ymm11
   .byte  196,66,53,168,216                   // vfmadd213ps   %ymm8,%ymm9,%ymm11
-  .byte  196,98,125,24,5,246,4,0,0           // vbroadcastss  0x4f6(%rip),%ymm8        # 4118 <_sk_callback_hsw+0x4a3>
+  .byte  196,98,125,24,5,10,5,0,0            // vbroadcastss  0x50a(%rip),%ymm8        # 4200 <_sk_callback_hsw+0x4b7>
   .byte  196,66,53,184,195                   // vfmadd231ps   %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -11282,12 +11427,12 @@ HIDDEN _sk_bicubic_p3y_hsw
 FUNCTION(_sk_bicubic_p3y_hsw)
 _sk_bicubic_p3y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,222,4,0,0         // vbroadcastss  0x4de(%rip),%ymm1        # 411c <_sk_callback_hsw+0x4a7>
+  .byte  196,226,125,24,13,242,4,0,0         // vbroadcastss  0x4f2(%rip),%ymm1        # 4204 <_sk_callback_hsw+0x4bb>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,202,4,0,0          // vbroadcastss  0x4ca(%rip),%ymm10        # 4120 <_sk_callback_hsw+0x4ab>
-  .byte  196,98,125,24,29,197,4,0,0          // vbroadcastss  0x4c5(%rip),%ymm11        # 4124 <_sk_callback_hsw+0x4af>
+  .byte  196,98,125,24,21,222,4,0,0          // vbroadcastss  0x4de(%rip),%ymm10        # 4208 <_sk_callback_hsw+0x4bf>
+  .byte  196,98,125,24,29,217,4,0,0          // vbroadcastss  0x4d9(%rip),%ymm11        # 420c <_sk_callback_hsw+0x4c3>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,52,89,195                    // vmulps        %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -11374,9 +11519,17 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  128,63,0                            // cmpb          $0x0,(%rdi)
-  .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
-  .byte  63                                  // (bad)
+  .byte  128,63,1                            // cmpb          $0x1,(%rdi)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,4,0                               // add           %al,(%rax,%rax,1)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  33,8                                // and           %ecx,(%rax)
+  .byte  130                                 // (bad)
+  .byte  60,0                                // cmp           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  191,0,0,128,63                      // mov           $0x3f800000,%edi
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -11398,11 +11551,13 @@ BALIGN4
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  128,191,0,0,224,64,0                // cmpb          $0x0,0x40e00000(%rdi)
+  .byte  128,63,0                            // cmpb          $0x0,(%rdi)
+  .byte  0,128,191,0,0,224                   // add           %al,-0x1fffff41(%rax)
+  .byte  64,0,0                              // add           %al,(%rax)
+  .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  128,63,145                          // cmpb          $0x91,(%rdi)
+  .byte  145                                 // xchg          %eax,%ecx
   .byte  131,158,61,92,143,50,63             // sbbl          $0x3f,0x328f5c3d(%rsi)
   .byte  154                                 // (bad)
   .byte  153                                 // cltd
@@ -11446,7 +11601,7 @@ BALIGN4
   .byte  190,129,128,128,59                  // mov           $0x3b808081,%esi
   .byte  129,128,128,59,0,248,0,0,8,33       // addl          $0x21080000,-0x7ffc480(%rax)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        3e79 <.literal4+0xdd>
+  .byte  224,7                               // loopne        3f61 <.literal4+0xf1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -11460,10 +11615,10 @@ BALIGN4
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
   .byte  0,52,255                            // add           %dh,(%rdi,%rdi,8)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3ea4 <.literal4+0x108>
+  .byte  127,0                               // jg            3f8c <.literal4+0x11c>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            3f1d <.literal4+0x181>
+  .byte  119,115                             // ja            4005 <.literal4+0x195>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -11477,10 +11632,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3ed8 <.literal4+0x13c>
+  .byte  127,0                               // jg            3fc0 <.literal4+0x150>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            3f51 <.literal4+0x1b5>
+  .byte  119,115                             // ja            4039 <.literal4+0x1c9>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -11494,10 +11649,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3f0c <.literal4+0x170>
+  .byte  127,0                               // jg            3ff4 <.literal4+0x184>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            3f85 <.literal4+0x1e9>
+  .byte  119,115                             // ja            406d <.literal4+0x1fd>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -11511,10 +11666,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3f40 <.literal4+0x1a4>
+  .byte  127,0                               // jg            4028 <.literal4+0x1b8>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            3fb9 <.literal4+0x21d>
+  .byte  119,115                             // ja            40a1 <.literal4+0x231>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -11527,7 +11682,7 @@ BALIGN4
   .byte  0,75,0                              // add           %cl,0x0(%rbx)
   .byte  0,128,63,0,0,200                    // add           %al,-0x37ffffc1(%rax)
   .byte  66,0,0                              // rex.X         add %al,(%rax)
-  .byte  127,67                              // jg            3fb7 <.literal4+0x21b>
+  .byte  127,67                              // jg            409f <.literal4+0x22f>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -11539,10 +11694,10 @@ BALIGN4
   .byte  190,80,128,3,62                     // mov           $0x3e038050,%esi
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           3fd7 <.literal4+0x23b>
+  .byte  118,63                              // jbe           40bf <.literal4+0x24f>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            3feb <.literal4+0x24f>
+  .byte  127,67                              // jg            40d3 <.literal4+0x263>
   .byte  129,128,128,59,0,0,128,63,129,128   // addl          $0x80813f80,0x3b80(%rax)
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,128,63,129,128,128                // add           %al,-0x7f7f7ec1(%rax)
@@ -11551,7 +11706,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        3fcd <.literal4+0x231>
+  .byte  224,7                               // loopne        40b5 <.literal4+0x245>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -11563,7 +11718,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        3fe9 <.literal4+0x24d>
+  .byte  224,7                               // loopne        40d1 <.literal4+0x261>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -11574,7 +11729,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            403e <.literal4+0x2a2>
+  .byte  124,66                              // jl            4126 <.literal4+0x2b6>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,55,0,15                 // mov           %ecx,0xf003788(%rax)
@@ -11592,9 +11747,9 @@ BALIGN4
   .byte  137,136,136,59,15,0                 // mov           %ecx,0xf3b88(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,61,0,0                  // mov           %ecx,0x3d88(%rax)
-  .byte  112,65                              // jo            4081 <.literal4+0x2e5>
+  .byte  112,65                              // jo            4169 <.literal4+0x2f9>
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            408f <.literal4+0x2f3>
+  .byte  127,67                              // jg            4177 <.literal4+0x307>
   .byte  128,0,128                           // addb          $0x80,(%rax)
   .byte  55                                  // (bad)
   .byte  128,0,128                           // addb          $0x80,(%rax)
@@ -11602,7 +11757,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  255                                 // (bad)
-  .byte  127,71                              // jg            40a3 <.literal4+0x307>
+  .byte  127,71                              // jg            418b <.literal4+0x31b>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,89                               // ds            pop %rcx
@@ -11699,16 +11854,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004168 <_sk_callback_hsw+0xa0004f3>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004248 <_sk_callback_hsw+0xa0004ff>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004170 <_sk_callback_hsw+0x120004fb>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004250 <_sk_callback_hsw+0x12000507>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004178 <_sk_callback_hsw+0x1a000503>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004258 <_sk_callback_hsw+0x1a00050f>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004180 <_sk_callback_hsw+0x300050b>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004260 <_sk_callback_hsw+0x3000517>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -11751,16 +11906,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a0041c8 <_sk_callback_hsw+0xa000553>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a0042a8 <_sk_callback_hsw+0xa00055f>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 120041d0 <_sk_callback_hsw+0x1200055b>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 120042b0 <_sk_callback_hsw+0x12000567>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a0041d8 <_sk_callback_hsw+0x1a000563>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a0042b8 <_sk_callback_hsw+0x1a00056f>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 30041e0 <_sk_callback_hsw+0x300056b>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 30042c0 <_sk_callback_hsw+0x3000577>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -11803,16 +11958,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004228 <_sk_callback_hsw+0xa0005b3>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004308 <_sk_callback_hsw+0xa0005bf>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004230 <_sk_callback_hsw+0x120005bb>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004310 <_sk_callback_hsw+0x120005c7>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004238 <_sk_callback_hsw+0x1a0005c3>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004318 <_sk_callback_hsw+0x1a0005cf>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004240 <_sk_callback_hsw+0x30005cb>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004320 <_sk_callback_hsw+0x30005d7>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -11855,16 +12010,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004288 <_sk_callback_hsw+0xa000613>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004368 <_sk_callback_hsw+0xa00061f>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004290 <_sk_callback_hsw+0x1200061b>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004370 <_sk_callback_hsw+0x12000627>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004298 <_sk_callback_hsw+0x1a000623>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004378 <_sk_callback_hsw+0x1a00062f>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 30042a0 <_sk_callback_hsw+0x300062b>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004380 <_sk_callback_hsw+0x3000637>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -11985,14 +12140,14 @@ _sk_seed_shader_avx:
   .byte  197,249,112,192,0                   // vpshufd       $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,131,83,0,0        // vbroadcastss  0x5383(%rip),%ymm1        # 544c <_sk_callback_avx+0x125>
+  .byte  196,226,125,24,13,207,84,0,0        // vbroadcastss  0x54cf(%rip),%ymm1        # 5598 <_sk_callback_avx+0x125>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,88,2                        // vaddps        (%rdx),%ymm0,%ymm0
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  197,236,88,201                      // vaddps        %ymm1,%ymm2,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,21,103,83,0,0        // vbroadcastss  0x5367(%rip),%ymm2        # 5450 <_sk_callback_avx+0x129>
+  .byte  196,226,125,24,21,179,84,0,0        // vbroadcastss  0x54b3(%rip),%ymm2        # 559c <_sk_callback_avx+0x129>
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
   .byte  197,220,87,228                      // vxorps        %ymm4,%ymm4,%ymm4
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
@@ -12000,6 +12155,72 @@ _sk_seed_shader_avx:
   .byte  197,196,87,255                      // vxorps        %ymm7,%ymm7,%ymm7
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_dither_avx
+.globl _sk_dither_avx
+FUNCTION(_sk_dither_avx)
+_sk_dither_avx:  // AVX dither stage: builds a per-lane integer pattern from bits of X and X^Y, maps it to a float, scales it by the float at 8(%rax) and by ymm3, and adds it to ymm0, ymm1 and ymm2.
+  .byte  72,173                              // lods          %ds:(%rsi),%rax -- rax = next qword of the stream at rsi; used below as a pointer
+  .byte  197,121,110,199                     // vmovd         %edi,%xmm8 -- start broadcasting edi to all lanes
+  .byte  196,65,121,112,192,0                // vpshufd       $0x0,%xmm8,%xmm8
+  .byte  196,67,61,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm8,%ymm8 -- ymm8 = edi in all 8 lanes
+  .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8 -- int32 -> float
+  .byte  197,60,88,2                         // vaddps        (%rdx),%ymm8,%ymm8 -- add the 8 floats at (rdx); presumably per-lane x offsets, TODO confirm
+  .byte  196,65,126,91,192                   // vcvttps2dq    %ymm8,%ymm8 -- truncate back to int32; call this X
+  .byte  76,139,0                            // mov           (%rax),%r8 -- r8 = pointer stored at offset 0 of the block rax points to
+  .byte  196,66,125,24,8                     // vbroadcastss  (%r8),%ymm9 -- ymm9 = 32-bit value at (r8) in all lanes; call this Y (presumably the row index, TODO confirm)
+  .byte  196,65,60,87,209                    // vxorps        %ymm9,%ymm8,%ymm10 -- ymm10 = X ^ Y
+  .byte  196,98,125,24,29,107,84,0,0         // vbroadcastss  0x546b(%rip),%ymm11        # 55a0 <_sk_callback_avx+0x12d> -- mask A from the literal pool (presumably 1, as in the hsw pool words added by this diff; confirm)
+  .byte  196,65,44,84,203                    // vandps        %ymm11,%ymm10,%ymm9 -- ymm9 = (X^Y) & A
+  .byte  196,193,25,114,241,5                // vpslld        $0x5,%xmm9,%xmm12 -- shift left by 5, done separately on each 128-bit half
+  .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
+  .byte  196,193,49,114,241,5                // vpslld        $0x5,%xmm9,%xmm9
+  .byte  196,67,29,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm12,%ymm9 -- ymm9 = ((X^Y) & A) << 5
+  .byte  196,65,60,84,219                    // vandps        %ymm11,%ymm8,%ymm11 -- ymm11 = X & A
+  .byte  196,193,25,114,243,4                // vpslld        $0x4,%xmm11,%xmm12
+  .byte  196,67,125,25,219,1                 // vextractf128  $0x1,%ymm11,%xmm11
+  .byte  196,193,33,114,243,4                // vpslld        $0x4,%xmm11,%xmm11
+  .byte  196,67,29,24,219,1                  // vinsertf128   $0x1,%xmm11,%ymm12,%ymm11 -- ymm11 = (X & A) << 4
+  .byte  196,98,125,24,37,44,84,0,0          // vbroadcastss  0x542c(%rip),%ymm12        # 55a4 <_sk_callback_avx+0x131> -- mask B from the literal pool (presumably 4; confirm)
+  .byte  196,98,125,24,45,39,84,0,0          // vbroadcastss  0x5427(%rip),%ymm13        # 55a8 <_sk_callback_avx+0x135> -- mask C from the literal pool (presumably 2; confirm)
+  .byte  196,65,44,84,245                    // vandps        %ymm13,%ymm10,%ymm14 -- ymm14 = (X^Y) & C
+  .byte  196,193,1,114,246,2                 // vpslld        $0x2,%xmm14,%xmm15
+  .byte  196,67,125,25,246,1                 // vextractf128  $0x1,%ymm14,%xmm14
+  .byte  196,193,9,114,246,2                 // vpslld        $0x2,%xmm14,%xmm14
+  .byte  196,67,5,24,246,1                   // vinsertf128   $0x1,%xmm14,%ymm15,%ymm14 -- ymm14 = ((X^Y) & C) << 2
+  .byte  196,65,60,84,237                    // vandps        %ymm13,%ymm8,%ymm13 -- ymm13 = X & C
+  .byte  196,65,17,254,253                   // vpaddd        %xmm13,%xmm13,%xmm15 -- adding a value to itself is a left shift by 1
+  .byte  196,67,125,25,237,1                 // vextractf128  $0x1,%ymm13,%xmm13
+  .byte  196,65,17,254,237                   // vpaddd        %xmm13,%xmm13,%xmm13
+  .byte  196,67,5,24,237,1                   // vinsertf128   $0x1,%xmm13,%ymm15,%ymm13 -- ymm13 = (X & C) << 1
+  .byte  196,65,44,84,212                    // vandps        %ymm12,%ymm10,%ymm10 -- ymm10 = (X^Y) & B
+  .byte  196,193,1,114,210,1                 // vpsrld        $0x1,%xmm10,%xmm15
+  .byte  196,67,125,25,210,1                 // vextractf128  $0x1,%ymm10,%xmm10
+  .byte  196,193,41,114,210,1                // vpsrld        $0x1,%xmm10,%xmm10
+  .byte  196,67,5,24,210,1                   // vinsertf128   $0x1,%xmm10,%ymm15,%ymm10 -- ymm10 = ((X^Y) & B) >> 1
+  .byte  196,65,60,84,196                    // vandps        %ymm12,%ymm8,%ymm8 -- ymm8 = X & B
+  .byte  196,193,25,114,208,2                // vpsrld        $0x2,%xmm8,%xmm12
+  .byte  196,67,125,25,192,1                 // vextractf128  $0x1,%ymm8,%xmm8
+  .byte  196,193,57,114,208,2                // vpsrld        $0x2,%xmm8,%xmm8
+  .byte  196,67,29,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm12,%ymm8 -- ymm8 = (X & B) >> 2
+  .byte  196,65,20,86,219                    // vorps         %ymm11,%ymm13,%ymm11 -- OR the six shifted terms together into ymm8
+  .byte  196,65,36,86,192                    // vorps         %ymm8,%ymm11,%ymm8
+  .byte  196,65,52,86,206                    // vorps         %ymm14,%ymm9,%ymm9
+  .byte  196,65,60,86,193                    // vorps         %ymm9,%ymm8,%ymm8
+  .byte  196,65,60,86,194                    // vorps         %ymm10,%ymm8,%ymm8 -- ymm8 = combined integer pattern (6 bits wide if A,B,C are 1,4,2)
+  .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8 -- pattern -> float
+  .byte  196,98,125,24,13,146,83,0,0         // vbroadcastss  0x5392(%rip),%ymm9        # 55ac <_sk_callback_avx+0x139> -- scale constant from the literal pool (presumably ~1/63, as in the hsw pool; confirm)
+  .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
+  .byte  196,98,125,24,13,136,83,0,0         // vbroadcastss  0x5388(%rip),%ymm9        # 55b0 <_sk_callback_avx+0x13d> -- bias constant from the literal pool (presumably -0.5, as in the hsw pool; confirm)
+  .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8 -- ymm8 = pattern * scale + bias
+  .byte  196,98,125,24,72,8                  // vbroadcastss  0x8(%rax),%ymm9 -- ymm9 = float at offset 8 of the block rax points to (presumably the dither rate; confirm against SkJumper.h)
+  .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8 -- scale the dither term by that float
+  .byte  197,60,89,195                       // vmulps        %ymm3,%ymm8,%ymm8 -- and by ymm3 (presumably alpha, keeping the result premultiplied; confirm)
+  .byte  197,188,88,192                      // vaddps        %ymm0,%ymm8,%ymm0 -- add the same term to ymm0,
+  .byte  197,188,88,201                      // vaddps        %ymm1,%ymm8,%ymm1 -- ymm1,
+  .byte  197,188,88,210                      // vaddps        %ymm2,%ymm8,%ymm2 -- and ymm2; ymm3 is left unchanged
+  .byte  72,173                              // lods          %ds:(%rsi),%rax -- rax = next qword of the stream at rsi
+  .byte  255,224                             // jmpq          *%rax -- tail-jump to that address
+
 HIDDEN _sk_constant_color_avx
 .globl _sk_constant_color_avx
 FUNCTION(_sk_constant_color_avx)
@@ -12028,7 +12249,7 @@ HIDDEN _sk_srcatop_avx
 FUNCTION(_sk_srcatop_avx)
 _sk_srcatop_avx:
   .byte  197,252,89,199                      // vmulps        %ymm7,%ymm0,%ymm0
-  .byte  196,98,125,24,5,23,83,0,0           // vbroadcastss  0x5317(%rip),%ymm8        # 5454 <_sk_callback_avx+0x12d>
+  .byte  196,98,125,24,5,42,83,0,0           // vbroadcastss  0x532a(%rip),%ymm8        # 55b4 <_sk_callback_avx+0x141>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -12049,7 +12270,7 @@ HIDDEN _sk_dstatop_avx
 FUNCTION(_sk_dstatop_avx)
 _sk_dstatop_avx:
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
-  .byte  196,98,125,24,13,217,82,0,0         // vbroadcastss  0x52d9(%rip),%ymm9        # 5458 <_sk_callback_avx+0x131>
+  .byte  196,98,125,24,13,236,82,0,0         // vbroadcastss  0x52ec(%rip),%ymm9        # 55b8 <_sk_callback_avx+0x145>
   .byte  197,52,92,207                       // vsubps        %ymm7,%ymm9,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,188,88,192                      // vaddps        %ymm0,%ymm8,%ymm0
@@ -12091,7 +12312,7 @@ HIDDEN _sk_srcout_avx
 .globl _sk_srcout_avx
 FUNCTION(_sk_srcout_avx)
 _sk_srcout_avx:
-  .byte  196,98,125,24,5,120,82,0,0          // vbroadcastss  0x5278(%rip),%ymm8        # 545c <_sk_callback_avx+0x135>
+  .byte  196,98,125,24,5,139,82,0,0          // vbroadcastss  0x528b(%rip),%ymm8        # 55bc <_sk_callback_avx+0x149>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -12104,7 +12325,7 @@ HIDDEN _sk_dstout_avx
 .globl _sk_dstout_avx
 FUNCTION(_sk_dstout_avx)
 _sk_dstout_avx:
-  .byte  196,226,125,24,5,91,82,0,0          // vbroadcastss  0x525b(%rip),%ymm0        # 5460 <_sk_callback_avx+0x139>
+  .byte  196,226,125,24,5,110,82,0,0         // vbroadcastss  0x526e(%rip),%ymm0        # 55c0 <_sk_callback_avx+0x14d>
   .byte  197,252,92,219                      // vsubps        %ymm3,%ymm0,%ymm3
   .byte  197,228,89,196                      // vmulps        %ymm4,%ymm3,%ymm0
   .byte  197,228,89,205                      // vmulps        %ymm5,%ymm3,%ymm1
@@ -12117,7 +12338,7 @@ HIDDEN _sk_srcover_avx
 .globl _sk_srcover_avx
 FUNCTION(_sk_srcover_avx)
 _sk_srcover_avx:
-  .byte  196,98,125,24,5,62,82,0,0           // vbroadcastss  0x523e(%rip),%ymm8        # 5464 <_sk_callback_avx+0x13d>
+  .byte  196,98,125,24,5,81,82,0,0           // vbroadcastss  0x5251(%rip),%ymm8        # 55c4 <_sk_callback_avx+0x151>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -12134,7 +12355,7 @@ HIDDEN _sk_dstover_avx
 .globl _sk_dstover_avx
 FUNCTION(_sk_dstover_avx)
 _sk_dstover_avx:
-  .byte  196,98,125,24,5,17,82,0,0           // vbroadcastss  0x5211(%rip),%ymm8        # 5468 <_sk_callback_avx+0x141>
+  .byte  196,98,125,24,5,36,82,0,0           // vbroadcastss  0x5224(%rip),%ymm8        # 55c8 <_sk_callback_avx+0x155>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,252,88,196                      // vaddps        %ymm4,%ymm0,%ymm0
@@ -12162,7 +12383,7 @@ HIDDEN _sk_multiply_avx
 .globl _sk_multiply_avx
 FUNCTION(_sk_multiply_avx)
 _sk_multiply_avx:
-  .byte  196,98,125,24,5,208,81,0,0          // vbroadcastss  0x51d0(%rip),%ymm8        # 546c <_sk_callback_avx+0x145>
+  .byte  196,98,125,24,5,227,81,0,0          // vbroadcastss  0x51e3(%rip),%ymm8        # 55cc <_sk_callback_avx+0x159>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,208                       // vmulps        %ymm0,%ymm9,%ymm10
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -12222,7 +12443,7 @@ HIDDEN _sk_xor__avx
 .globl _sk_xor__avx
 FUNCTION(_sk_xor__avx)
 _sk_xor__avx:
-  .byte  196,98,125,24,5,31,81,0,0           // vbroadcastss  0x511f(%rip),%ymm8        # 5470 <_sk_callback_avx+0x149>
+  .byte  196,98,125,24,5,50,81,0,0           // vbroadcastss  0x5132(%rip),%ymm8        # 55d0 <_sk_callback_avx+0x15d>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -12259,7 +12480,7 @@ _sk_darken_avx:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,95,209                  // vmaxps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,159,80,0,0          // vbroadcastss  0x509f(%rip),%ymm8        # 5474 <_sk_callback_avx+0x14d>
+  .byte  196,98,125,24,5,178,80,0,0          // vbroadcastss  0x50b2(%rip),%ymm8        # 55d4 <_sk_callback_avx+0x161>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -12285,7 +12506,7 @@ _sk_lighten_avx:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,75,80,0,0           // vbroadcastss  0x504b(%rip),%ymm8        # 5478 <_sk_callback_avx+0x151>
+  .byte  196,98,125,24,5,94,80,0,0           // vbroadcastss  0x505e(%rip),%ymm8        # 55d8 <_sk_callback_avx+0x165>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -12314,7 +12535,7 @@ _sk_difference_avx:
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,235,79,0,0          // vbroadcastss  0x4feb(%rip),%ymm8        # 547c <_sk_callback_avx+0x155>
+  .byte  196,98,125,24,5,254,79,0,0          // vbroadcastss  0x4ffe(%rip),%ymm8        # 55dc <_sk_callback_avx+0x169>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -12337,7 +12558,7 @@ _sk_exclusion_avx:
   .byte  197,236,89,214                      // vmulps        %ymm6,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,166,79,0,0          // vbroadcastss  0x4fa6(%rip),%ymm8        # 5480 <_sk_callback_avx+0x159>
+  .byte  196,98,125,24,5,185,79,0,0          // vbroadcastss  0x4fb9(%rip),%ymm8        # 55e0 <_sk_callback_avx+0x16d>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -12348,7 +12569,7 @@ HIDDEN _sk_colorburn_avx
 .globl _sk_colorburn_avx
 FUNCTION(_sk_colorburn_avx)
 _sk_colorburn_avx:
-  .byte  196,98,125,24,5,145,79,0,0          // vbroadcastss  0x4f91(%rip),%ymm8        # 5484 <_sk_callback_avx+0x15d>
+  .byte  196,98,125,24,5,164,79,0,0          // vbroadcastss  0x4fa4(%rip),%ymm8        # 55e4 <_sk_callback_avx+0x171>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,216                       // vmulps        %ymm0,%ymm9,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
@@ -12410,7 +12631,7 @@ HIDDEN _sk_colordodge_avx
 FUNCTION(_sk_colordodge_avx)
 _sk_colordodge_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,13,141,78,0,0         // vbroadcastss  0x4e8d(%rip),%ymm9        # 5488 <_sk_callback_avx+0x161>
+  .byte  196,98,125,24,13,160,78,0,0         // vbroadcastss  0x4ea0(%rip),%ymm9        # 55e8 <_sk_callback_avx+0x175>
   .byte  197,52,92,215                       // vsubps        %ymm7,%ymm9,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,52,92,203                       // vsubps        %ymm3,%ymm9,%ymm9
@@ -12467,7 +12688,7 @@ HIDDEN _sk_hardlight_avx
 .globl _sk_hardlight_avx
 FUNCTION(_sk_hardlight_avx)
 _sk_hardlight_avx:
-  .byte  196,98,125,24,5,159,77,0,0          // vbroadcastss  0x4d9f(%rip),%ymm8        # 548c <_sk_callback_avx+0x165>
+  .byte  196,98,125,24,5,178,77,0,0          // vbroadcastss  0x4db2(%rip),%ymm8        # 55ec <_sk_callback_avx+0x179>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -12522,7 +12743,7 @@ HIDDEN _sk_overlay_avx
 .globl _sk_overlay_avx
 FUNCTION(_sk_overlay_avx)
 _sk_overlay_avx:
-  .byte  196,98,125,24,5,200,76,0,0          // vbroadcastss  0x4cc8(%rip),%ymm8        # 5490 <_sk_callback_avx+0x169>
+  .byte  196,98,125,24,5,219,76,0,0          // vbroadcastss  0x4cdb(%rip),%ymm8        # 55f0 <_sk_callback_avx+0x17d>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -12588,10 +12809,10 @@ _sk_softlight_avx:
   .byte  196,65,60,88,192                    // vaddps        %ymm8,%ymm8,%ymm8
   .byte  196,65,60,89,216                    // vmulps        %ymm8,%ymm8,%ymm11
   .byte  196,65,60,88,195                    // vaddps        %ymm11,%ymm8,%ymm8
-  .byte  196,98,125,24,29,191,75,0,0         // vbroadcastss  0x4bbf(%rip),%ymm11        # 5498 <_sk_callback_avx+0x171>
+  .byte  196,98,125,24,29,210,75,0,0         // vbroadcastss  0x4bd2(%rip),%ymm11        # 55f8 <_sk_callback_avx+0x185>
   .byte  196,65,28,88,235                    // vaddps        %ymm11,%ymm12,%ymm13
   .byte  196,65,20,89,192                    // vmulps        %ymm8,%ymm13,%ymm8
-  .byte  196,98,125,24,45,176,75,0,0         // vbroadcastss  0x4bb0(%rip),%ymm13        # 549c <_sk_callback_avx+0x175>
+  .byte  196,98,125,24,45,195,75,0,0         // vbroadcastss  0x4bc3(%rip),%ymm13        # 55fc <_sk_callback_avx+0x189>
   .byte  196,65,28,89,245                    // vmulps        %ymm13,%ymm12,%ymm14
   .byte  196,65,12,88,192                    // vaddps        %ymm8,%ymm14,%ymm8
   .byte  196,65,124,82,244                   // vrsqrtps      %ymm12,%ymm14
@@ -12602,7 +12823,7 @@ _sk_softlight_avx:
   .byte  197,4,194,255,2                     // vcmpleps      %ymm7,%ymm15,%ymm15
   .byte  196,67,13,74,240,240                // vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   .byte  197,116,88,249                      // vaddps        %ymm1,%ymm1,%ymm15
-  .byte  196,98,125,24,5,110,75,0,0          // vbroadcastss  0x4b6e(%rip),%ymm8        # 5494 <_sk_callback_avx+0x16d>
+  .byte  196,98,125,24,5,129,75,0,0          // vbroadcastss  0x4b81(%rip),%ymm8        # 55f4 <_sk_callback_avx+0x181>
   .byte  196,65,60,92,228                    // vsubps        %ymm12,%ymm8,%ymm12
   .byte  197,132,92,195                      // vsubps        %ymm3,%ymm15,%ymm0
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
@@ -12709,7 +12930,7 @@ HIDDEN _sk_clamp_1_avx
 .globl _sk_clamp_1_avx
 FUNCTION(_sk_clamp_1_avx)
 _sk_clamp_1_avx:
-  .byte  196,98,125,24,5,190,73,0,0          // vbroadcastss  0x49be(%rip),%ymm8        # 54a0 <_sk_callback_avx+0x179>
+  .byte  196,98,125,24,5,209,73,0,0          // vbroadcastss  0x49d1(%rip),%ymm8        # 5600 <_sk_callback_avx+0x18d>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
@@ -12721,7 +12942,7 @@ HIDDEN _sk_clamp_a_avx
 .globl _sk_clamp_a_avx
 FUNCTION(_sk_clamp_a_avx)
 _sk_clamp_a_avx:
-  .byte  196,98,125,24,5,161,73,0,0          // vbroadcastss  0x49a1(%rip),%ymm8        # 54a4 <_sk_callback_avx+0x17d>
+  .byte  196,98,125,24,5,180,73,0,0          // vbroadcastss  0x49b4(%rip),%ymm8        # 5604 <_sk_callback_avx+0x191>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  197,252,93,195                      // vminps        %ymm3,%ymm0,%ymm0
   .byte  197,244,93,203                      // vminps        %ymm3,%ymm1,%ymm1
@@ -12807,7 +13028,7 @@ FUNCTION(_sk_unpremul_avx)
 _sk_unpremul_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,65,100,194,200,0                // vcmpeqps      %ymm8,%ymm3,%ymm9
-  .byte  196,98,125,24,21,233,72,0,0         // vbroadcastss  0x48e9(%rip),%ymm10        # 54a8 <_sk_callback_avx+0x181>
+  .byte  196,98,125,24,21,252,72,0,0         // vbroadcastss  0x48fc(%rip),%ymm10        # 5608 <_sk_callback_avx+0x195>
   .byte  197,44,94,211                       // vdivps        %ymm3,%ymm10,%ymm10
   .byte  196,67,45,74,192,144                // vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
@@ -12820,17 +13041,17 @@ HIDDEN _sk_from_srgb_avx
 .globl _sk_from_srgb_avx
 FUNCTION(_sk_from_srgb_avx)
 _sk_from_srgb_avx:
-  .byte  196,98,125,24,5,202,72,0,0          // vbroadcastss  0x48ca(%rip),%ymm8        # 54ac <_sk_callback_avx+0x185>
+  .byte  196,98,125,24,5,221,72,0,0          // vbroadcastss  0x48dd(%rip),%ymm8        # 560c <_sk_callback_avx+0x199>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  197,124,89,208                      // vmulps        %ymm0,%ymm0,%ymm10
-  .byte  196,98,125,24,29,188,72,0,0         // vbroadcastss  0x48bc(%rip),%ymm11        # 54b0 <_sk_callback_avx+0x189>
+  .byte  196,98,125,24,29,207,72,0,0         // vbroadcastss  0x48cf(%rip),%ymm11        # 5610 <_sk_callback_avx+0x19d>
   .byte  196,65,124,89,227                   // vmulps        %ymm11,%ymm0,%ymm12
-  .byte  196,98,125,24,45,178,72,0,0         // vbroadcastss  0x48b2(%rip),%ymm13        # 54b4 <_sk_callback_avx+0x18d>
+  .byte  196,98,125,24,45,197,72,0,0         // vbroadcastss  0x48c5(%rip),%ymm13        # 5614 <_sk_callback_avx+0x1a1>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,44,89,212                    // vmulps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,163,72,0,0         // vbroadcastss  0x48a3(%rip),%ymm12        # 54b8 <_sk_callback_avx+0x191>
+  .byte  196,98,125,24,37,182,72,0,0         // vbroadcastss  0x48b6(%rip),%ymm12        # 5618 <_sk_callback_avx+0x1a5>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,53,153,72,0,0         // vbroadcastss  0x4899(%rip),%ymm14        # 54bc <_sk_callback_avx+0x195>
+  .byte  196,98,125,24,53,172,72,0,0         // vbroadcastss  0x48ac(%rip),%ymm14        # 561c <_sk_callback_avx+0x1a9>
   .byte  196,193,124,194,198,1               // vcmpltps      %ymm14,%ymm0,%ymm0
   .byte  196,195,45,74,193,0                 // vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   .byte  196,65,116,89,200                   // vmulps        %ymm8,%ymm1,%ymm9
@@ -12859,18 +13080,18 @@ _sk_to_srgb_avx:
   .byte  197,124,82,192                      // vrsqrtps      %ymm0,%ymm8
   .byte  196,65,124,83,200                   // vrcpps        %ymm8,%ymm9
   .byte  196,65,124,82,208                   // vrsqrtps      %ymm8,%ymm10
-  .byte  196,98,125,24,5,36,72,0,0           // vbroadcastss  0x4824(%rip),%ymm8        # 54c0 <_sk_callback_avx+0x199>
+  .byte  196,98,125,24,5,55,72,0,0           // vbroadcastss  0x4837(%rip),%ymm8        # 5620 <_sk_callback_avx+0x1ad>
   .byte  196,65,124,89,216                   // vmulps        %ymm8,%ymm0,%ymm11
-  .byte  196,98,125,24,37,26,72,0,0          // vbroadcastss  0x481a(%rip),%ymm12        # 54c4 <_sk_callback_avx+0x19d>
+  .byte  196,98,125,24,37,45,72,0,0          // vbroadcastss  0x482d(%rip),%ymm12        # 5624 <_sk_callback_avx+0x1b1>
   .byte  196,65,52,89,204                    // vmulps        %ymm12,%ymm9,%ymm9
-  .byte  196,98,125,24,45,16,72,0,0          // vbroadcastss  0x4810(%rip),%ymm13        # 54c8 <_sk_callback_avx+0x1a1>
+  .byte  196,98,125,24,45,35,72,0,0          // vbroadcastss  0x4823(%rip),%ymm13        # 5628 <_sk_callback_avx+0x1b5>
   .byte  196,65,52,88,205                    // vaddps        %ymm13,%ymm9,%ymm9
-  .byte  196,98,125,24,53,6,72,0,0           // vbroadcastss  0x4806(%rip),%ymm14        # 54cc <_sk_callback_avx+0x1a5>
+  .byte  196,98,125,24,53,25,72,0,0          // vbroadcastss  0x4819(%rip),%ymm14        # 562c <_sk_callback_avx+0x1b9>
   .byte  196,65,44,89,214                    // vmulps        %ymm14,%ymm10,%ymm10
   .byte  196,65,44,88,201                    // vaddps        %ymm9,%ymm10,%ymm9
-  .byte  196,98,125,24,21,247,71,0,0         // vbroadcastss  0x47f7(%rip),%ymm10        # 54d0 <_sk_callback_avx+0x1a9>
+  .byte  196,98,125,24,21,10,72,0,0          // vbroadcastss  0x480a(%rip),%ymm10        # 5630 <_sk_callback_avx+0x1bd>
   .byte  196,65,44,93,201                    // vminps        %ymm9,%ymm10,%ymm9
-  .byte  196,98,125,24,61,237,71,0,0         // vbroadcastss  0x47ed(%rip),%ymm15        # 54d4 <_sk_callback_avx+0x1ad>
+  .byte  196,98,125,24,61,0,72,0,0           // vbroadcastss  0x4800(%rip),%ymm15        # 5634 <_sk_callback_avx+0x1c1>
   .byte  196,193,124,194,199,1               // vcmpltps      %ymm15,%ymm0,%ymm0
   .byte  196,195,53,74,195,0                 // vblendvps     %ymm0,%ymm11,%ymm9,%ymm0
   .byte  197,124,82,201                      // vrsqrtps      %ymm1,%ymm9
@@ -12907,7 +13128,7 @@ _sk_rgb_to_hsl_avx:
   .byte  197,124,93,201                      // vminps        %ymm1,%ymm0,%ymm9
   .byte  197,52,93,202                       // vminps        %ymm2,%ymm9,%ymm9
   .byte  196,65,60,92,209                    // vsubps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,83,71,0,0          // vbroadcastss  0x4753(%rip),%ymm11        # 54d8 <_sk_callback_avx+0x1b1>
+  .byte  196,98,125,24,29,102,71,0,0         // vbroadcastss  0x4766(%rip),%ymm11        # 5638 <_sk_callback_avx+0x1c5>
   .byte  196,65,36,94,218                    // vdivps        %ymm10,%ymm11,%ymm11
   .byte  197,116,92,226                      // vsubps        %ymm2,%ymm1,%ymm12
   .byte  196,65,28,89,227                    // vmulps        %ymm11,%ymm12,%ymm12
@@ -12917,19 +13138,19 @@ _sk_rgb_to_hsl_avx:
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  197,252,92,201                      // vsubps        %ymm1,%ymm0,%ymm1
   .byte  196,193,116,89,203                  // vmulps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,44,71,0,0          // vbroadcastss  0x472c(%rip),%ymm11        # 54e4 <_sk_callback_avx+0x1bd>
+  .byte  196,98,125,24,29,63,71,0,0          // vbroadcastss  0x473f(%rip),%ymm11        # 5644 <_sk_callback_avx+0x1d1>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,26,71,0,0          // vbroadcastss  0x471a(%rip),%ymm11        # 54e0 <_sk_callback_avx+0x1b9>
+  .byte  196,98,125,24,29,45,71,0,0          // vbroadcastss  0x472d(%rip),%ymm11        # 5640 <_sk_callback_avx+0x1cd>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,227,117,74,202,224              // vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,2,71,0,0          // vbroadcastss  0x4702(%rip),%ymm2        # 54dc <_sk_callback_avx+0x1b5>
+  .byte  196,226,125,24,21,21,71,0,0         // vbroadcastss  0x4715(%rip),%ymm2        # 563c <_sk_callback_avx+0x1c9>
   .byte  196,65,12,87,246                    // vxorps        %ymm14,%ymm14,%ymm14
   .byte  196,227,13,74,210,208               // vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   .byte  197,188,194,192,0                   // vcmpeqps      %ymm0,%ymm8,%ymm0
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
   .byte  196,227,117,74,194,0                // vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   .byte  196,193,60,88,201                   // vaddps        %ymm9,%ymm8,%ymm1
-  .byte  196,98,125,24,37,233,70,0,0         // vbroadcastss  0x46e9(%rip),%ymm12        # 54ec <_sk_callback_avx+0x1c5>
+  .byte  196,98,125,24,37,252,70,0,0         // vbroadcastss  0x46fc(%rip),%ymm12        # 564c <_sk_callback_avx+0x1d9>
   .byte  196,193,116,89,212                  // vmulps        %ymm12,%ymm1,%ymm2
   .byte  197,28,194,226,1                    // vcmpltps      %ymm2,%ymm12,%ymm12
   .byte  196,65,36,92,216                    // vsubps        %ymm8,%ymm11,%ymm11
@@ -12939,7 +13160,7 @@ _sk_rgb_to_hsl_avx:
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  196,195,125,74,198,128              // vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   .byte  196,195,117,74,206,128              // vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  .byte  196,98,125,24,5,172,70,0,0          // vbroadcastss  0x46ac(%rip),%ymm8        # 54e8 <_sk_callback_avx+0x1c1>
+  .byte  196,98,125,24,5,191,70,0,0          // vbroadcastss  0x46bf(%rip),%ymm8        # 5648 <_sk_callback_avx+0x1d5>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -12956,7 +13177,7 @@ _sk_hsl_to_rgb_avx:
   .byte  197,252,17,92,36,128                // vmovups       %ymm3,-0x80(%rsp)
   .byte  197,252,40,225                      // vmovaps       %ymm1,%ymm4
   .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  196,98,125,24,5,121,70,0,0          // vbroadcastss  0x4679(%rip),%ymm8        # 54f0 <_sk_callback_avx+0x1c9>
+  .byte  196,98,125,24,5,140,70,0,0          // vbroadcastss  0x468c(%rip),%ymm8        # 5650 <_sk_callback_avx+0x1dd>
   .byte  197,60,194,202,2                    // vcmpleps      %ymm2,%ymm8,%ymm9
   .byte  197,92,89,210                       // vmulps        %ymm2,%ymm4,%ymm10
   .byte  196,65,92,92,218                    // vsubps        %ymm10,%ymm4,%ymm11
@@ -12964,23 +13185,23 @@ _sk_hsl_to_rgb_avx:
   .byte  197,52,88,210                       // vaddps        %ymm2,%ymm9,%ymm10
   .byte  197,108,88,202                      // vaddps        %ymm2,%ymm2,%ymm9
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,29,83,70,0,0          // vbroadcastss  0x4653(%rip),%ymm11        # 54f4 <_sk_callback_avx+0x1cd>
+  .byte  196,98,125,24,29,102,70,0,0         // vbroadcastss  0x4666(%rip),%ymm11        # 5654 <_sk_callback_avx+0x1e1>
   .byte  196,65,100,88,219                   // vaddps        %ymm11,%ymm3,%ymm11
   .byte  196,67,125,8,227,1                  // vroundps      $0x1,%ymm11,%ymm12
   .byte  196,65,36,92,252                    // vsubps        %ymm12,%ymm11,%ymm15
   .byte  196,65,44,92,217                    // vsubps        %ymm9,%ymm10,%ymm11
-  .byte  196,98,125,24,37,61,70,0,0          // vbroadcastss  0x463d(%rip),%ymm12        # 54fc <_sk_callback_avx+0x1d5>
+  .byte  196,98,125,24,37,80,70,0,0          // vbroadcastss  0x4650(%rip),%ymm12        # 565c <_sk_callback_avx+0x1e9>
   .byte  196,193,4,89,196                    // vmulps        %ymm12,%ymm15,%ymm0
-  .byte  196,98,125,24,45,51,70,0,0          // vbroadcastss  0x4633(%rip),%ymm13        # 5500 <_sk_callback_avx+0x1d9>
+  .byte  196,98,125,24,45,70,70,0,0          // vbroadcastss  0x4646(%rip),%ymm13        # 5660 <_sk_callback_avx+0x1ed>
   .byte  197,20,92,240                       // vsubps        %ymm0,%ymm13,%ymm14
   .byte  196,65,36,89,246                    // vmulps        %ymm14,%ymm11,%ymm14
   .byte  196,65,52,88,246                    // vaddps        %ymm14,%ymm9,%ymm14
-  .byte  196,226,125,24,13,20,70,0,0         // vbroadcastss  0x4614(%rip),%ymm1        # 54f8 <_sk_callback_avx+0x1d1>
+  .byte  196,226,125,24,13,39,70,0,0         // vbroadcastss  0x4627(%rip),%ymm1        # 5658 <_sk_callback_avx+0x1e5>
   .byte  196,193,116,194,255,2               // vcmpleps      %ymm15,%ymm1,%ymm7
   .byte  196,195,13,74,249,112               // vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   .byte  196,65,60,194,247,2                 // vcmpleps      %ymm15,%ymm8,%ymm14
   .byte  196,227,45,74,255,224               // vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  .byte  196,98,125,24,53,255,69,0,0         // vbroadcastss  0x45ff(%rip),%ymm14        # 5504 <_sk_callback_avx+0x1dd>
+  .byte  196,98,125,24,53,18,70,0,0          // vbroadcastss  0x4612(%rip),%ymm14        # 5664 <_sk_callback_avx+0x1f1>
   .byte  196,65,12,194,255,2                 // vcmpleps      %ymm15,%ymm14,%ymm15
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -12999,7 +13220,7 @@ _sk_hsl_to_rgb_avx:
   .byte  197,164,89,247                      // vmulps        %ymm7,%ymm11,%ymm6
   .byte  197,180,88,246                      // vaddps        %ymm6,%ymm9,%ymm6
   .byte  196,227,77,74,237,0                 // vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  .byte  196,226,125,24,5,161,69,0,0         // vbroadcastss  0x45a1(%rip),%ymm0        # 5508 <_sk_callback_avx+0x1e1>
+  .byte  196,226,125,24,5,180,69,0,0         // vbroadcastss  0x45b4(%rip),%ymm0        # 5668 <_sk_callback_avx+0x1f5>
   .byte  197,228,88,192                      // vaddps        %ymm0,%ymm3,%ymm0
   .byte  196,227,125,8,216,1                 // vroundps      $0x1,%ymm0,%ymm3
   .byte  197,252,92,195                      // vsubps        %ymm3,%ymm0,%ymm0
@@ -13051,14 +13272,14 @@ _sk_scale_u8_avx:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,68                              // jne           105e <_sk_scale_u8_avx+0x54>
+  .byte  117,68                              // jne           11ab <_sk_scale_u8_avx+0x54>
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,121,49,200                   // vpmovzxbd     %xmm8,%xmm9
   .byte  196,67,121,4,192,229                // vpermilps     $0xe5,%xmm8,%xmm8
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,202,68,0,0         // vbroadcastss  0x44ca(%rip),%ymm9        # 550c <_sk_callback_avx+0x1e5>
+  .byte  196,98,125,24,13,221,68,0,0         // vbroadcastss  0x44dd(%rip),%ymm9        # 566c <_sk_callback_avx+0x1f9>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -13076,9 +13297,9 @@ _sk_scale_u8_avx:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           1066 <_sk_scale_u8_avx+0x5c>
+  .byte  117,234                             // jne           11b3 <_sk_scale_u8_avx+0x5c>
   .byte  196,65,249,110,193                  // vmovq         %r9,%xmm8
-  .byte  235,155                             // jmp           101e <_sk_scale_u8_avx+0x14>
+  .byte  235,155                             // jmp           116b <_sk_scale_u8_avx+0x14>
 
 HIDDEN _sk_lerp_1_float_avx
 .globl _sk_lerp_1_float_avx
@@ -13110,14 +13331,14 @@ _sk_lerp_u8_avx:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,104                             // jne           113a <_sk_lerp_u8_avx+0x78>
+  .byte  117,104                             // jne           1287 <_sk_lerp_u8_avx+0x78>
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,121,49,200                   // vpmovzxbd     %xmm8,%xmm9
   .byte  196,67,121,4,192,229                // vpermilps     $0xe5,%xmm8,%xmm8
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,22,68,0,0          // vbroadcastss  0x4416(%rip),%ymm9        # 5510 <_sk_callback_avx+0x1e9>
+  .byte  196,98,125,24,13,41,68,0,0          // vbroadcastss  0x4429(%rip),%ymm9        # 5670 <_sk_callback_avx+0x1fd>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -13143,9 +13364,9 @@ _sk_lerp_u8_avx:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           1142 <_sk_lerp_u8_avx+0x80>
+  .byte  117,234                             // jne           128f <_sk_lerp_u8_avx+0x80>
   .byte  196,65,249,110,193                  // vmovq         %r9,%xmm8
-  .byte  233,116,255,255,255                 // jmpq          10d6 <_sk_lerp_u8_avx+0x14>
+  .byte  233,116,255,255,255                 // jmpq          1223 <_sk_lerp_u8_avx+0x14>
 
 HIDDEN _sk_lerp_565_avx
 .globl _sk_lerp_565_avx
@@ -13154,26 +13375,26 @@ _sk_lerp_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,174,0,0,0                    // jne           121e <_sk_lerp_565_avx+0xbc>
+  .byte  15,133,174,0,0,0                    // jne           136b <_sk_lerp_565_avx+0xbc>
   .byte  196,65,122,111,4,122                // vmovdqu       (%r10,%rdi,2),%xmm8
   .byte  197,225,239,219                     // vpxor         %xmm3,%xmm3,%xmm3
   .byte  197,185,105,219                     // vpunpckhwd    %xmm3,%xmm8,%xmm3
   .byte  196,66,121,51,192                   // vpmovzxwd     %xmm8,%xmm8
   .byte  196,227,61,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
-  .byte  196,98,125,24,5,130,67,0,0          // vbroadcastss  0x4382(%rip),%ymm8        # 5514 <_sk_callback_avx+0x1ed>
+  .byte  196,98,125,24,5,149,67,0,0          // vbroadcastss  0x4395(%rip),%ymm8        # 5674 <_sk_callback_avx+0x201>
   .byte  196,65,100,84,192                   // vandps        %ymm8,%ymm3,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,115,67,0,0         // vbroadcastss  0x4373(%rip),%ymm9        # 5518 <_sk_callback_avx+0x1f1>
+  .byte  196,98,125,24,13,134,67,0,0         // vbroadcastss  0x4386(%rip),%ymm9        # 5678 <_sk_callback_avx+0x205>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,105,67,0,0         // vbroadcastss  0x4369(%rip),%ymm9        # 551c <_sk_callback_avx+0x1f5>
+  .byte  196,98,125,24,13,124,67,0,0         // vbroadcastss  0x437c(%rip),%ymm9        # 567c <_sk_callback_avx+0x209>
   .byte  196,65,100,84,201                   // vandps        %ymm9,%ymm3,%ymm9
   .byte  196,65,124,91,201                   // vcvtdq2ps     %ymm9,%ymm9
-  .byte  196,98,125,24,21,90,67,0,0          // vbroadcastss  0x435a(%rip),%ymm10        # 5520 <_sk_callback_avx+0x1f9>
+  .byte  196,98,125,24,21,109,67,0,0         // vbroadcastss  0x436d(%rip),%ymm10        # 5680 <_sk_callback_avx+0x20d>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,80,67,0,0          // vbroadcastss  0x4350(%rip),%ymm10        # 5524 <_sk_callback_avx+0x1fd>
+  .byte  196,98,125,24,21,99,67,0,0          // vbroadcastss  0x4363(%rip),%ymm10        # 5684 <_sk_callback_avx+0x211>
   .byte  196,193,100,84,218                  // vandps        %ymm10,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,21,66,67,0,0          // vbroadcastss  0x4342(%rip),%ymm10        # 5528 <_sk_callback_avx+0x201>
+  .byte  196,98,125,24,21,85,67,0,0          // vbroadcastss  0x4355(%rip),%ymm10        # 5688 <_sk_callback_avx+0x215>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -13185,16 +13406,16 @@ _sk_lerp_565_avx:
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  197,236,88,214                      // vaddps        %ymm6,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,16,67,0,0         // vbroadcastss  0x4310(%rip),%ymm3        # 552c <_sk_callback_avx+0x205>
+  .byte  196,226,125,24,29,35,67,0,0         // vbroadcastss  0x4323(%rip),%ymm3        # 568c <_sk_callback_avx+0x219>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  196,65,57,239,192                   // vpxor         %xmm8,%xmm8,%xmm8
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,63,255,255,255               // ja            1176 <_sk_lerp_565_avx+0x14>
+  .byte  15,135,63,255,255,255               // ja            12c3 <_sk_lerp_565_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,74,0,0,0                  // lea           0x4a(%rip),%r9        # 128c <_sk_lerp_565_avx+0x12a>
+  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 13d8 <_sk_lerp_565_avx+0x129>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -13206,27 +13427,27 @@ _sk_lerp_565_avx:
   .byte  196,65,57,196,68,122,4,2            // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
   .byte  196,65,57,196,68,122,2,1            // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
   .byte  196,65,57,196,4,122,0               // vpinsrw       $0x0,(%r10,%rdi,2),%xmm8,%xmm8
-  .byte  233,235,254,255,255                 // jmpq          1176 <_sk_lerp_565_avx+0x14>
-  .byte  144                                 // nop
-  .byte  243,255                             // repz          (bad)
+  .byte  233,235,254,255,255                 // jmpq          12c3 <_sk_lerp_565_avx+0x14>
+  .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  235,255                             // jmp           1291 <_sk_lerp_565_avx+0x12f>
   .byte  255                                 // (bad)
-  .byte  255,227                             // jmpq          *%rbx
+  .byte  236                                 // in            (%dx),%al
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
+  .byte  255,228                             // jmpq          *%rsp
   .byte  255                                 // (bad)
-  .byte  219,255                             // (bad)
   .byte  255                                 // (bad)
-  .byte  255,211                             // callq         *%rbx
   .byte  255                                 // (bad)
+  .byte  220,255                             // fdivr         %st,%st(7)
   .byte  255                                 // (bad)
-  .byte  255,203                             // dec           %ebx
+  .byte  255,212                             // callq         *%rsp
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
+  .byte  255,204                             // dec           %esp
   .byte  255                                 // (bad)
-  .byte  191                                 // .byte         0xbf
+  .byte  255                                 // (bad)
+  .byte  255,192                             // inc           %eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -13238,7 +13459,7 @@ _sk_load_tables_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,26,2,0,0                     // jne           14d0 <_sk_load_tables_avx+0x228>
+  .byte  15,133,26,2,0,0                     // jne           161c <_sk_load_tables_avx+0x228>
   .byte  196,65,124,16,4,184                 // vmovups       (%r8,%rdi,4),%ymm8
   .byte  85                                  // push          %rbp
   .byte  65,87                               // push          %r15
@@ -13246,7 +13467,7 @@ _sk_load_tables_avx:
   .byte  65,85                               // push          %r13
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
-  .byte  197,124,40,13,50,69,0,0             // vmovaps       0x4532(%rip),%ymm9        # 5800 <_sk_callback_avx+0x4d9>
+  .byte  197,124,40,13,70,69,0,0             // vmovaps       0x4546(%rip),%ymm9        # 5960 <_sk_callback_avx+0x4ed>
   .byte  196,193,60,84,193                   // vandps        %ymm9,%ymm8,%ymm0
   .byte  196,193,249,126,193                 // vmovq         %xmm0,%r9
   .byte  69,137,203                          // mov           %r9d,%r11d
@@ -13338,7 +13559,7 @@ _sk_load_tables_avx:
   .byte  196,193,97,114,210,24               // vpsrld        $0x18,%xmm10,%xmm3
   .byte  196,227,61,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,115,64,0,0          // vbroadcastss  0x4073(%rip),%ymm8        # 5530 <_sk_callback_avx+0x209>
+  .byte  196,98,125,24,5,135,64,0,0          // vbroadcastss  0x4087(%rip),%ymm8        # 5690 <_sk_callback_avx+0x21d>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -13353,9 +13574,9 @@ _sk_load_tables_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  65,254,201                          // dec           %r9b
   .byte  65,128,249,6                        // cmp           $0x6,%r9b
-  .byte  15,135,211,253,255,255              // ja            12bc <_sk_load_tables_avx+0x14>
+  .byte  15,135,211,253,255,255              // ja            1408 <_sk_load_tables_avx+0x14>
   .byte  69,15,182,201                       // movzbl        %r9b,%r9d
-  .byte  76,141,21,140,0,0,0                 // lea           0x8c(%rip),%r10        # 1580 <_sk_load_tables_avx+0x2d8>
+  .byte  76,141,21,140,0,0,0                 // lea           0x8c(%rip),%r10        # 16cc <_sk_load_tables_avx+0x2d8>
   .byte  79,99,12,138                        // movslq        (%r10,%r9,4),%r9
   .byte  77,1,209                            // add           %r10,%r9
   .byte  65,255,225                          // jmpq          *%r9
@@ -13378,7 +13599,7 @@ _sk_load_tables_avx:
   .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
   .byte  196,195,57,34,4,184,0               // vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
   .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  .byte  233,62,253,255,255                  // jmpq          12bc <_sk_load_tables_avx+0x14>
+  .byte  233,62,253,255,255                  // jmpq          1408 <_sk_load_tables_avx+0x14>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  236                                 // in            (%dx),%al
   .byte  255                                 // (bad)
@@ -13396,7 +13617,7 @@ _sk_load_tables_avx:
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  126,255                             // jle           1599 <_sk_load_tables_avx+0x2f1>
+  .byte  126,255                             // jle           16e5 <_sk_load_tables_avx+0x2f1>
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
 
@@ -13408,7 +13629,7 @@ _sk_load_tables_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,113,2,0,0                    // jne           1823 <_sk_load_tables_u16_be_avx+0x287>
+  .byte  15,133,113,2,0,0                    // jne           196f <_sk_load_tables_u16_be_avx+0x287>
   .byte  196,1,121,16,4,72                   // vmovupd       (%r8,%r9,2),%xmm8
   .byte  196,129,121,16,84,72,16             // vmovupd       0x10(%r8,%r9,2),%xmm2
   .byte  196,129,121,16,92,72,32             // vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -13430,7 +13651,7 @@ _sk_load_tables_u16_be_avx:
   .byte  197,177,108,208                     // vpunpcklqdq   %xmm0,%xmm9,%xmm2
   .byte  197,177,109,200                     // vpunpckhqdq   %xmm0,%xmm9,%xmm1
   .byte  196,65,57,108,212                   // vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  .byte  197,121,111,29,114,66,0,0           // vmovdqa       0x4272(%rip),%xmm11        # 5880 <_sk_callback_avx+0x559>
+  .byte  197,121,111,29,134,66,0,0           // vmovdqa       0x4286(%rip),%xmm11        # 59e0 <_sk_callback_avx+0x56d>
   .byte  196,193,105,219,195                 // vpand         %xmm11,%xmm2,%xmm0
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  196,193,121,105,209                 // vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -13529,7 +13750,7 @@ _sk_load_tables_u16_be_avx:
   .byte  196,226,121,51,219                  // vpmovzxwd     %xmm3,%xmm3
   .byte  196,195,101,24,216,1                // vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,36,61,0,0           // vbroadcastss  0x3d24(%rip),%ymm8        # 5534 <_sk_callback_avx+0x20d>
+  .byte  196,98,125,24,5,56,61,0,0           // vbroadcastss  0x3d38(%rip),%ymm8        # 5694 <_sk_callback_avx+0x221>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -13542,29 +13763,29 @@ _sk_load_tables_u16_be_avx:
   .byte  196,1,123,16,4,72                   // vmovsd        (%r8,%r9,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            1889 <_sk_load_tables_u16_be_avx+0x2ed>
+  .byte  116,85                              // je            19d5 <_sk_load_tables_u16_be_avx+0x2ed>
   .byte  196,1,57,22,68,72,8                 // vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            1889 <_sk_load_tables_u16_be_avx+0x2ed>
+  .byte  114,72                              // jb            19d5 <_sk_load_tables_u16_be_avx+0x2ed>
   .byte  196,129,123,16,84,72,16             // vmovsd        0x10(%r8,%r9,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            1896 <_sk_load_tables_u16_be_avx+0x2fa>
+  .byte  116,72                              // je            19e2 <_sk_load_tables_u16_be_avx+0x2fa>
   .byte  196,129,105,22,84,72,24             // vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            1896 <_sk_load_tables_u16_be_avx+0x2fa>
+  .byte  114,59                              // jb            19e2 <_sk_load_tables_u16_be_avx+0x2fa>
   .byte  196,129,123,16,92,72,32             // vmovsd        0x20(%r8,%r9,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,97,253,255,255               // je            15cd <_sk_load_tables_u16_be_avx+0x31>
+  .byte  15,132,97,253,255,255               // je            1719 <_sk_load_tables_u16_be_avx+0x31>
   .byte  196,129,97,22,92,72,40              // vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,80,253,255,255               // jb            15cd <_sk_load_tables_u16_be_avx+0x31>
+  .byte  15,130,80,253,255,255               // jb            1719 <_sk_load_tables_u16_be_avx+0x31>
   .byte  196,1,122,126,76,72,48              // vmovq         0x30(%r8,%r9,2),%xmm9
-  .byte  233,68,253,255,255                  // jmpq          15cd <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,68,253,255,255                  // jmpq          1719 <_sk_load_tables_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,55,253,255,255                  // jmpq          15cd <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,55,253,255,255                  // jmpq          1719 <_sk_load_tables_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,46,253,255,255                  // jmpq          15cd <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,46,253,255,255                  // jmpq          1719 <_sk_load_tables_u16_be_avx+0x31>
 
 HIDDEN _sk_load_tables_rgb_u16_be_avx
 .globl _sk_load_tables_rgb_u16_be_avx
@@ -13574,7 +13795,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,127                       // lea           (%rdi,%rdi,2),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,93,2,0,0                     // jne           1b0e <_sk_load_tables_rgb_u16_be_avx+0x26f>
+  .byte  15,133,93,2,0,0                     // jne           1c5a <_sk_load_tables_rgb_u16_be_avx+0x26f>
   .byte  196,129,122,111,4,72                // vmovdqu       (%r8,%r9,2),%xmm0
   .byte  196,129,122,111,84,72,12            // vmovdqu       0xc(%r8,%r9,2),%xmm2
   .byte  196,129,122,111,76,72,24            // vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -13601,7 +13822,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  197,185,108,202                     // vpunpcklqdq   %xmm2,%xmm8,%xmm1
   .byte  197,185,109,210                     // vpunpckhqdq   %xmm2,%xmm8,%xmm2
   .byte  197,121,108,195                     // vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  .byte  197,121,111,13,107,63,0,0           // vmovdqa       0x3f6b(%rip),%xmm9        # 5890 <_sk_callback_avx+0x569>
+  .byte  197,121,111,13,127,63,0,0           // vmovdqa       0x3f7f(%rip),%xmm9        # 59f0 <_sk_callback_avx+0x57d>
   .byte  196,193,113,219,193                 // vpand         %xmm9,%xmm1,%xmm0
   .byte  196,65,41,239,210                   // vpxor         %xmm10,%xmm10,%xmm10
   .byte  196,193,121,105,202                 // vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -13693,7 +13914,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,195,109,24,208,1                // vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,54,58,0,0         // vbroadcastss  0x3a36(%rip),%ymm3        # 5538 <_sk_callback_avx+0x211>
+  .byte  196,226,125,24,29,74,58,0,0         // vbroadcastss  0x3a4a(%rip),%ymm3        # 5698 <_sk_callback_avx+0x225>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,93                               // pop           %r13
@@ -13704,36 +13925,36 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  196,129,121,110,4,72                // vmovd         (%r8,%r9,2),%xmm0
   .byte  196,129,121,196,68,72,4,2           // vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           1b27 <_sk_load_tables_rgb_u16_be_avx+0x288>
-  .byte  233,190,253,255,255                 // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,5                               // jne           1c73 <_sk_load_tables_rgb_u16_be_avx+0x288>
+  .byte  233,190,253,255,255                 // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,76,72,6             // vmovd         0x6(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,68,72,10,2            // vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            1b56 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+  .byte  114,26                              // jb            1ca2 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
   .byte  196,129,121,110,76,72,12            // vmovd         0xc(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,84,72,16,2          // vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           1b5b <_sk_load_tables_rgb_u16_be_avx+0x2bc>
-  .byte  233,143,253,255,255                 // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,138,253,255,255                 // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           1ca7 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+  .byte  233,143,253,255,255                 // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,138,253,255,255                 // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,76,72,18            // vmovd         0x12(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,76,72,22,2            // vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            1b8a <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+  .byte  114,26                              // jb            1cd6 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
   .byte  196,129,121,110,76,72,24            // vmovd         0x18(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,76,72,28,2          // vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           1b8f <_sk_load_tables_rgb_u16_be_avx+0x2f0>
-  .byte  233,91,253,255,255                  // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,86,253,255,255                  // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           1cdb <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+  .byte  233,91,253,255,255                  // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,86,253,255,255                  // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,92,72,30            // vmovd         0x1e(%r8,%r9,2),%xmm3
   .byte  196,1,97,196,92,72,34,2             // vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            1bb8 <_sk_load_tables_rgb_u16_be_avx+0x319>
+  .byte  114,20                              // jb            1d04 <_sk_load_tables_rgb_u16_be_avx+0x319>
   .byte  196,129,121,110,92,72,36            // vmovd         0x24(%r8,%r9,2),%xmm3
   .byte  196,129,97,196,92,72,40,2           // vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  .byte  233,45,253,255,255                  // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,40,253,255,255                  // jmpq          18e5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,45,253,255,255                  // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,40,253,255,255                  // jmpq          1a31 <_sk_load_tables_rgb_u16_be_avx+0x46>
 
 HIDDEN _sk_byte_tables_avx
 .globl _sk_byte_tables_avx
@@ -13746,7 +13967,7 @@ _sk_byte_tables_avx:
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,106,57,0,0          // vbroadcastss  0x396a(%rip),%ymm8        # 553c <_sk_callback_avx+0x215>
+  .byte  196,98,125,24,5,126,57,0,0          // vbroadcastss  0x397e(%rip),%ymm8        # 569c <_sk_callback_avx+0x229>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
@@ -13783,7 +14004,7 @@ _sk_byte_tables_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,184,56,0,0         // vbroadcastss  0x38b8(%rip),%ymm9        # 5540 <_sk_callback_avx+0x219>
+  .byte  196,98,125,24,13,204,56,0,0         // vbroadcastss  0x38cc(%rip),%ymm9        # 56a0 <_sk_callback_avx+0x22d>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -13945,7 +14166,7 @@ _sk_byte_tables_rgb_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,222,53,0,0         // vbroadcastss  0x35de(%rip),%ymm9        # 5544 <_sk_callback_avx+0x21d>
+  .byte  196,98,125,24,13,242,53,0,0         // vbroadcastss  0x35f2(%rip),%ymm9        # 56a4 <_sk_callback_avx+0x231>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -14242,36 +14463,36 @@ _sk_parametric_r_avx:
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,216                      // vcvtdq2ps     %ymm0,%ymm11
-  .byte  196,98,125,24,37,60,49,0,0          // vbroadcastss  0x313c(%rip),%ymm12        # 5548 <_sk_callback_avx+0x221>
+  .byte  196,98,125,24,37,80,49,0,0          // vbroadcastss  0x3150(%rip),%ymm12        # 56a8 <_sk_callback_avx+0x235>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,50,49,0,0          // vbroadcastss  0x3132(%rip),%ymm12        # 554c <_sk_callback_avx+0x225>
+  .byte  196,98,125,24,37,70,49,0,0          // vbroadcastss  0x3146(%rip),%ymm12        # 56ac <_sk_callback_avx+0x239>
   .byte  196,193,124,84,196                  // vandps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,40,49,0,0          // vbroadcastss  0x3128(%rip),%ymm12        # 5550 <_sk_callback_avx+0x229>
+  .byte  196,98,125,24,37,60,49,0,0          // vbroadcastss  0x313c(%rip),%ymm12        # 56b0 <_sk_callback_avx+0x23d>
   .byte  196,193,124,86,196                  // vorps         %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,30,49,0,0          // vbroadcastss  0x311e(%rip),%ymm12        # 5554 <_sk_callback_avx+0x22d>
+  .byte  196,98,125,24,37,50,49,0,0          // vbroadcastss  0x3132(%rip),%ymm12        # 56b4 <_sk_callback_avx+0x241>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,20,49,0,0          // vbroadcastss  0x3114(%rip),%ymm12        # 5558 <_sk_callback_avx+0x231>
+  .byte  196,98,125,24,37,40,49,0,0          // vbroadcastss  0x3128(%rip),%ymm12        # 56b8 <_sk_callback_avx+0x245>
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,5,49,0,0           // vbroadcastss  0x3105(%rip),%ymm12        # 555c <_sk_callback_avx+0x235>
+  .byte  196,98,125,24,37,25,49,0,0          // vbroadcastss  0x3119(%rip),%ymm12        # 56bc <_sk_callback_avx+0x249>
   .byte  196,193,124,88,196                  // vaddps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,251,48,0,0         // vbroadcastss  0x30fb(%rip),%ymm12        # 5560 <_sk_callback_avx+0x239>
+  .byte  196,98,125,24,37,15,49,0,0          // vbroadcastss  0x310f(%rip),%ymm12        # 56c0 <_sk_callback_avx+0x24d>
   .byte  197,156,94,192                      // vdivps        %ymm0,%ymm12,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
   .byte  197,172,89,192                      // vmulps        %ymm0,%ymm10,%ymm0
   .byte  196,99,125,8,208,1                  // vroundps      $0x1,%ymm0,%ymm10
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,223,48,0,0         // vbroadcastss  0x30df(%rip),%ymm11        # 5564 <_sk_callback_avx+0x23d>
+  .byte  196,98,125,24,29,243,48,0,0         // vbroadcastss  0x30f3(%rip),%ymm11        # 56c4 <_sk_callback_avx+0x251>
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,213,48,0,0         // vbroadcastss  0x30d5(%rip),%ymm11        # 5568 <_sk_callback_avx+0x241>
+  .byte  196,98,125,24,29,233,48,0,0         // vbroadcastss  0x30e9(%rip),%ymm11        # 56c8 <_sk_callback_avx+0x255>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,124,92,195                  // vsubps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,198,48,0,0         // vbroadcastss  0x30c6(%rip),%ymm11        # 556c <_sk_callback_avx+0x245>
+  .byte  196,98,125,24,29,218,48,0,0         // vbroadcastss  0x30da(%rip),%ymm11        # 56cc <_sk_callback_avx+0x259>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,188,48,0,0         // vbroadcastss  0x30bc(%rip),%ymm11        # 5570 <_sk_callback_avx+0x249>
+  .byte  196,98,125,24,29,208,48,0,0         // vbroadcastss  0x30d0(%rip),%ymm11        # 56d0 <_sk_callback_avx+0x25d>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,124,88,194                  // vaddps        %ymm10,%ymm0,%ymm0
-  .byte  196,98,125,24,21,173,48,0,0         // vbroadcastss  0x30ad(%rip),%ymm10        # 5574 <_sk_callback_avx+0x24d>
+  .byte  196,98,125,24,21,193,48,0,0         // vbroadcastss  0x30c1(%rip),%ymm10        # 56d4 <_sk_callback_avx+0x261>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -14279,7 +14500,7 @@ _sk_parametric_r_avx:
   .byte  196,195,125,74,193,128              // vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,132,48,0,0          // vbroadcastss  0x3084(%rip),%ymm8        # 5578 <_sk_callback_avx+0x251>
+  .byte  196,98,125,24,5,152,48,0,0          // vbroadcastss  0x3098(%rip),%ymm8        # 56d8 <_sk_callback_avx+0x265>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14301,36 +14522,36 @@ _sk_parametric_g_avx:
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,217                      // vcvtdq2ps     %ymm1,%ymm11
-  .byte  196,98,125,24,37,53,48,0,0          // vbroadcastss  0x3035(%rip),%ymm12        # 557c <_sk_callback_avx+0x255>
+  .byte  196,98,125,24,37,73,48,0,0          // vbroadcastss  0x3049(%rip),%ymm12        # 56dc <_sk_callback_avx+0x269>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,43,48,0,0          // vbroadcastss  0x302b(%rip),%ymm12        # 5580 <_sk_callback_avx+0x259>
+  .byte  196,98,125,24,37,63,48,0,0          // vbroadcastss  0x303f(%rip),%ymm12        # 56e0 <_sk_callback_avx+0x26d>
   .byte  196,193,116,84,204                  // vandps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,33,48,0,0          // vbroadcastss  0x3021(%rip),%ymm12        # 5584 <_sk_callback_avx+0x25d>
+  .byte  196,98,125,24,37,53,48,0,0          // vbroadcastss  0x3035(%rip),%ymm12        # 56e4 <_sk_callback_avx+0x271>
   .byte  196,193,116,86,204                  // vorps         %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,23,48,0,0          // vbroadcastss  0x3017(%rip),%ymm12        # 5588 <_sk_callback_avx+0x261>
+  .byte  196,98,125,24,37,43,48,0,0          // vbroadcastss  0x302b(%rip),%ymm12        # 56e8 <_sk_callback_avx+0x275>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,13,48,0,0          // vbroadcastss  0x300d(%rip),%ymm12        # 558c <_sk_callback_avx+0x265>
+  .byte  196,98,125,24,37,33,48,0,0          // vbroadcastss  0x3021(%rip),%ymm12        # 56ec <_sk_callback_avx+0x279>
   .byte  196,65,116,89,228                   // vmulps        %ymm12,%ymm1,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,254,47,0,0         // vbroadcastss  0x2ffe(%rip),%ymm12        # 5590 <_sk_callback_avx+0x269>
+  .byte  196,98,125,24,37,18,48,0,0          // vbroadcastss  0x3012(%rip),%ymm12        # 56f0 <_sk_callback_avx+0x27d>
   .byte  196,193,116,88,204                  // vaddps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,244,47,0,0         // vbroadcastss  0x2ff4(%rip),%ymm12        # 5594 <_sk_callback_avx+0x26d>
+  .byte  196,98,125,24,37,8,48,0,0           // vbroadcastss  0x3008(%rip),%ymm12        # 56f4 <_sk_callback_avx+0x281>
   .byte  197,156,94,201                      // vdivps        %ymm1,%ymm12,%ymm1
   .byte  197,164,92,201                      // vsubps        %ymm1,%ymm11,%ymm1
   .byte  197,172,89,201                      // vmulps        %ymm1,%ymm10,%ymm1
   .byte  196,99,125,8,209,1                  // vroundps      $0x1,%ymm1,%ymm10
   .byte  196,65,116,92,210                   // vsubps        %ymm10,%ymm1,%ymm10
-  .byte  196,98,125,24,29,216,47,0,0         // vbroadcastss  0x2fd8(%rip),%ymm11        # 5598 <_sk_callback_avx+0x271>
+  .byte  196,98,125,24,29,236,47,0,0         // vbroadcastss  0x2fec(%rip),%ymm11        # 56f8 <_sk_callback_avx+0x285>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,206,47,0,0         // vbroadcastss  0x2fce(%rip),%ymm11        # 559c <_sk_callback_avx+0x275>
+  .byte  196,98,125,24,29,226,47,0,0         // vbroadcastss  0x2fe2(%rip),%ymm11        # 56fc <_sk_callback_avx+0x289>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,116,92,203                  // vsubps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,191,47,0,0         // vbroadcastss  0x2fbf(%rip),%ymm11        # 55a0 <_sk_callback_avx+0x279>
+  .byte  196,98,125,24,29,211,47,0,0         // vbroadcastss  0x2fd3(%rip),%ymm11        # 5700 <_sk_callback_avx+0x28d>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,181,47,0,0         // vbroadcastss  0x2fb5(%rip),%ymm11        # 55a4 <_sk_callback_avx+0x27d>
+  .byte  196,98,125,24,29,201,47,0,0         // vbroadcastss  0x2fc9(%rip),%ymm11        # 5704 <_sk_callback_avx+0x291>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,116,88,202                  // vaddps        %ymm10,%ymm1,%ymm1
-  .byte  196,98,125,24,21,166,47,0,0         // vbroadcastss  0x2fa6(%rip),%ymm10        # 55a8 <_sk_callback_avx+0x281>
+  .byte  196,98,125,24,21,186,47,0,0         // vbroadcastss  0x2fba(%rip),%ymm10        # 5708 <_sk_callback_avx+0x295>
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -14338,7 +14559,7 @@ _sk_parametric_g_avx:
   .byte  196,195,117,74,201,128              // vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,116,95,200                  // vmaxps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,5,125,47,0,0          // vbroadcastss  0x2f7d(%rip),%ymm8        # 55ac <_sk_callback_avx+0x285>
+  .byte  196,98,125,24,5,145,47,0,0          // vbroadcastss  0x2f91(%rip),%ymm8        # 570c <_sk_callback_avx+0x299>
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14360,36 +14581,36 @@ _sk_parametric_b_avx:
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,218                      // vcvtdq2ps     %ymm2,%ymm11
-  .byte  196,98,125,24,37,46,47,0,0          // vbroadcastss  0x2f2e(%rip),%ymm12        # 55b0 <_sk_callback_avx+0x289>
+  .byte  196,98,125,24,37,66,47,0,0          // vbroadcastss  0x2f42(%rip),%ymm12        # 5710 <_sk_callback_avx+0x29d>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,36,47,0,0          // vbroadcastss  0x2f24(%rip),%ymm12        # 55b4 <_sk_callback_avx+0x28d>
+  .byte  196,98,125,24,37,56,47,0,0          // vbroadcastss  0x2f38(%rip),%ymm12        # 5714 <_sk_callback_avx+0x2a1>
   .byte  196,193,108,84,212                  // vandps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,26,47,0,0          // vbroadcastss  0x2f1a(%rip),%ymm12        # 55b8 <_sk_callback_avx+0x291>
+  .byte  196,98,125,24,37,46,47,0,0          // vbroadcastss  0x2f2e(%rip),%ymm12        # 5718 <_sk_callback_avx+0x2a5>
   .byte  196,193,108,86,212                  // vorps         %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,16,47,0,0          // vbroadcastss  0x2f10(%rip),%ymm12        # 55bc <_sk_callback_avx+0x295>
+  .byte  196,98,125,24,37,36,47,0,0          // vbroadcastss  0x2f24(%rip),%ymm12        # 571c <_sk_callback_avx+0x2a9>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,6,47,0,0           // vbroadcastss  0x2f06(%rip),%ymm12        # 55c0 <_sk_callback_avx+0x299>
+  .byte  196,98,125,24,37,26,47,0,0          // vbroadcastss  0x2f1a(%rip),%ymm12        # 5720 <_sk_callback_avx+0x2ad>
   .byte  196,65,108,89,228                   // vmulps        %ymm12,%ymm2,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,247,46,0,0         // vbroadcastss  0x2ef7(%rip),%ymm12        # 55c4 <_sk_callback_avx+0x29d>
+  .byte  196,98,125,24,37,11,47,0,0          // vbroadcastss  0x2f0b(%rip),%ymm12        # 5724 <_sk_callback_avx+0x2b1>
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,237,46,0,0         // vbroadcastss  0x2eed(%rip),%ymm12        # 55c8 <_sk_callback_avx+0x2a1>
+  .byte  196,98,125,24,37,1,47,0,0           // vbroadcastss  0x2f01(%rip),%ymm12        # 5728 <_sk_callback_avx+0x2b5>
   .byte  197,156,94,210                      // vdivps        %ymm2,%ymm12,%ymm2
   .byte  197,164,92,210                      // vsubps        %ymm2,%ymm11,%ymm2
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  196,99,125,8,210,1                  // vroundps      $0x1,%ymm2,%ymm10
   .byte  196,65,108,92,210                   // vsubps        %ymm10,%ymm2,%ymm10
-  .byte  196,98,125,24,29,209,46,0,0         // vbroadcastss  0x2ed1(%rip),%ymm11        # 55cc <_sk_callback_avx+0x2a5>
+  .byte  196,98,125,24,29,229,46,0,0         // vbroadcastss  0x2ee5(%rip),%ymm11        # 572c <_sk_callback_avx+0x2b9>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,199,46,0,0         // vbroadcastss  0x2ec7(%rip),%ymm11        # 55d0 <_sk_callback_avx+0x2a9>
+  .byte  196,98,125,24,29,219,46,0,0         // vbroadcastss  0x2edb(%rip),%ymm11        # 5730 <_sk_callback_avx+0x2bd>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,108,92,211                  // vsubps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,184,46,0,0         // vbroadcastss  0x2eb8(%rip),%ymm11        # 55d4 <_sk_callback_avx+0x2ad>
+  .byte  196,98,125,24,29,204,46,0,0         // vbroadcastss  0x2ecc(%rip),%ymm11        # 5734 <_sk_callback_avx+0x2c1>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,174,46,0,0         // vbroadcastss  0x2eae(%rip),%ymm11        # 55d8 <_sk_callback_avx+0x2b1>
+  .byte  196,98,125,24,29,194,46,0,0         // vbroadcastss  0x2ec2(%rip),%ymm11        # 5738 <_sk_callback_avx+0x2c5>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,108,88,210                  // vaddps        %ymm10,%ymm2,%ymm2
-  .byte  196,98,125,24,21,159,46,0,0         // vbroadcastss  0x2e9f(%rip),%ymm10        # 55dc <_sk_callback_avx+0x2b5>
+  .byte  196,98,125,24,21,179,46,0,0         // vbroadcastss  0x2eb3(%rip),%ymm10        # 573c <_sk_callback_avx+0x2c9>
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  197,253,91,210                      // vcvtps2dq     %ymm2,%ymm2
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -14397,7 +14618,7 @@ _sk_parametric_b_avx:
   .byte  196,195,109,74,209,128              // vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,118,46,0,0          // vbroadcastss  0x2e76(%rip),%ymm8        # 55e0 <_sk_callback_avx+0x2b9>
+  .byte  196,98,125,24,5,138,46,0,0          // vbroadcastss  0x2e8a(%rip),%ymm8        # 5740 <_sk_callback_avx+0x2cd>
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14419,36 +14640,36 @@ _sk_parametric_a_avx:
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,219                      // vcvtdq2ps     %ymm3,%ymm11
-  .byte  196,98,125,24,37,39,46,0,0          // vbroadcastss  0x2e27(%rip),%ymm12        # 55e4 <_sk_callback_avx+0x2bd>
+  .byte  196,98,125,24,37,59,46,0,0          // vbroadcastss  0x2e3b(%rip),%ymm12        # 5744 <_sk_callback_avx+0x2d1>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,29,46,0,0          // vbroadcastss  0x2e1d(%rip),%ymm12        # 55e8 <_sk_callback_avx+0x2c1>
+  .byte  196,98,125,24,37,49,46,0,0          // vbroadcastss  0x2e31(%rip),%ymm12        # 5748 <_sk_callback_avx+0x2d5>
   .byte  196,193,100,84,220                  // vandps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,19,46,0,0          // vbroadcastss  0x2e13(%rip),%ymm12        # 55ec <_sk_callback_avx+0x2c5>
+  .byte  196,98,125,24,37,39,46,0,0          // vbroadcastss  0x2e27(%rip),%ymm12        # 574c <_sk_callback_avx+0x2d9>
   .byte  196,193,100,86,220                  // vorps         %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,9,46,0,0           // vbroadcastss  0x2e09(%rip),%ymm12        # 55f0 <_sk_callback_avx+0x2c9>
+  .byte  196,98,125,24,37,29,46,0,0          // vbroadcastss  0x2e1d(%rip),%ymm12        # 5750 <_sk_callback_avx+0x2dd>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,255,45,0,0         // vbroadcastss  0x2dff(%rip),%ymm12        # 55f4 <_sk_callback_avx+0x2cd>
+  .byte  196,98,125,24,37,19,46,0,0          // vbroadcastss  0x2e13(%rip),%ymm12        # 5754 <_sk_callback_avx+0x2e1>
   .byte  196,65,100,89,228                   // vmulps        %ymm12,%ymm3,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,240,45,0,0         // vbroadcastss  0x2df0(%rip),%ymm12        # 55f8 <_sk_callback_avx+0x2d1>
+  .byte  196,98,125,24,37,4,46,0,0           // vbroadcastss  0x2e04(%rip),%ymm12        # 5758 <_sk_callback_avx+0x2e5>
   .byte  196,193,100,88,220                  // vaddps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,230,45,0,0         // vbroadcastss  0x2de6(%rip),%ymm12        # 55fc <_sk_callback_avx+0x2d5>
+  .byte  196,98,125,24,37,250,45,0,0         // vbroadcastss  0x2dfa(%rip),%ymm12        # 575c <_sk_callback_avx+0x2e9>
   .byte  197,156,94,219                      // vdivps        %ymm3,%ymm12,%ymm3
   .byte  197,164,92,219                      // vsubps        %ymm3,%ymm11,%ymm3
   .byte  197,172,89,219                      // vmulps        %ymm3,%ymm10,%ymm3
   .byte  196,99,125,8,211,1                  // vroundps      $0x1,%ymm3,%ymm10
   .byte  196,65,100,92,210                   // vsubps        %ymm10,%ymm3,%ymm10
-  .byte  196,98,125,24,29,202,45,0,0         // vbroadcastss  0x2dca(%rip),%ymm11        # 5600 <_sk_callback_avx+0x2d9>
+  .byte  196,98,125,24,29,222,45,0,0         // vbroadcastss  0x2dde(%rip),%ymm11        # 5760 <_sk_callback_avx+0x2ed>
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,192,45,0,0         // vbroadcastss  0x2dc0(%rip),%ymm11        # 5604 <_sk_callback_avx+0x2dd>
+  .byte  196,98,125,24,29,212,45,0,0         // vbroadcastss  0x2dd4(%rip),%ymm11        # 5764 <_sk_callback_avx+0x2f1>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,100,92,219                  // vsubps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,177,45,0,0         // vbroadcastss  0x2db1(%rip),%ymm11        # 5608 <_sk_callback_avx+0x2e1>
+  .byte  196,98,125,24,29,197,45,0,0         // vbroadcastss  0x2dc5(%rip),%ymm11        # 5768 <_sk_callback_avx+0x2f5>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,167,45,0,0         // vbroadcastss  0x2da7(%rip),%ymm11        # 560c <_sk_callback_avx+0x2e5>
+  .byte  196,98,125,24,29,187,45,0,0         // vbroadcastss  0x2dbb(%rip),%ymm11        # 576c <_sk_callback_avx+0x2f9>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,100,88,218                  // vaddps        %ymm10,%ymm3,%ymm3
-  .byte  196,98,125,24,21,152,45,0,0         // vbroadcastss  0x2d98(%rip),%ymm10        # 5610 <_sk_callback_avx+0x2e9>
+  .byte  196,98,125,24,21,172,45,0,0         // vbroadcastss  0x2dac(%rip),%ymm10        # 5770 <_sk_callback_avx+0x2fd>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,253,91,219                      // vcvtps2dq     %ymm3,%ymm3
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -14456,7 +14677,7 @@ _sk_parametric_a_avx:
   .byte  196,195,101,74,217,128              // vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,100,95,216                  // vmaxps        %ymm8,%ymm3,%ymm3
-  .byte  196,98,125,24,5,111,45,0,0          // vbroadcastss  0x2d6f(%rip),%ymm8        # 5614 <_sk_callback_avx+0x2ed>
+  .byte  196,98,125,24,5,131,45,0,0          // vbroadcastss  0x2d83(%rip),%ymm8        # 5774 <_sk_callback_avx+0x301>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14465,31 +14686,31 @@ HIDDEN _sk_lab_to_xyz_avx
 .globl _sk_lab_to_xyz_avx
 FUNCTION(_sk_lab_to_xyz_avx)
 _sk_lab_to_xyz_avx:
-  .byte  196,98,125,24,5,97,45,0,0           // vbroadcastss  0x2d61(%rip),%ymm8        # 5618 <_sk_callback_avx+0x2f1>
+  .byte  196,98,125,24,5,117,45,0,0          // vbroadcastss  0x2d75(%rip),%ymm8        # 5778 <_sk_callback_avx+0x305>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,87,45,0,0           // vbroadcastss  0x2d57(%rip),%ymm8        # 561c <_sk_callback_avx+0x2f5>
+  .byte  196,98,125,24,5,107,45,0,0          // vbroadcastss  0x2d6b(%rip),%ymm8        # 577c <_sk_callback_avx+0x309>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,13,77,45,0,0          // vbroadcastss  0x2d4d(%rip),%ymm9        # 5620 <_sk_callback_avx+0x2f9>
+  .byte  196,98,125,24,13,97,45,0,0          // vbroadcastss  0x2d61(%rip),%ymm9        # 5780 <_sk_callback_avx+0x30d>
   .byte  196,193,116,88,201                  // vaddps        %ymm9,%ymm1,%ymm1
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  196,193,108,88,209                  // vaddps        %ymm9,%ymm2,%ymm2
-  .byte  196,98,125,24,5,57,45,0,0           // vbroadcastss  0x2d39(%rip),%ymm8        # 5624 <_sk_callback_avx+0x2fd>
+  .byte  196,98,125,24,5,77,45,0,0           // vbroadcastss  0x2d4d(%rip),%ymm8        # 5784 <_sk_callback_avx+0x311>
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,47,45,0,0           // vbroadcastss  0x2d2f(%rip),%ymm8        # 5628 <_sk_callback_avx+0x301>
+  .byte  196,98,125,24,5,67,45,0,0           // vbroadcastss  0x2d43(%rip),%ymm8        # 5788 <_sk_callback_avx+0x315>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,37,45,0,0           // vbroadcastss  0x2d25(%rip),%ymm8        # 562c <_sk_callback_avx+0x305>
+  .byte  196,98,125,24,5,57,45,0,0           // vbroadcastss  0x2d39(%rip),%ymm8        # 578c <_sk_callback_avx+0x319>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,252,88,201                      // vaddps        %ymm1,%ymm0,%ymm1
-  .byte  196,98,125,24,5,23,45,0,0           // vbroadcastss  0x2d17(%rip),%ymm8        # 5630 <_sk_callback_avx+0x309>
+  .byte  196,98,125,24,5,43,45,0,0           // vbroadcastss  0x2d2b(%rip),%ymm8        # 5790 <_sk_callback_avx+0x31d>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,252,92,210                      // vsubps        %ymm2,%ymm0,%ymm2
   .byte  197,116,89,193                      // vmulps        %ymm1,%ymm1,%ymm8
   .byte  196,65,116,89,192                   // vmulps        %ymm8,%ymm1,%ymm8
-  .byte  196,98,125,24,13,0,45,0,0           // vbroadcastss  0x2d00(%rip),%ymm9        # 5634 <_sk_callback_avx+0x30d>
+  .byte  196,98,125,24,13,20,45,0,0          // vbroadcastss  0x2d14(%rip),%ymm9        # 5794 <_sk_callback_avx+0x321>
   .byte  196,65,52,194,208,1                 // vcmpltps      %ymm8,%ymm9,%ymm10
-  .byte  196,98,125,24,29,245,44,0,0         // vbroadcastss  0x2cf5(%rip),%ymm11        # 5638 <_sk_callback_avx+0x311>
+  .byte  196,98,125,24,29,9,45,0,0           // vbroadcastss  0x2d09(%rip),%ymm11        # 5798 <_sk_callback_avx+0x325>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,37,235,44,0,0         // vbroadcastss  0x2ceb(%rip),%ymm12        # 563c <_sk_callback_avx+0x315>
+  .byte  196,98,125,24,37,255,44,0,0         // vbroadcastss  0x2cff(%rip),%ymm12        # 579c <_sk_callback_avx+0x329>
   .byte  196,193,116,89,204                  // vmulps        %ymm12,%ymm1,%ymm1
   .byte  196,67,117,74,192,160               // vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   .byte  197,252,89,200                      // vmulps        %ymm0,%ymm0,%ymm1
@@ -14504,9 +14725,9 @@ _sk_lab_to_xyz_avx:
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,193,108,89,212                  // vmulps        %ymm12,%ymm2,%ymm2
   .byte  196,227,109,74,208,144              // vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  .byte  196,226,125,24,5,161,44,0,0         // vbroadcastss  0x2ca1(%rip),%ymm0        # 5640 <_sk_callback_avx+0x319>
+  .byte  196,226,125,24,5,181,44,0,0         // vbroadcastss  0x2cb5(%rip),%ymm0        # 57a0 <_sk_callback_avx+0x32d>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,152,44,0,0          // vbroadcastss  0x2c98(%rip),%ymm8        # 5644 <_sk_callback_avx+0x31d>
+  .byte  196,98,125,24,5,172,44,0,0          // vbroadcastss  0x2cac(%rip),%ymm8        # 57a4 <_sk_callback_avx+0x331>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14520,14 +14741,14 @@ _sk_load_a8_avx:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,62                              // jne           2a03 <_sk_load_a8_avx+0x4e>
+  .byte  117,62                              // jne           2b4f <_sk_load_a8_avx+0x4e>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,121,49,200                  // vpmovzxbd     %xmm0,%xmm1
   .byte  196,227,121,4,192,229               // vpermilps     $0xe5,%xmm0,%xmm0
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,92,44,0,0         // vbroadcastss  0x2c5c(%rip),%ymm1        # 5648 <_sk_callback_avx+0x321>
+  .byte  196,226,125,24,13,112,44,0,0        // vbroadcastss  0x2c70(%rip),%ymm1        # 57a8 <_sk_callback_avx+0x335>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -14544,9 +14765,9 @@ _sk_load_a8_avx:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           2a0b <_sk_load_a8_avx+0x56>
+  .byte  117,234                             // jne           2b57 <_sk_load_a8_avx+0x56>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,161                             // jmp           29c9 <_sk_load_a8_avx+0x14>
+  .byte  235,161                             // jmp           2b15 <_sk_load_a8_avx+0x14>
 
 HIDDEN _sk_gather_a8_avx
 .globl _sk_gather_a8_avx
@@ -14596,7 +14817,7 @@ _sk_gather_a8_avx:
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,81,43,0,0         // vbroadcastss  0x2b51(%rip),%ymm1        # 564c <_sk_callback_avx+0x325>
+  .byte  196,226,125,24,13,101,43,0,0        // vbroadcastss  0x2b65(%rip),%ymm1        # 57ac <_sk_callback_avx+0x339>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -14614,14 +14835,14 @@ FUNCTION(_sk_store_a8_avx)
 _sk_store_a8_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,44,43,0,0           // vbroadcastss  0x2b2c(%rip),%ymm8        # 5650 <_sk_callback_avx+0x329>
+  .byte  196,98,125,24,5,64,43,0,0           // vbroadcastss  0x2b40(%rip),%ymm8        # 57b0 <_sk_callback_avx+0x33d>
   .byte  196,65,100,89,192                   // vmulps        %ymm8,%ymm3,%ymm8
   .byte  196,65,125,91,192                   // vcvtps2dq     %ymm8,%ymm8
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  196,65,57,103,192                   // vpackuswb     %xmm8,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           2b4d <_sk_store_a8_avx+0x37>
+  .byte  117,10                              // jne           2c99 <_sk_store_a8_avx+0x37>
   .byte  196,65,123,17,4,58                  // vmovsd        %xmm8,(%r10,%rdi,1)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14629,10 +14850,10 @@ _sk_store_a8_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            2b49 <_sk_store_a8_avx+0x33>
+  .byte  119,236                             // ja            2c95 <_sk_store_a8_avx+0x33>
   .byte  196,66,121,48,192                   // vpmovzxbw     %xmm8,%xmm8
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,67,0,0,0                  // lea           0x43(%rip),%r9        # 2bb0 <_sk_store_a8_avx+0x9a>
+  .byte  76,141,13,67,0,0,0                  // lea           0x43(%rip),%r9        # 2cfc <_sk_store_a8_avx+0x9a>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14643,7 +14864,7 @@ _sk_store_a8_avx:
   .byte  196,67,121,20,68,58,2,4             // vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   .byte  196,67,121,20,68,58,1,2             // vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   .byte  196,67,121,20,4,58,0                // vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  .byte  235,154                             // jmp           2b49 <_sk_store_a8_avx+0x33>
+  .byte  235,154                             // jmp           2c95 <_sk_store_a8_avx+0x33>
   .byte  144                                 // nop
   .byte  246,255                             // idiv          %bh
   .byte  255                                 // (bad)
@@ -14677,17 +14898,17 @@ _sk_load_g8_avx:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,67                              // jne           2c1f <_sk_load_g8_avx+0x53>
+  .byte  117,67                              // jne           2d6b <_sk_load_g8_avx+0x53>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,121,49,200                  // vpmovzxbd     %xmm0,%xmm1
   .byte  196,227,121,4,192,229               // vpermilps     $0xe5,%xmm0,%xmm0
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,81,42,0,0         // vbroadcastss  0x2a51(%rip),%ymm1        # 5654 <_sk_callback_avx+0x32d>
+  .byte  196,226,125,24,13,101,42,0,0        // vbroadcastss  0x2a65(%rip),%ymm1        # 57b4 <_sk_callback_avx+0x341>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,70,42,0,0         // vbroadcastss  0x2a46(%rip),%ymm3        # 5658 <_sk_callback_avx+0x331>
+  .byte  196,226,125,24,29,90,42,0,0         // vbroadcastss  0x2a5a(%rip),%ymm3        # 57b8 <_sk_callback_avx+0x345>
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
@@ -14701,9 +14922,9 @@ _sk_load_g8_avx:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           2c27 <_sk_load_g8_avx+0x5b>
+  .byte  117,234                             // jne           2d73 <_sk_load_g8_avx+0x5b>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,156                             // jmp           2be0 <_sk_load_g8_avx+0x14>
+  .byte  235,156                             // jmp           2d2c <_sk_load_g8_avx+0x14>
 
 HIDDEN _sk_gather_g8_avx
 .globl _sk_gather_g8_avx
@@ -14753,10 +14974,10 @@ _sk_gather_g8_avx:
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,69,41,0,0         // vbroadcastss  0x2945(%rip),%ymm1        # 565c <_sk_callback_avx+0x335>
+  .byte  196,226,125,24,13,89,41,0,0         // vbroadcastss  0x2959(%rip),%ymm1        # 57bc <_sk_callback_avx+0x349>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,58,41,0,0         // vbroadcastss  0x293a(%rip),%ymm3        # 5660 <_sk_callback_avx+0x339>
+  .byte  196,226,125,24,29,78,41,0,0         // vbroadcastss  0x294e(%rip),%ymm3        # 57c0 <_sk_callback_avx+0x34d>
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  91                                  // pop           %rbx
@@ -14772,9 +14993,9 @@ _sk_gather_i8_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  73,137,192                          // mov           %rax,%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  116,5                               // je            2d46 <_sk_gather_i8_avx+0xf>
+  .byte  116,5                               // je            2e92 <_sk_gather_i8_avx+0xf>
   .byte  76,137,192                          // mov           %r8,%rax
-  .byte  235,2                               // jmp           2d48 <_sk_gather_i8_avx+0x11>
+  .byte  235,2                               // jmp           2e94 <_sk_gather_i8_avx+0x11>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,87                               // push          %r15
   .byte  65,86                               // push          %r14
@@ -14836,10 +15057,10 @@ _sk_gather_i8_avx:
   .byte  196,163,121,34,4,163,2              // vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   .byte  196,163,121,34,28,19,3              // vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,174,41,0,0            // vmovaps       0x29ae(%rip),%ymm10        # 5820 <_sk_callback_avx+0x4f9>
+  .byte  197,124,40,21,194,41,0,0            // vmovaps       0x29c2(%rip),%ymm10        # 5980 <_sk_callback_avx+0x50d>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,224,39,0,0         // vbroadcastss  0x27e0(%rip),%ymm9        # 5664 <_sk_callback_avx+0x33d>
+  .byte  196,98,125,24,13,244,39,0,0         // vbroadcastss  0x27f4(%rip),%ymm9        # 57c4 <_sk_callback_avx+0x351>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -14873,38 +15094,38 @@ _sk_load_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,128,0,0,0                    // jne           2f7c <_sk_load_565_avx+0x8e>
+  .byte  15,133,128,0,0,0                    // jne           30c8 <_sk_load_565_avx+0x8e>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  197,241,239,201                     // vpxor         %xmm1,%xmm1,%xmm1
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,74,39,0,0          // vbroadcastss  0x274a(%rip),%ymm0        # 5668 <_sk_callback_avx+0x341>
+  .byte  196,226,125,24,5,94,39,0,0          // vbroadcastss  0x275e(%rip),%ymm0        # 57c8 <_sk_callback_avx+0x355>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,61,39,0,0         // vbroadcastss  0x273d(%rip),%ymm1        # 566c <_sk_callback_avx+0x345>
+  .byte  196,226,125,24,13,81,39,0,0         // vbroadcastss  0x2751(%rip),%ymm1        # 57cc <_sk_callback_avx+0x359>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,52,39,0,0         // vbroadcastss  0x2734(%rip),%ymm1        # 5670 <_sk_callback_avx+0x349>
+  .byte  196,226,125,24,13,72,39,0,0         // vbroadcastss  0x2748(%rip),%ymm1        # 57d0 <_sk_callback_avx+0x35d>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,39,39,0,0         // vbroadcastss  0x2727(%rip),%ymm3        # 5674 <_sk_callback_avx+0x34d>
+  .byte  196,226,125,24,29,59,39,0,0         // vbroadcastss  0x273b(%rip),%ymm3        # 57d4 <_sk_callback_avx+0x361>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,30,39,0,0         // vbroadcastss  0x271e(%rip),%ymm3        # 5678 <_sk_callback_avx+0x351>
+  .byte  196,226,125,24,29,50,39,0,0         // vbroadcastss  0x2732(%rip),%ymm3        # 57d8 <_sk_callback_avx+0x365>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,17,39,0,0         // vbroadcastss  0x2711(%rip),%ymm3        # 567c <_sk_callback_avx+0x355>
+  .byte  196,226,125,24,29,37,39,0,0         // vbroadcastss  0x2725(%rip),%ymm3        # 57dc <_sk_callback_avx+0x369>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,6,39,0,0          // vbroadcastss  0x2706(%rip),%ymm3        # 5680 <_sk_callback_avx+0x359>
+  .byte  196,226,125,24,29,26,39,0,0         // vbroadcastss  0x271a(%rip),%ymm3        # 57e0 <_sk_callback_avx+0x36d>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,110,255,255,255              // ja            2f02 <_sk_load_565_avx+0x14>
+  .byte  15,135,110,255,255,255              // ja            304e <_sk_load_565_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 2fe8 <_sk_load_565_avx+0xfa>
+  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 3134 <_sk_load_565_avx+0xfa>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -14916,7 +15137,7 @@ _sk_load_565_avx:
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,26,255,255,255                  // jmpq          2f02 <_sk_load_565_avx+0x14>
+  .byte  233,26,255,255,255                  // jmpq          304e <_sk_load_565_avx+0x14>
   .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -14994,23 +15215,23 @@ _sk_gather_565_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,166,37,0,0         // vbroadcastss  0x25a6(%rip),%ymm0        # 5684 <_sk_callback_avx+0x35d>
+  .byte  196,226,125,24,5,186,37,0,0         // vbroadcastss  0x25ba(%rip),%ymm0        # 57e4 <_sk_callback_avx+0x371>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,153,37,0,0        // vbroadcastss  0x2599(%rip),%ymm1        # 5688 <_sk_callback_avx+0x361>
+  .byte  196,226,125,24,13,173,37,0,0        // vbroadcastss  0x25ad(%rip),%ymm1        # 57e8 <_sk_callback_avx+0x375>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,144,37,0,0        // vbroadcastss  0x2590(%rip),%ymm1        # 568c <_sk_callback_avx+0x365>
+  .byte  196,226,125,24,13,164,37,0,0        // vbroadcastss  0x25a4(%rip),%ymm1        # 57ec <_sk_callback_avx+0x379>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,131,37,0,0        // vbroadcastss  0x2583(%rip),%ymm3        # 5690 <_sk_callback_avx+0x369>
+  .byte  196,226,125,24,29,151,37,0,0        // vbroadcastss  0x2597(%rip),%ymm3        # 57f0 <_sk_callback_avx+0x37d>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,122,37,0,0        // vbroadcastss  0x257a(%rip),%ymm3        # 5694 <_sk_callback_avx+0x36d>
+  .byte  196,226,125,24,29,142,37,0,0        // vbroadcastss  0x258e(%rip),%ymm3        # 57f4 <_sk_callback_avx+0x381>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,109,37,0,0        // vbroadcastss  0x256d(%rip),%ymm3        # 5698 <_sk_callback_avx+0x371>
+  .byte  196,226,125,24,29,129,37,0,0        // vbroadcastss  0x2581(%rip),%ymm3        # 57f8 <_sk_callback_avx+0x385>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,98,37,0,0         // vbroadcastss  0x2562(%rip),%ymm3        # 569c <_sk_callback_avx+0x375>
+  .byte  196,226,125,24,29,118,37,0,0        // vbroadcastss  0x2576(%rip),%ymm3        # 57fc <_sk_callback_avx+0x389>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,94                               // pop           %r14
@@ -15024,14 +15245,14 @@ FUNCTION(_sk_store_565_avx)
 _sk_store_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,78,37,0,0           // vbroadcastss  0x254e(%rip),%ymm8        # 56a0 <_sk_callback_avx+0x379>
+  .byte  196,98,125,24,5,98,37,0,0           // vbroadcastss  0x2562(%rip),%ymm8        # 5800 <_sk_callback_avx+0x38d>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,11               // vpslld        $0xb,%xmm9,%xmm10
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
   .byte  196,193,49,114,241,11               // vpslld        $0xb,%xmm9,%xmm9
   .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  .byte  196,98,125,24,21,39,37,0,0          // vbroadcastss  0x2527(%rip),%ymm10        # 56a4 <_sk_callback_avx+0x37d>
+  .byte  196,98,125,24,21,59,37,0,0          // vbroadcastss  0x253b(%rip),%ymm10        # 5804 <_sk_callback_avx+0x391>
   .byte  196,65,116,89,210                   // vmulps        %ymm10,%ymm1,%ymm10
   .byte  196,65,125,91,210                   // vcvtps2dq     %ymm10,%ymm10
   .byte  196,193,33,114,242,5                // vpslld        $0x5,%xmm10,%xmm11
@@ -15045,7 +15266,7 @@ _sk_store_565_avx:
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           31cd <_sk_store_565_avx+0x89>
+  .byte  117,10                              // jne           3319 <_sk_store_565_avx+0x89>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15053,9 +15274,9 @@ _sk_store_565_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            31c9 <_sk_store_565_avx+0x85>
+  .byte  119,236                             // ja            3315 <_sk_store_565_avx+0x85>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 322c <_sk_store_565_avx+0xe8>
+  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 3378 <_sk_store_565_avx+0xe8>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15066,7 +15287,7 @@ _sk_store_565_avx:
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           31c9 <_sk_store_565_avx+0x85>
+  .byte  235,159                             // jmp           3315 <_sk_store_565_avx+0x85>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  245                                 // cmc
   .byte  255                                 // (bad)
@@ -15099,31 +15320,31 @@ _sk_load_4444_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,152,0,0,0                    // jne           32ee <_sk_load_4444_avx+0xa6>
+  .byte  15,133,152,0,0,0                    // jne           343a <_sk_load_4444_avx+0xa6>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  197,241,239,201                     // vpxor         %xmm1,%xmm1,%xmm1
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,48,36,0,0          // vbroadcastss  0x2430(%rip),%ymm0        # 56a8 <_sk_callback_avx+0x381>
+  .byte  196,226,125,24,5,68,36,0,0          // vbroadcastss  0x2444(%rip),%ymm0        # 5808 <_sk_callback_avx+0x395>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,35,36,0,0         // vbroadcastss  0x2423(%rip),%ymm1        # 56ac <_sk_callback_avx+0x385>
+  .byte  196,226,125,24,13,55,36,0,0         // vbroadcastss  0x2437(%rip),%ymm1        # 580c <_sk_callback_avx+0x399>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,26,36,0,0         // vbroadcastss  0x241a(%rip),%ymm1        # 56b0 <_sk_callback_avx+0x389>
+  .byte  196,226,125,24,13,46,36,0,0         // vbroadcastss  0x242e(%rip),%ymm1        # 5810 <_sk_callback_avx+0x39d>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,13,36,0,0         // vbroadcastss  0x240d(%rip),%ymm2        # 56b4 <_sk_callback_avx+0x38d>
+  .byte  196,226,125,24,21,33,36,0,0         // vbroadcastss  0x2421(%rip),%ymm2        # 5814 <_sk_callback_avx+0x3a1>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,4,36,0,0          // vbroadcastss  0x2404(%rip),%ymm2        # 56b8 <_sk_callback_avx+0x391>
+  .byte  196,226,125,24,21,24,36,0,0         // vbroadcastss  0x2418(%rip),%ymm2        # 5818 <_sk_callback_avx+0x3a5>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,247,35,0,0          // vbroadcastss  0x23f7(%rip),%ymm8        # 56bc <_sk_callback_avx+0x395>
+  .byte  196,98,125,24,5,11,36,0,0           // vbroadcastss  0x240b(%rip),%ymm8        # 581c <_sk_callback_avx+0x3a9>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,237,35,0,0          // vbroadcastss  0x23ed(%rip),%ymm8        # 56c0 <_sk_callback_avx+0x399>
+  .byte  196,98,125,24,5,1,36,0,0            // vbroadcastss  0x2401(%rip),%ymm8        # 5820 <_sk_callback_avx+0x3ad>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,223,35,0,0          // vbroadcastss  0x23df(%rip),%ymm8        # 56c4 <_sk_callback_avx+0x39d>
+  .byte  196,98,125,24,5,243,35,0,0          // vbroadcastss  0x23f3(%rip),%ymm8        # 5824 <_sk_callback_avx+0x3b1>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15132,9 +15353,9 @@ _sk_load_4444_avx:
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,86,255,255,255               // ja            325c <_sk_load_4444_avx+0x14>
+  .byte  15,135,86,255,255,255               // ja            33a8 <_sk_load_4444_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 335c <_sk_load_4444_avx+0x114>
+  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 34a8 <_sk_load_4444_avx+0x114>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15146,7 +15367,7 @@ _sk_load_4444_avx:
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,2,255,255,255                   // jmpq          325c <_sk_load_4444_avx+0x14>
+  .byte  233,2,255,255,255                   // jmpq          33a8 <_sk_load_4444_avx+0x14>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  242,255                             // repnz         (bad)
   .byte  255                                 // (bad)
@@ -15225,25 +15446,25 @@ _sk_gather_4444_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,118,34,0,0         // vbroadcastss  0x2276(%rip),%ymm0        # 56c8 <_sk_callback_avx+0x3a1>
+  .byte  196,226,125,24,5,138,34,0,0         // vbroadcastss  0x228a(%rip),%ymm0        # 5828 <_sk_callback_avx+0x3b5>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,105,34,0,0        // vbroadcastss  0x2269(%rip),%ymm1        # 56cc <_sk_callback_avx+0x3a5>
+  .byte  196,226,125,24,13,125,34,0,0        // vbroadcastss  0x227d(%rip),%ymm1        # 582c <_sk_callback_avx+0x3b9>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,96,34,0,0         // vbroadcastss  0x2260(%rip),%ymm1        # 56d0 <_sk_callback_avx+0x3a9>
+  .byte  196,226,125,24,13,116,34,0,0        // vbroadcastss  0x2274(%rip),%ymm1        # 5830 <_sk_callback_avx+0x3bd>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,83,34,0,0         // vbroadcastss  0x2253(%rip),%ymm2        # 56d4 <_sk_callback_avx+0x3ad>
+  .byte  196,226,125,24,21,103,34,0,0        // vbroadcastss  0x2267(%rip),%ymm2        # 5834 <_sk_callback_avx+0x3c1>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,74,34,0,0         // vbroadcastss  0x224a(%rip),%ymm2        # 56d8 <_sk_callback_avx+0x3b1>
+  .byte  196,226,125,24,21,94,34,0,0         // vbroadcastss  0x225e(%rip),%ymm2        # 5838 <_sk_callback_avx+0x3c5>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,61,34,0,0           // vbroadcastss  0x223d(%rip),%ymm8        # 56dc <_sk_callback_avx+0x3b5>
+  .byte  196,98,125,24,5,81,34,0,0           // vbroadcastss  0x2251(%rip),%ymm8        # 583c <_sk_callback_avx+0x3c9>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,51,34,0,0           // vbroadcastss  0x2233(%rip),%ymm8        # 56e0 <_sk_callback_avx+0x3b9>
+  .byte  196,98,125,24,5,71,34,0,0           // vbroadcastss  0x2247(%rip),%ymm8        # 5840 <_sk_callback_avx+0x3cd>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,37,34,0,0           // vbroadcastss  0x2225(%rip),%ymm8        # 56e4 <_sk_callback_avx+0x3bd>
+  .byte  196,98,125,24,5,57,34,0,0           // vbroadcastss  0x2239(%rip),%ymm8        # 5844 <_sk_callback_avx+0x3d1>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -15259,7 +15480,7 @@ FUNCTION(_sk_store_4444_avx)
 _sk_store_4444_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,10,34,0,0           // vbroadcastss  0x220a(%rip),%ymm8        # 56e8 <_sk_callback_avx+0x3c1>
+  .byte  196,98,125,24,5,30,34,0,0           // vbroadcastss  0x221e(%rip),%ymm8        # 5848 <_sk_callback_avx+0x3d5>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,12               // vpslld        $0xc,%xmm9,%xmm10
@@ -15286,7 +15507,7 @@ _sk_store_4444_avx:
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           3577 <_sk_store_4444_avx+0xa7>
+  .byte  117,10                              // jne           36c3 <_sk_store_4444_avx+0xa7>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15294,9 +15515,9 @@ _sk_store_4444_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            3573 <_sk_store_4444_avx+0xa3>
+  .byte  119,236                             // ja            36bf <_sk_store_4444_avx+0xa3>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,66,0,0,0                  // lea           0x42(%rip),%r9        # 35d4 <_sk_store_4444_avx+0x104>
+  .byte  76,141,13,66,0,0,0                  // lea           0x42(%rip),%r9        # 3720 <_sk_store_4444_avx+0x104>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15307,7 +15528,7 @@ _sk_store_4444_avx:
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           3573 <_sk_store_4444_avx+0xa3>
+  .byte  235,159                             // jmp           36bf <_sk_store_4444_avx+0xa3>
   .byte  247,255                             // idiv          %edi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -15338,12 +15559,12 @@ _sk_load_8888_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,135,0,0,0                    // jne           3685 <_sk_load_8888_avx+0x95>
+  .byte  15,133,135,0,0,0                    // jne           37d1 <_sk_load_8888_avx+0x95>
   .byte  196,65,124,16,12,186                // vmovups       (%r10,%rdi,4),%ymm9
-  .byte  197,124,40,21,52,34,0,0             // vmovaps       0x2234(%rip),%ymm10        # 5840 <_sk_callback_avx+0x519>
+  .byte  197,124,40,21,72,34,0,0             // vmovaps       0x2248(%rip),%ymm10        # 59a0 <_sk_callback_avx+0x52d>
   .byte  196,193,52,84,194                   // vandps        %ymm10,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,206,32,0,0          // vbroadcastss  0x20ce(%rip),%ymm8        # 56ec <_sk_callback_avx+0x3c5>
+  .byte  196,98,125,24,5,226,32,0,0          // vbroadcastss  0x20e2(%rip),%ymm8        # 584c <_sk_callback_avx+0x3d9>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  196,193,113,114,209,8               // vpsrld        $0x8,%xmm9,%xmm1
   .byte  196,99,125,25,203,1                 // vextractf128  $0x1,%ymm9,%xmm3
@@ -15370,9 +15591,9 @@ _sk_load_8888_avx:
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,102,255,255,255              // ja            3604 <_sk_load_8888_avx+0x14>
+  .byte  15,135,102,255,255,255              // ja            3750 <_sk_load_8888_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,139,0,0,0                 // lea           0x8b(%rip),%r9        # 3734 <_sk_load_8888_avx+0x144>
+  .byte  76,141,13,139,0,0,0                 // lea           0x8b(%rip),%r9        # 3880 <_sk_load_8888_avx+0x144>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15395,7 +15616,7 @@ _sk_load_8888_avx:
   .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
   .byte  196,195,49,34,4,186,0               // vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
   .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  .byte  233,210,254,255,255                 // jmpq          3604 <_sk_load_8888_avx+0x14>
+  .byte  233,210,254,255,255                 // jmpq          3750 <_sk_load_8888_avx+0x14>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  236                                 // in            (%dx),%al
   .byte  255                                 // (bad)
@@ -15413,7 +15634,7 @@ _sk_load_8888_avx:
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  126,255                             // jle           374d <_sk_load_8888_avx+0x15d>
+  .byte  126,255                             // jle           3899 <_sk_load_8888_avx+0x15d>
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
 
@@ -15458,10 +15679,10 @@ _sk_gather_8888_avx:
   .byte  196,131,121,34,4,152,2              // vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   .byte  196,131,121,34,28,144,3             // vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,94,32,0,0             // vmovaps       0x205e(%rip),%ymm10        # 5860 <_sk_callback_avx+0x539>
+  .byte  197,124,40,21,114,32,0,0            // vmovaps       0x2072(%rip),%ymm10        # 59c0 <_sk_callback_avx+0x54d>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,220,30,0,0         // vbroadcastss  0x1edc(%rip),%ymm9        # 56f0 <_sk_callback_avx+0x3c9>
+  .byte  196,98,125,24,13,240,30,0,0         // vbroadcastss  0x1ef0(%rip),%ymm9        # 5850 <_sk_callback_avx+0x3dd>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -15493,7 +15714,7 @@ FUNCTION(_sk_store_8888_avx)
 _sk_store_8888_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,106,30,0,0          // vbroadcastss  0x1e6a(%rip),%ymm8        # 56f4 <_sk_callback_avx+0x3cd>
+  .byte  196,98,125,24,5,126,30,0,0          // vbroadcastss  0x1e7e(%rip),%ymm8        # 5854 <_sk_callback_avx+0x3e1>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,65,116,89,208                   // vmulps        %ymm8,%ymm1,%ymm10
@@ -15518,7 +15739,7 @@ _sk_store_8888_avx:
   .byte  196,65,45,86,192                    // vorpd         %ymm8,%ymm10,%ymm8
   .byte  196,65,53,86,192                    // vorpd         %ymm8,%ymm9,%ymm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           3918 <_sk_store_8888_avx+0x9c>
+  .byte  117,10                              // jne           3a64 <_sk_store_8888_avx+0x9c>
   .byte  196,65,124,17,4,186                 // vmovups       %ymm8,(%r10,%rdi,4)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15526,9 +15747,9 @@ _sk_store_8888_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            3914 <_sk_store_8888_avx+0x98>
+  .byte  119,236                             // ja            3a60 <_sk_store_8888_avx+0x98>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,85,0,0,0                  // lea           0x55(%rip),%r9        # 3988 <_sk_store_8888_avx+0x10c>
+  .byte  76,141,13,85,0,0,0                  // lea           0x55(%rip),%r9        # 3ad4 <_sk_store_8888_avx+0x10c>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15542,7 +15763,7 @@ _sk_store_8888_avx:
   .byte  196,67,121,22,68,186,8,2            // vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
   .byte  196,67,121,22,68,186,4,1            // vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
   .byte  196,65,121,126,4,186                // vmovd         %xmm8,(%r10,%rdi,4)
-  .byte  235,143                             // jmp           3914 <_sk_store_8888_avx+0x98>
+  .byte  235,143                             // jmp           3a60 <_sk_store_8888_avx+0x98>
   .byte  15,31,0                             // nopl          (%rax)
   .byte  245                                 // cmc
   .byte  255                                 // (bad)
@@ -15580,7 +15801,7 @@ _sk_load_f16_avx:
   .byte  197,252,17,116,36,192               // vmovups       %ymm6,-0x40(%rsp)
   .byte  197,252,17,108,36,160               // vmovups       %ymm5,-0x60(%rsp)
   .byte  197,254,127,100,36,128              // vmovdqu       %ymm4,-0x80(%rsp)
-  .byte  15,133,141,2,0,0                    // jne           3c5b <_sk_load_f16_avx+0x2b7>
+  .byte  15,133,141,2,0,0                    // jne           3da7 <_sk_load_f16_avx+0x2b7>
   .byte  197,121,16,4,248                    // vmovupd       (%rax,%rdi,8),%xmm8
   .byte  197,249,16,84,248,16                // vmovupd       0x10(%rax,%rdi,8),%xmm2
   .byte  197,249,16,76,248,32                // vmovupd       0x20(%rax,%rdi,8),%xmm1
@@ -15598,13 +15819,13 @@ _sk_load_f16_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  .byte  196,98,125,24,37,209,28,0,0         // vbroadcastss  0x1cd1(%rip),%ymm12        # 56f8 <_sk_callback_avx+0x3d1>
+  .byte  196,98,125,24,37,229,28,0,0         // vbroadcastss  0x1ce5(%rip),%ymm12        # 5858 <_sk_callback_avx+0x3e5>
   .byte  196,193,124,84,204                  // vandps        %ymm12,%ymm0,%ymm1
   .byte  197,252,87,193                      // vxorps        %ymm1,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,189,28,0,0         // vbroadcastss  0x1cbd(%rip),%xmm11        # 56fc <_sk_callback_avx+0x3d5>
+  .byte  196,98,121,24,29,209,28,0,0         // vbroadcastss  0x1cd1(%rip),%xmm11        # 585c <_sk_callback_avx+0x3e9>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,179,28,0,0         // vbroadcastss  0x1cb3(%rip),%xmm13        # 5700 <_sk_callback_avx+0x3d9>
+  .byte  196,98,121,24,45,199,28,0,0         // vbroadcastss  0x1cc7(%rip),%xmm13        # 5860 <_sk_callback_avx+0x3ed>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -15618,7 +15839,7 @@ _sk_load_f16_avx:
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,193                      // vorps         %ymm1,%ymm0,%ymm0
   .byte  196,227,125,25,193,1                // vextractf128  $0x1,%ymm0,%xmm1
-  .byte  196,226,121,24,29,105,28,0,0        // vbroadcastss  0x1c69(%rip),%xmm3        # 5704 <_sk_callback_avx+0x3dd>
+  .byte  196,226,121,24,29,125,28,0,0        // vbroadcastss  0x1c7d(%rip),%xmm3        # 5864 <_sk_callback_avx+0x3f1>
   .byte  197,241,254,203                     // vpaddd        %xmm3,%xmm1,%xmm1
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -15711,29 +15932,29 @@ _sk_load_f16_avx:
   .byte  197,123,16,4,248                    // vmovsd        (%rax,%rdi,8),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,79                              // je            3cba <_sk_load_f16_avx+0x316>
+  .byte  116,79                              // je            3e06 <_sk_load_f16_avx+0x316>
   .byte  197,57,22,68,248,8                  // vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,67                              // jb            3cba <_sk_load_f16_avx+0x316>
+  .byte  114,67                              // jb            3e06 <_sk_load_f16_avx+0x316>
   .byte  197,251,16,84,248,16                // vmovsd        0x10(%rax,%rdi,8),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,68                              // je            3cc7 <_sk_load_f16_avx+0x323>
+  .byte  116,68                              // je            3e13 <_sk_load_f16_avx+0x323>
   .byte  197,233,22,84,248,24                // vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,56                              // jb            3cc7 <_sk_load_f16_avx+0x323>
+  .byte  114,56                              // jb            3e13 <_sk_load_f16_avx+0x323>
   .byte  197,251,16,76,248,32                // vmovsd        0x20(%rax,%rdi,8),%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,70,253,255,255               // je            39e5 <_sk_load_f16_avx+0x41>
+  .byte  15,132,70,253,255,255               // je            3b31 <_sk_load_f16_avx+0x41>
   .byte  197,241,22,76,248,40                // vmovhpd       0x28(%rax,%rdi,8),%xmm1,%xmm1
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,54,253,255,255               // jb            39e5 <_sk_load_f16_avx+0x41>
+  .byte  15,130,54,253,255,255               // jb            3b31 <_sk_load_f16_avx+0x41>
   .byte  197,122,126,76,248,48               // vmovq         0x30(%rax,%rdi,8),%xmm9
-  .byte  233,43,253,255,255                  // jmpq          39e5 <_sk_load_f16_avx+0x41>
+  .byte  233,43,253,255,255                  // jmpq          3b31 <_sk_load_f16_avx+0x41>
   .byte  197,241,87,201                      // vxorpd        %xmm1,%xmm1,%xmm1
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,30,253,255,255                  // jmpq          39e5 <_sk_load_f16_avx+0x41>
+  .byte  233,30,253,255,255                  // jmpq          3b31 <_sk_load_f16_avx+0x41>
   .byte  197,241,87,201                      // vxorpd        %xmm1,%xmm1,%xmm1
-  .byte  233,21,253,255,255                  // jmpq          39e5 <_sk_load_f16_avx+0x41>
+  .byte  233,21,253,255,255                  // jmpq          3b31 <_sk_load_f16_avx+0x41>
 
 HIDDEN _sk_gather_f16_avx
 .globl _sk_gather_f16_avx
@@ -15797,13 +16018,13 @@ _sk_gather_f16_avx:
   .byte  197,249,105,210                     // vpunpckhwd    %xmm2,%xmm0,%xmm2
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  .byte  196,98,125,24,37,45,25,0,0          // vbroadcastss  0x192d(%rip),%ymm12        # 5708 <_sk_callback_avx+0x3e1>
+  .byte  196,98,125,24,37,65,25,0,0          // vbroadcastss  0x1941(%rip),%ymm12        # 5868 <_sk_callback_avx+0x3f5>
   .byte  196,193,124,84,212                  // vandps        %ymm12,%ymm0,%ymm2
   .byte  197,252,87,194                      // vxorps        %ymm2,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,25,25,0,0          // vbroadcastss  0x1919(%rip),%xmm11        # 570c <_sk_callback_avx+0x3e5>
+  .byte  196,98,121,24,29,45,25,0,0          // vbroadcastss  0x192d(%rip),%xmm11        # 586c <_sk_callback_avx+0x3f9>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,15,25,0,0          // vbroadcastss  0x190f(%rip),%xmm13        # 5710 <_sk_callback_avx+0x3e9>
+  .byte  196,98,121,24,45,35,25,0,0          // vbroadcastss  0x1923(%rip),%xmm13        # 5870 <_sk_callback_avx+0x3fd>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -15817,7 +16038,7 @@ _sk_gather_f16_avx:
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,194                      // vorps         %ymm2,%ymm0,%ymm0
   .byte  196,227,125,25,194,1                // vextractf128  $0x1,%ymm0,%xmm2
-  .byte  196,226,121,24,29,197,24,0,0        // vbroadcastss  0x18c5(%rip),%xmm3        # 5714 <_sk_callback_avx+0x3ed>
+  .byte  196,226,121,24,29,217,24,0,0        // vbroadcastss  0x18d9(%rip),%xmm3        # 5874 <_sk_callback_avx+0x401>
   .byte  197,233,254,211                     // vpaddd        %xmm3,%xmm2,%xmm2
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -15921,12 +16142,12 @@ _sk_store_f16_avx:
   .byte  197,252,17,52,36                    // vmovups       %ymm6,(%rsp)
   .byte  197,252,17,108,36,224               // vmovups       %ymm5,-0x20(%rsp)
   .byte  197,252,17,100,36,192               // vmovups       %ymm4,-0x40(%rsp)
-  .byte  196,98,125,24,13,222,22,0,0         // vbroadcastss  0x16de(%rip),%ymm9        # 5718 <_sk_callback_avx+0x3f1>
+  .byte  196,98,125,24,13,242,22,0,0         // vbroadcastss  0x16f2(%rip),%ymm9        # 5878 <_sk_callback_avx+0x405>
   .byte  196,65,124,84,209                   // vandps        %ymm9,%ymm0,%ymm10
   .byte  197,252,17,68,36,128                // vmovups       %ymm0,-0x80(%rsp)
   .byte  196,65,124,87,218                   // vxorps        %ymm10,%ymm0,%ymm11
   .byte  196,67,125,25,220,1                 // vextractf128  $0x1,%ymm11,%xmm12
-  .byte  196,98,121,24,5,195,22,0,0          // vbroadcastss  0x16c3(%rip),%xmm8        # 571c <_sk_callback_avx+0x3f5>
+  .byte  196,98,121,24,5,215,22,0,0          // vbroadcastss  0x16d7(%rip),%xmm8        # 587c <_sk_callback_avx+0x409>
   .byte  196,65,57,102,236                   // vpcmpgtd      %xmm12,%xmm8,%xmm13
   .byte  196,65,57,102,243                   // vpcmpgtd      %xmm11,%xmm8,%xmm14
   .byte  196,67,13,24,237,1                  // vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -15936,7 +16157,7 @@ _sk_store_f16_avx:
   .byte  196,67,13,24,242,1                  // vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   .byte  196,193,33,114,211,13               // vpsrld        $0xd,%xmm11,%xmm11
   .byte  196,193,25,114,212,13               // vpsrld        $0xd,%xmm12,%xmm12
-  .byte  196,98,125,24,21,138,22,0,0         // vbroadcastss  0x168a(%rip),%ymm10        # 5720 <_sk_callback_avx+0x3f9>
+  .byte  196,98,125,24,21,158,22,0,0         // vbroadcastss  0x169e(%rip),%ymm10        # 5880 <_sk_callback_avx+0x40d>
   .byte  196,65,12,86,242                    // vorps         %ymm10,%ymm14,%ymm14
   .byte  196,67,125,25,247,1                 // vextractf128  $0x1,%ymm14,%xmm15
   .byte  196,65,1,254,228                    // vpaddd        %xmm12,%xmm15,%xmm12
@@ -16018,7 +16239,7 @@ _sk_store_f16_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,66                              // jne           4274 <_sk_store_f16_avx+0x25e>
+  .byte  117,66                              // jne           43c0 <_sk_store_f16_avx+0x25e>
   .byte  197,120,17,28,248                   // vmovups       %xmm11,(%rax,%rdi,8)
   .byte  197,120,17,84,248,16                // vmovups       %xmm10,0x10(%rax,%rdi,8)
   .byte  197,120,17,76,248,32                // vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -16034,22 +16255,22 @@ _sk_store_f16_avx:
   .byte  255,224                             // jmpq          *%rax
   .byte  197,121,214,28,248                  // vmovq         %xmm11,(%rax,%rdi,8)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,202                             // je            4249 <_sk_store_f16_avx+0x233>
+  .byte  116,202                             // je            4395 <_sk_store_f16_avx+0x233>
   .byte  197,121,23,92,248,8                 // vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,190                             // jb            4249 <_sk_store_f16_avx+0x233>
+  .byte  114,190                             // jb            4395 <_sk_store_f16_avx+0x233>
   .byte  197,121,214,84,248,16               // vmovq         %xmm10,0x10(%rax,%rdi,8)
-  .byte  116,182                             // je            4249 <_sk_store_f16_avx+0x233>
+  .byte  116,182                             // je            4395 <_sk_store_f16_avx+0x233>
   .byte  197,121,23,84,248,24                // vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,170                             // jb            4249 <_sk_store_f16_avx+0x233>
+  .byte  114,170                             // jb            4395 <_sk_store_f16_avx+0x233>
   .byte  197,121,214,76,248,32               // vmovq         %xmm9,0x20(%rax,%rdi,8)
-  .byte  116,162                             // je            4249 <_sk_store_f16_avx+0x233>
+  .byte  116,162                             // je            4395 <_sk_store_f16_avx+0x233>
   .byte  197,121,23,76,248,40                // vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,150                             // jb            4249 <_sk_store_f16_avx+0x233>
+  .byte  114,150                             // jb            4395 <_sk_store_f16_avx+0x233>
   .byte  197,121,214,68,248,48               // vmovq         %xmm8,0x30(%rax,%rdi,8)
-  .byte  235,142                             // jmp           4249 <_sk_store_f16_avx+0x233>
+  .byte  235,142                             // jmp           4395 <_sk_store_f16_avx+0x233>
 
 HIDDEN _sk_load_u16_be_avx
 .globl _sk_load_u16_be_avx
@@ -16059,7 +16280,7 @@ _sk_load_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,253,0,0,0                    // jne           43ce <_sk_load_u16_be_avx+0x113>
+  .byte  15,133,253,0,0,0                    // jne           451a <_sk_load_u16_be_avx+0x113>
   .byte  196,65,121,16,4,64                  // vmovupd       (%r8,%rax,2),%xmm8
   .byte  196,193,121,16,84,64,16             // vmovupd       0x10(%r8,%rax,2),%xmm2
   .byte  196,193,121,16,92,64,32             // vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -16081,7 +16302,7 @@ _sk_load_u16_be_avx:
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,226,19,0,0         // vbroadcastss  0x13e2(%rip),%ymm11        # 5724 <_sk_callback_avx+0x3fd>
+  .byte  196,98,125,24,29,246,19,0,0         // vbroadcastss  0x13f6(%rip),%ymm11        # 5884 <_sk_callback_avx+0x411>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,177,109,202                     // vpunpckhqdq   %xmm2,%xmm9,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -16115,29 +16336,29 @@ _sk_load_u16_be_avx:
   .byte  196,65,123,16,4,64                  // vmovsd        (%r8,%rax,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            4434 <_sk_load_u16_be_avx+0x179>
+  .byte  116,85                              // je            4580 <_sk_load_u16_be_avx+0x179>
   .byte  196,65,57,22,68,64,8                // vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            4434 <_sk_load_u16_be_avx+0x179>
+  .byte  114,72                              // jb            4580 <_sk_load_u16_be_avx+0x179>
   .byte  196,193,123,16,84,64,16             // vmovsd        0x10(%r8,%rax,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            4441 <_sk_load_u16_be_avx+0x186>
+  .byte  116,72                              // je            458d <_sk_load_u16_be_avx+0x186>
   .byte  196,193,105,22,84,64,24             // vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            4441 <_sk_load_u16_be_avx+0x186>
+  .byte  114,59                              // jb            458d <_sk_load_u16_be_avx+0x186>
   .byte  196,193,123,16,92,64,32             // vmovsd        0x20(%r8,%rax,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,213,254,255,255              // je            42ec <_sk_load_u16_be_avx+0x31>
+  .byte  15,132,213,254,255,255              // je            4438 <_sk_load_u16_be_avx+0x31>
   .byte  196,193,97,22,92,64,40              // vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,196,254,255,255              // jb            42ec <_sk_load_u16_be_avx+0x31>
+  .byte  15,130,196,254,255,255              // jb            4438 <_sk_load_u16_be_avx+0x31>
   .byte  196,65,122,126,76,64,48             // vmovq         0x30(%r8,%rax,2),%xmm9
-  .byte  233,184,254,255,255                 // jmpq          42ec <_sk_load_u16_be_avx+0x31>
+  .byte  233,184,254,255,255                 // jmpq          4438 <_sk_load_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,171,254,255,255                 // jmpq          42ec <_sk_load_u16_be_avx+0x31>
+  .byte  233,171,254,255,255                 // jmpq          4438 <_sk_load_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,162,254,255,255                 // jmpq          42ec <_sk_load_u16_be_avx+0x31>
+  .byte  233,162,254,255,255                 // jmpq          4438 <_sk_load_u16_be_avx+0x31>
 
 HIDDEN _sk_load_rgb_u16_be_avx
 .globl _sk_load_rgb_u16_be_avx
@@ -16147,7 +16368,7 @@ _sk_load_rgb_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,127                        // lea           (%rdi,%rdi,2),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,243,0,0,0                    // jne           454f <_sk_load_rgb_u16_be_avx+0x105>
+  .byte  15,133,243,0,0,0                    // jne           469b <_sk_load_rgb_u16_be_avx+0x105>
   .byte  196,193,122,111,4,64                // vmovdqu       (%r8,%rax,2),%xmm0
   .byte  196,193,122,111,84,64,12            // vmovdqu       0xc(%r8,%rax,2),%xmm2
   .byte  196,193,122,111,76,64,24            // vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -16174,7 +16395,7 @@ _sk_load_rgb_u16_be_avx:
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,66,18,0,0          // vbroadcastss  0x1242(%rip),%ymm11        # 5728 <_sk_callback_avx+0x401>
+  .byte  196,98,125,24,29,86,18,0,0          // vbroadcastss  0x1256(%rip),%ymm11        # 5888 <_sk_callback_avx+0x415>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -16195,41 +16416,41 @@ _sk_load_rgb_u16_be_avx:
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,223,17,0,0        // vbroadcastss  0x11df(%rip),%ymm3        # 572c <_sk_callback_avx+0x405>
+  .byte  196,226,125,24,29,243,17,0,0        // vbroadcastss  0x11f3(%rip),%ymm3        # 588c <_sk_callback_avx+0x419>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,193,121,110,4,64                // vmovd         (%r8,%rax,2),%xmm0
   .byte  196,193,121,196,68,64,4,2           // vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           4568 <_sk_load_rgb_u16_be_avx+0x11e>
-  .byte  233,40,255,255,255                  // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,5                               // jne           46b4 <_sk_load_rgb_u16_be_avx+0x11e>
+  .byte  233,40,255,255,255                  // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,76,64,6             // vmovd         0x6(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,68,64,10,2           // vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            4597 <_sk_load_rgb_u16_be_avx+0x14d>
+  .byte  114,26                              // jb            46e3 <_sk_load_rgb_u16_be_avx+0x14d>
   .byte  196,193,121,110,76,64,12            // vmovd         0xc(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,84,64,16,2          // vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           459c <_sk_load_rgb_u16_be_avx+0x152>
-  .byte  233,249,254,255,255                 // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,244,254,255,255                 // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           46e8 <_sk_load_rgb_u16_be_avx+0x152>
+  .byte  233,249,254,255,255                 // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,244,254,255,255                 // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,76,64,18            // vmovd         0x12(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,76,64,22,2           // vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            45cb <_sk_load_rgb_u16_be_avx+0x181>
+  .byte  114,26                              // jb            4717 <_sk_load_rgb_u16_be_avx+0x181>
   .byte  196,193,121,110,76,64,24            // vmovd         0x18(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,76,64,28,2          // vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           45d0 <_sk_load_rgb_u16_be_avx+0x186>
-  .byte  233,197,254,255,255                 // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,192,254,255,255                 // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           471c <_sk_load_rgb_u16_be_avx+0x186>
+  .byte  233,197,254,255,255                 // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,192,254,255,255                 // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,92,64,30            // vmovd         0x1e(%r8,%rax,2),%xmm3
   .byte  196,65,97,196,92,64,34,2            // vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            45f9 <_sk_load_rgb_u16_be_avx+0x1af>
+  .byte  114,20                              // jb            4745 <_sk_load_rgb_u16_be_avx+0x1af>
   .byte  196,193,121,110,92,64,36            // vmovd         0x24(%r8,%rax,2),%xmm3
   .byte  196,193,97,196,92,64,40,2           // vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  .byte  233,151,254,255,255                 // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,146,254,255,255                 // jmpq          4490 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,151,254,255,255                 // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,146,254,255,255                 // jmpq          45dc <_sk_load_rgb_u16_be_avx+0x46>
 
 HIDDEN _sk_store_u16_be_avx
 .globl _sk_store_u16_be_avx
@@ -16238,7 +16459,7 @@ _sk_store_u16_be_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
-  .byte  196,98,125,24,5,28,17,0,0           // vbroadcastss  0x111c(%rip),%ymm8        # 5730 <_sk_callback_avx+0x409>
+  .byte  196,98,125,24,5,48,17,0,0           // vbroadcastss  0x1130(%rip),%ymm8        # 5890 <_sk_callback_avx+0x41d>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,67,125,25,202,1                 // vextractf128  $0x1,%ymm9,%xmm10
@@ -16276,7 +16497,7 @@ _sk_store_u16_be_avx:
   .byte  196,65,17,98,200                    // vpunpckldq    %xmm8,%xmm13,%xmm9
   .byte  196,65,17,106,192                   // vpunpckhdq    %xmm8,%xmm13,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,31                              // jne           46f8 <_sk_store_u16_be_avx+0xfa>
+  .byte  117,31                              // jne           4844 <_sk_store_u16_be_avx+0xfa>
   .byte  196,65,120,17,28,64                 // vmovups       %xmm11,(%r8,%rax,2)
   .byte  196,65,120,17,84,64,16              // vmovups       %xmm10,0x10(%r8,%rax,2)
   .byte  196,65,120,17,76,64,32              // vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -16285,22 +16506,22 @@ _sk_store_u16_be_avx:
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,214,28,64                // vmovq         %xmm11,(%r8,%rax,2)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,240                             // je            4840 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,92,64,8               // vmovhpd       %xmm11,0x8(%r8,%rax,2)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,227                             // jb            4840 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,84,64,16             // vmovq         %xmm10,0x10(%r8,%rax,2)
-  .byte  116,218                             // je            46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,218                             // je            4840 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,84,64,24              // vmovhpd       %xmm10,0x18(%r8,%rax,2)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,205                             // jb            4840 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,76,64,32             // vmovq         %xmm9,0x20(%r8,%rax,2)
-  .byte  116,196                             // je            46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,196                             // je            4840 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,76,64,40              // vmovhpd       %xmm9,0x28(%r8,%rax,2)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,183                             // jb            46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,183                             // jb            4840 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,68,64,48             // vmovq         %xmm8,0x30(%r8,%rax,2)
-  .byte  235,174                             // jmp           46f4 <_sk_store_u16_be_avx+0xf6>
+  .byte  235,174                             // jmp           4840 <_sk_store_u16_be_avx+0xf6>
 
 HIDDEN _sk_load_f32_avx
 .globl _sk_load_f32_avx
@@ -16308,10 +16529,10 @@ FUNCTION(_sk_load_f32_avx)
 _sk_load_f32_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  119,110                             // ja            47bc <_sk_load_f32_avx+0x76>
+  .byte  119,110                             // ja            4908 <_sk_load_f32_avx+0x76>
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,141,21,132,0,0,0                 // lea           0x84(%rip),%r10        # 47e4 <_sk_load_f32_avx+0x9e>
+  .byte  76,141,21,132,0,0,0                 // lea           0x84(%rip),%r10        # 4930 <_sk_load_f32_avx+0x9e>
   .byte  73,99,4,138                         // movslq        (%r10,%rcx,4),%rax
   .byte  76,1,208                            // add           %r10,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16370,7 +16591,7 @@ _sk_store_f32_avx:
   .byte  196,65,37,20,196                    // vunpcklpd     %ymm12,%ymm11,%ymm8
   .byte  196,65,37,21,220                    // vunpckhpd     %ymm12,%ymm11,%ymm11
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,55                              // jne           4871 <_sk_store_f32_avx+0x6d>
+  .byte  117,55                              // jne           49bd <_sk_store_f32_avx+0x6d>
   .byte  196,67,45,24,225,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   .byte  196,67,61,24,235,1                  // vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   .byte  196,67,45,6,201,49                  // vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -16383,22 +16604,22 @@ _sk_store_f32_avx:
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,17,20,128                // vmovupd       %xmm10,(%r8,%rax,4)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            486d <_sk_store_f32_avx+0x69>
+  .byte  116,240                             // je            49b9 <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,76,128,16             // vmovupd       %xmm9,0x10(%r8,%rax,4)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            486d <_sk_store_f32_avx+0x69>
+  .byte  114,227                             // jb            49b9 <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,68,128,32             // vmovupd       %xmm8,0x20(%r8,%rax,4)
-  .byte  116,218                             // je            486d <_sk_store_f32_avx+0x69>
+  .byte  116,218                             // je            49b9 <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,92,128,48             // vmovupd       %xmm11,0x30(%r8,%rax,4)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            486d <_sk_store_f32_avx+0x69>
+  .byte  114,205                             // jb            49b9 <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,84,128,64,1           // vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  .byte  116,195                             // je            486d <_sk_store_f32_avx+0x69>
+  .byte  116,195                             // je            49b9 <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,76,128,80,1           // vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,181                             // jb            486d <_sk_store_f32_avx+0x69>
+  .byte  114,181                             // jb            49b9 <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,68,128,96,1           // vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  .byte  235,171                             // jmp           486d <_sk_store_f32_avx+0x69>
+  .byte  235,171                             // jmp           49b9 <_sk_store_f32_avx+0x69>
 
 HIDDEN _sk_clamp_x_avx
 .globl _sk_clamp_x_avx
@@ -16534,12 +16755,12 @@ HIDDEN _sk_luminance_to_alpha_avx
 .globl _sk_luminance_to_alpha_avx
 FUNCTION(_sk_luminance_to_alpha_avx)
 _sk_luminance_to_alpha_avx:
-  .byte  196,226,125,24,29,167,12,0,0        // vbroadcastss  0xca7(%rip),%ymm3        # 5734 <_sk_callback_avx+0x40d>
+  .byte  196,226,125,24,29,187,12,0,0        // vbroadcastss  0xcbb(%rip),%ymm3        # 5894 <_sk_callback_avx+0x421>
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,29,158,12,0,0        // vbroadcastss  0xc9e(%rip),%ymm3        # 5738 <_sk_callback_avx+0x411>
+  .byte  196,226,125,24,29,178,12,0,0        // vbroadcastss  0xcb2(%rip),%ymm3        # 5898 <_sk_callback_avx+0x425>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,145,12,0,0        // vbroadcastss  0xc91(%rip),%ymm1        # 573c <_sk_callback_avx+0x415>
+  .byte  196,226,125,24,13,165,12,0,0        // vbroadcastss  0xca5(%rip),%ymm1        # 589c <_sk_callback_avx+0x429>
   .byte  197,236,89,201                      // vmulps        %ymm1,%ymm2,%ymm1
   .byte  197,252,88,217                      // vaddps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -16717,7 +16938,7 @@ _sk_linear_gradient_avx:
   .byte  196,226,125,24,88,28                // vbroadcastss  0x1c(%rax),%ymm3
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  15,132,146,0,0,0                    // je            4e01 <_sk_linear_gradient_avx+0xb8>
+  .byte  15,132,146,0,0,0                    // je            4f4d <_sk_linear_gradient_avx+0xb8>
   .byte  72,139,64,8                         // mov           0x8(%rax),%rax
   .byte  72,131,192,32                       // add           $0x20,%rax
   .byte  196,65,28,87,228                    // vxorps        %ymm12,%ymm12,%ymm12
@@ -16744,8 +16965,8 @@ _sk_linear_gradient_avx:
   .byte  196,227,13,74,219,208               // vblendvps     %ymm13,%ymm3,%ymm14,%ymm3
   .byte  72,131,192,36                       // add           $0x24,%rax
   .byte  73,255,200                          // dec           %r8
-  .byte  117,140                             // jne           4d8b <_sk_linear_gradient_avx+0x42>
-  .byte  235,20                              // jmp           4e15 <_sk_linear_gradient_avx+0xcc>
+  .byte  117,140                             // jne           4ed7 <_sk_linear_gradient_avx+0x42>
+  .byte  235,20                              // jmp           4f61 <_sk_linear_gradient_avx+0xcc>
   .byte  196,65,36,87,219                    // vxorps        %ymm11,%ymm11,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
@@ -16800,27 +17021,27 @@ _sk_xy_to_polar_unit_avx:
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,118,8,0,0          // vbroadcastss  0x876(%rip),%ymm13        # 5740 <_sk_callback_avx+0x419>
+  .byte  196,98,125,24,45,138,8,0,0          // vbroadcastss  0x88a(%rip),%ymm13        # 58a0 <_sk_callback_avx+0x42d>
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,108,8,0,0          // vbroadcastss  0x86c(%rip),%ymm14        # 5744 <_sk_callback_avx+0x41d>
+  .byte  196,98,125,24,53,128,8,0,0          // vbroadcastss  0x880(%rip),%ymm14        # 58a4 <_sk_callback_avx+0x431>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,93,8,0,0           // vbroadcastss  0x85d(%rip),%ymm14        # 5748 <_sk_callback_avx+0x421>
+  .byte  196,98,125,24,53,113,8,0,0          // vbroadcastss  0x871(%rip),%ymm14        # 58a8 <_sk_callback_avx+0x435>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,229                    // vmulps        %ymm13,%ymm12,%ymm12
-  .byte  196,98,125,24,45,78,8,0,0           // vbroadcastss  0x84e(%rip),%ymm13        # 574c <_sk_callback_avx+0x425>
+  .byte  196,98,125,24,45,98,8,0,0           // vbroadcastss  0x862(%rip),%ymm13        # 58ac <_sk_callback_avx+0x439>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,57,8,0,0           // vbroadcastss  0x839(%rip),%ymm10        # 5750 <_sk_callback_avx+0x429>
+  .byte  196,98,125,24,21,77,8,0,0           // vbroadcastss  0x84d(%rip),%ymm10        # 58b0 <_sk_callback_avx+0x43d>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,35,8,0,0           // vbroadcastss  0x823(%rip),%ymm10        # 5754 <_sk_callback_avx+0x42d>
+  .byte  196,98,125,24,21,55,8,0,0           // vbroadcastss  0x837(%rip),%ymm10        # 58b4 <_sk_callback_avx+0x441>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,13,8,0,0           // vbroadcastss  0x80d(%rip),%ymm10        # 5758 <_sk_callback_avx+0x431>
+  .byte  196,98,125,24,21,33,8,0,0           // vbroadcastss  0x821(%rip),%ymm10        # 58b8 <_sk_callback_avx+0x445>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -16833,7 +17054,7 @@ HIDDEN _sk_save_xy_avx
 FUNCTION(_sk_save_xy_avx)
 _sk_save_xy_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,236,7,0,0           // vbroadcastss  0x7ec(%rip),%ymm8        # 575c <_sk_callback_avx+0x435>
+  .byte  196,98,125,24,5,0,8,0,0             // vbroadcastss  0x800(%rip),%ymm8        # 58bc <_sk_callback_avx+0x449>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -16870,9 +17091,9 @@ HIDDEN _sk_bilinear_nx_avx
 FUNCTION(_sk_bilinear_nx_avx)
 _sk_bilinear_nx_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,120,7,0,0          // vbroadcastss  0x778(%rip),%ymm0        # 5760 <_sk_callback_avx+0x439>
+  .byte  196,226,125,24,5,140,7,0,0          // vbroadcastss  0x78c(%rip),%ymm0        # 58c0 <_sk_callback_avx+0x44d>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,111,7,0,0           // vbroadcastss  0x76f(%rip),%ymm8        # 5764 <_sk_callback_avx+0x43d>
+  .byte  196,98,125,24,5,131,7,0,0           // vbroadcastss  0x783(%rip),%ymm8        # 58c4 <_sk_callback_avx+0x451>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -16883,7 +17104,7 @@ HIDDEN _sk_bilinear_px_avx
 FUNCTION(_sk_bilinear_px_avx)
 _sk_bilinear_px_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,87,7,0,0           // vbroadcastss  0x757(%rip),%ymm0        # 5768 <_sk_callback_avx+0x441>
+  .byte  196,226,125,24,5,107,7,0,0          // vbroadcastss  0x76b(%rip),%ymm0        # 58c8 <_sk_callback_avx+0x455>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -16895,9 +17116,9 @@ HIDDEN _sk_bilinear_ny_avx
 FUNCTION(_sk_bilinear_ny_avx)
 _sk_bilinear_ny_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,59,7,0,0          // vbroadcastss  0x73b(%rip),%ymm1        # 576c <_sk_callback_avx+0x445>
+  .byte  196,226,125,24,13,79,7,0,0          // vbroadcastss  0x74f(%rip),%ymm1        # 58cc <_sk_callback_avx+0x459>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,49,7,0,0            // vbroadcastss  0x731(%rip),%ymm8        # 5770 <_sk_callback_avx+0x449>
+  .byte  196,98,125,24,5,69,7,0,0            // vbroadcastss  0x745(%rip),%ymm8        # 58d0 <_sk_callback_avx+0x45d>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -16908,7 +17129,7 @@ HIDDEN _sk_bilinear_py_avx
 FUNCTION(_sk_bilinear_py_avx)
 _sk_bilinear_py_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,25,7,0,0          // vbroadcastss  0x719(%rip),%ymm1        # 5774 <_sk_callback_avx+0x44d>
+  .byte  196,226,125,24,13,45,7,0,0          // vbroadcastss  0x72d(%rip),%ymm1        # 58d4 <_sk_callback_avx+0x461>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -16920,14 +17141,14 @@ HIDDEN _sk_bicubic_n3x_avx
 FUNCTION(_sk_bicubic_n3x_avx)
 _sk_bicubic_n3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,252,6,0,0          // vbroadcastss  0x6fc(%rip),%ymm0        # 5778 <_sk_callback_avx+0x451>
+  .byte  196,226,125,24,5,16,7,0,0           // vbroadcastss  0x710(%rip),%ymm0        # 58d8 <_sk_callback_avx+0x465>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,243,6,0,0           // vbroadcastss  0x6f3(%rip),%ymm8        # 577c <_sk_callback_avx+0x455>
+  .byte  196,98,125,24,5,7,7,0,0             // vbroadcastss  0x707(%rip),%ymm8        # 58dc <_sk_callback_avx+0x469>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,228,6,0,0          // vbroadcastss  0x6e4(%rip),%ymm10        # 5780 <_sk_callback_avx+0x459>
+  .byte  196,98,125,24,21,248,6,0,0          // vbroadcastss  0x6f8(%rip),%ymm10        # 58e0 <_sk_callback_avx+0x46d>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,218,6,0,0          // vbroadcastss  0x6da(%rip),%ymm10        # 5784 <_sk_callback_avx+0x45d>
+  .byte  196,98,125,24,21,238,6,0,0          // vbroadcastss  0x6ee(%rip),%ymm10        # 58e4 <_sk_callback_avx+0x471>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -16939,19 +17160,19 @@ HIDDEN _sk_bicubic_n1x_avx
 FUNCTION(_sk_bicubic_n1x_avx)
 _sk_bicubic_n1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,189,6,0,0          // vbroadcastss  0x6bd(%rip),%ymm0        # 5788 <_sk_callback_avx+0x461>
+  .byte  196,226,125,24,5,209,6,0,0          // vbroadcastss  0x6d1(%rip),%ymm0        # 58e8 <_sk_callback_avx+0x475>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,180,6,0,0           // vbroadcastss  0x6b4(%rip),%ymm8        # 578c <_sk_callback_avx+0x465>
+  .byte  196,98,125,24,5,200,6,0,0           // vbroadcastss  0x6c8(%rip),%ymm8        # 58ec <_sk_callback_avx+0x479>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,170,6,0,0          // vbroadcastss  0x6aa(%rip),%ymm9        # 5790 <_sk_callback_avx+0x469>
+  .byte  196,98,125,24,13,190,6,0,0          // vbroadcastss  0x6be(%rip),%ymm9        # 58f0 <_sk_callback_avx+0x47d>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,160,6,0,0          // vbroadcastss  0x6a0(%rip),%ymm10        # 5794 <_sk_callback_avx+0x46d>
+  .byte  196,98,125,24,21,180,6,0,0          // vbroadcastss  0x6b4(%rip),%ymm10        # 58f4 <_sk_callback_avx+0x481>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,145,6,0,0          // vbroadcastss  0x691(%rip),%ymm10        # 5798 <_sk_callback_avx+0x471>
+  .byte  196,98,125,24,21,165,6,0,0          // vbroadcastss  0x6a5(%rip),%ymm10        # 58f8 <_sk_callback_avx+0x485>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,130,6,0,0          // vbroadcastss  0x682(%rip),%ymm9        # 579c <_sk_callback_avx+0x475>
+  .byte  196,98,125,24,13,150,6,0,0          // vbroadcastss  0x696(%rip),%ymm9        # 58fc <_sk_callback_avx+0x489>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -16962,17 +17183,17 @@ HIDDEN _sk_bicubic_p1x_avx
 FUNCTION(_sk_bicubic_p1x_avx)
 _sk_bicubic_p1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,106,6,0,0           // vbroadcastss  0x66a(%rip),%ymm8        # 57a0 <_sk_callback_avx+0x479>
+  .byte  196,98,125,24,5,126,6,0,0           // vbroadcastss  0x67e(%rip),%ymm8        # 5900 <_sk_callback_avx+0x48d>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,72,64                    // vmovups       0x40(%rax),%ymm9
-  .byte  196,98,125,24,21,92,6,0,0           // vbroadcastss  0x65c(%rip),%ymm10        # 57a4 <_sk_callback_avx+0x47d>
+  .byte  196,98,125,24,21,112,6,0,0          // vbroadcastss  0x670(%rip),%ymm10        # 5904 <_sk_callback_avx+0x491>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,82,6,0,0           // vbroadcastss  0x652(%rip),%ymm11        # 57a8 <_sk_callback_avx+0x481>
+  .byte  196,98,125,24,29,102,6,0,0          // vbroadcastss  0x666(%rip),%ymm11        # 5908 <_sk_callback_avx+0x495>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,57,6,0,0           // vbroadcastss  0x639(%rip),%ymm9        # 57ac <_sk_callback_avx+0x485>
+  .byte  196,98,125,24,13,77,6,0,0           // vbroadcastss  0x64d(%rip),%ymm9        # 590c <_sk_callback_avx+0x499>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -16983,13 +17204,13 @@ HIDDEN _sk_bicubic_p3x_avx
 FUNCTION(_sk_bicubic_p3x_avx)
 _sk_bicubic_p3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,33,6,0,0           // vbroadcastss  0x621(%rip),%ymm0        # 57b0 <_sk_callback_avx+0x489>
+  .byte  196,226,125,24,5,53,6,0,0           // vbroadcastss  0x635(%rip),%ymm0        # 5910 <_sk_callback_avx+0x49d>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,14,6,0,0           // vbroadcastss  0x60e(%rip),%ymm10        # 57b4 <_sk_callback_avx+0x48d>
+  .byte  196,98,125,24,21,34,6,0,0           // vbroadcastss  0x622(%rip),%ymm10        # 5914 <_sk_callback_avx+0x4a1>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,4,6,0,0            // vbroadcastss  0x604(%rip),%ymm10        # 57b8 <_sk_callback_avx+0x491>
+  .byte  196,98,125,24,21,24,6,0,0           // vbroadcastss  0x618(%rip),%ymm10        # 5918 <_sk_callback_avx+0x4a5>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -17001,14 +17222,14 @@ HIDDEN _sk_bicubic_n3y_avx
 FUNCTION(_sk_bicubic_n3y_avx)
 _sk_bicubic_n3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,231,5,0,0         // vbroadcastss  0x5e7(%rip),%ymm1        # 57bc <_sk_callback_avx+0x495>
+  .byte  196,226,125,24,13,251,5,0,0         // vbroadcastss  0x5fb(%rip),%ymm1        # 591c <_sk_callback_avx+0x4a9>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,221,5,0,0           // vbroadcastss  0x5dd(%rip),%ymm8        # 57c0 <_sk_callback_avx+0x499>
+  .byte  196,98,125,24,5,241,5,0,0           // vbroadcastss  0x5f1(%rip),%ymm8        # 5920 <_sk_callback_avx+0x4ad>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,206,5,0,0          // vbroadcastss  0x5ce(%rip),%ymm10        # 57c4 <_sk_callback_avx+0x49d>
+  .byte  196,98,125,24,21,226,5,0,0          // vbroadcastss  0x5e2(%rip),%ymm10        # 5924 <_sk_callback_avx+0x4b1>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,196,5,0,0          // vbroadcastss  0x5c4(%rip),%ymm10        # 57c8 <_sk_callback_avx+0x4a1>
+  .byte  196,98,125,24,21,216,5,0,0          // vbroadcastss  0x5d8(%rip),%ymm10        # 5928 <_sk_callback_avx+0x4b5>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -17020,19 +17241,19 @@ HIDDEN _sk_bicubic_n1y_avx
 FUNCTION(_sk_bicubic_n1y_avx)
 _sk_bicubic_n1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,167,5,0,0         // vbroadcastss  0x5a7(%rip),%ymm1        # 57cc <_sk_callback_avx+0x4a5>
+  .byte  196,226,125,24,13,187,5,0,0         // vbroadcastss  0x5bb(%rip),%ymm1        # 592c <_sk_callback_avx+0x4b9>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,157,5,0,0           // vbroadcastss  0x59d(%rip),%ymm8        # 57d0 <_sk_callback_avx+0x4a9>
+  .byte  196,98,125,24,5,177,5,0,0           // vbroadcastss  0x5b1(%rip),%ymm8        # 5930 <_sk_callback_avx+0x4bd>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,147,5,0,0          // vbroadcastss  0x593(%rip),%ymm9        # 57d4 <_sk_callback_avx+0x4ad>
+  .byte  196,98,125,24,13,167,5,0,0          // vbroadcastss  0x5a7(%rip),%ymm9        # 5934 <_sk_callback_avx+0x4c1>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,137,5,0,0          // vbroadcastss  0x589(%rip),%ymm10        # 57d8 <_sk_callback_avx+0x4b1>
+  .byte  196,98,125,24,21,157,5,0,0          // vbroadcastss  0x59d(%rip),%ymm10        # 5938 <_sk_callback_avx+0x4c5>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,122,5,0,0          // vbroadcastss  0x57a(%rip),%ymm10        # 57dc <_sk_callback_avx+0x4b5>
+  .byte  196,98,125,24,21,142,5,0,0          // vbroadcastss  0x58e(%rip),%ymm10        # 593c <_sk_callback_avx+0x4c9>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,107,5,0,0          // vbroadcastss  0x56b(%rip),%ymm9        # 57e0 <_sk_callback_avx+0x4b9>
+  .byte  196,98,125,24,13,127,5,0,0          // vbroadcastss  0x57f(%rip),%ymm9        # 5940 <_sk_callback_avx+0x4cd>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -17043,17 +17264,17 @@ HIDDEN _sk_bicubic_p1y_avx
 FUNCTION(_sk_bicubic_p1y_avx)
 _sk_bicubic_p1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,83,5,0,0            // vbroadcastss  0x553(%rip),%ymm8        # 57e4 <_sk_callback_avx+0x4bd>
+  .byte  196,98,125,24,5,103,5,0,0           // vbroadcastss  0x567(%rip),%ymm8        # 5944 <_sk_callback_avx+0x4d1>
   .byte  197,188,88,72,32                    // vaddps        0x20(%rax),%ymm8,%ymm1
   .byte  197,124,16,72,96                    // vmovups       0x60(%rax),%ymm9
-  .byte  196,98,125,24,21,68,5,0,0           // vbroadcastss  0x544(%rip),%ymm10        # 57e8 <_sk_callback_avx+0x4c1>
+  .byte  196,98,125,24,21,88,5,0,0           // vbroadcastss  0x558(%rip),%ymm10        # 5948 <_sk_callback_avx+0x4d5>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,58,5,0,0           // vbroadcastss  0x53a(%rip),%ymm11        # 57ec <_sk_callback_avx+0x4c5>
+  .byte  196,98,125,24,29,78,5,0,0           // vbroadcastss  0x54e(%rip),%ymm11        # 594c <_sk_callback_avx+0x4d9>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,33,5,0,0           // vbroadcastss  0x521(%rip),%ymm9        # 57f0 <_sk_callback_avx+0x4c9>
+  .byte  196,98,125,24,13,53,5,0,0           // vbroadcastss  0x535(%rip),%ymm9        # 5950 <_sk_callback_avx+0x4dd>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -17064,13 +17285,13 @@ HIDDEN _sk_bicubic_p3y_avx
 FUNCTION(_sk_bicubic_p3y_avx)
 _sk_bicubic_p3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,9,5,0,0           // vbroadcastss  0x509(%rip),%ymm1        # 57f4 <_sk_callback_avx+0x4cd>
+  .byte  196,226,125,24,13,29,5,0,0          // vbroadcastss  0x51d(%rip),%ymm1        # 5954 <_sk_callback_avx+0x4e1>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,245,4,0,0          // vbroadcastss  0x4f5(%rip),%ymm10        # 57f8 <_sk_callback_avx+0x4d1>
+  .byte  196,98,125,24,21,9,5,0,0            // vbroadcastss  0x509(%rip),%ymm10        # 5958 <_sk_callback_avx+0x4e5>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,235,4,0,0          // vbroadcastss  0x4eb(%rip),%ymm10        # 57fc <_sk_callback_avx+0x4d5>
+  .byte  196,98,125,24,21,255,4,0,0          // vbroadcastss  0x4ff(%rip),%ymm10        # 595c <_sk_callback_avx+0x4e9>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -17157,9 +17378,17 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  128,63,0                            // cmpb          $0x0,(%rdi)
-  .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
-  .byte  63                                  // (bad)
+  .byte  128,63,1                            // cmpb          $0x1,(%rdi)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,4,0                               // add           %al,(%rax,%rax,1)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  33,8                                // and           %ecx,(%rax)
+  .byte  130                                 // (bad)
+  .byte  60,0                                // cmp           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  191,0,0,128,63                      // mov           $0x3f800000,%edi
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -17181,11 +17410,13 @@ BALIGN4
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  128,191,0,0,224,64,0                // cmpb          $0x0,0x40e00000(%rdi)
+  .byte  128,63,0                            // cmpb          $0x0,(%rdi)
+  .byte  0,128,191,0,0,224                   // add           %al,-0x1fffff41(%rax)
+  .byte  64,0,0                              // add           %al,(%rax)
+  .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  128,63,145                          // cmpb          $0x91,(%rdi)
+  .byte  145                                 // xchg          %eax,%ecx
   .byte  131,158,61,154,153,153,62           // sbbl          $0x3e,-0x666665c3(%rsi)
   .byte  92                                  // pop           %rsp
   .byte  143                                 // (bad)
@@ -17229,7 +17460,7 @@ BALIGN4
   .byte  190,129,128,128,59                  // mov           $0x3b808081,%esi
   .byte  129,128,128,59,0,248,0,0,8,33       // addl          $0x21080000,-0x7ffc480(%rax)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5525 <.literal4+0xd9>
+  .byte  224,7                               // loopne        5685 <.literal4+0xed>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -17243,10 +17474,10 @@ BALIGN4
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
   .byte  0,52,255                            // add           %dh,(%rdi,%rdi,8)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5550 <.literal4+0x104>
+  .byte  127,0                               // jg            56b0 <.literal4+0x118>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            55c9 <.literal4+0x17d>
+  .byte  119,115                             // ja            5729 <.literal4+0x191>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -17260,10 +17491,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5584 <.literal4+0x138>
+  .byte  127,0                               // jg            56e4 <.literal4+0x14c>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            55fd <.literal4+0x1b1>
+  .byte  119,115                             // ja            575d <.literal4+0x1c5>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -17277,10 +17508,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            55b8 <.literal4+0x16c>
+  .byte  127,0                               // jg            5718 <.literal4+0x180>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            5631 <.literal4+0x1e5>
+  .byte  119,115                             // ja            5791 <.literal4+0x1f9>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -17294,10 +17525,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            55ec <.literal4+0x1a0>
+  .byte  127,0                               // jg            574c <.literal4+0x1b4>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            5665 <.literal4+0x219>
+  .byte  119,115                             // ja            57c5 <.literal4+0x22d>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -17310,7 +17541,7 @@ BALIGN4
   .byte  0,75,0                              // add           %cl,0x0(%rbx)
   .byte  0,128,63,0,0,200                    // add           %al,-0x37ffffc1(%rax)
   .byte  66,0,0                              // rex.X         add %al,(%rax)
-  .byte  127,67                              // jg            5663 <.literal4+0x217>
+  .byte  127,67                              // jg            57c3 <.literal4+0x22b>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -17322,10 +17553,10 @@ BALIGN4
   .byte  190,80,128,3,62                     // mov           $0x3e038050,%esi
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           5683 <.literal4+0x237>
+  .byte  118,63                              // jbe           57e3 <.literal4+0x24b>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            5697 <.literal4+0x24b>
+  .byte  127,67                              // jg            57f7 <.literal4+0x25f>
   .byte  129,128,128,59,0,0,128,63,129,128   // addl          $0x80813f80,0x3b80(%rax)
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,128,63,129,128,128                // add           %al,-0x7f7f7ec1(%rax)
@@ -17334,7 +17565,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5679 <.literal4+0x22d>
+  .byte  224,7                               // loopne        57d9 <.literal4+0x241>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -17346,7 +17577,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5695 <.literal4+0x249>
+  .byte  224,7                               // loopne        57f5 <.literal4+0x25d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -17357,7 +17588,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            56ea <.literal4+0x29e>
+  .byte  124,66                              // jl            584a <.literal4+0x2b2>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,55,0,15                 // mov           %ecx,0xf003788(%rax)
@@ -17375,9 +17606,9 @@ BALIGN4
   .byte  137,136,136,59,15,0                 // mov           %ecx,0xf3b88(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,61,0,0                  // mov           %ecx,0x3d88(%rax)
-  .byte  112,65                              // jo            572d <.literal4+0x2e1>
+  .byte  112,65                              // jo            588d <.literal4+0x2f5>
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            573b <.literal4+0x2ef>
+  .byte  127,67                              // jg            589b <.literal4+0x303>
   .byte  0,128,0,0,0,0                       // add           %al,0x0(%rax)
   .byte  0,128,0,4,0,128                     // add           %al,-0x7ffffc00(%rax)
   .byte  0,0                                 // add           %al,(%rax)
@@ -17393,7 +17624,7 @@ BALIGN4
   .byte  0,128,55,0,0,128                    // add           %al,-0x7fffffc9(%rax)
   .byte  63                                  // (bad)
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            577b <.literal4+0x32f>
+  .byte  127,71                              // jg            58db <.literal4+0x343>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,89                               // ds            pop %rcx
@@ -17620,7 +17851,7 @@ _sk_seed_shader_sse41:
   .byte  102,15,110,199                      // movd          %edi,%xmm0
   .byte  102,15,112,192,0                    // pshufd        $0x0,%xmm0,%xmm0
   .byte  15,91,200                           // cvtdq2ps      %xmm0,%xmm1
-  .byte  15,40,21,100,57,0,0                 // movaps        0x3964(%rip),%xmm2        # 39e0 <_sk_callback_sse41+0xe3>
+  .byte  15,40,21,68,58,0,0                  // movaps        0x3a44(%rip),%xmm2        # 3ac0 <_sk_callback_sse41+0xdc>
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  15,16,2                             // movups        (%rdx),%xmm0
   .byte  15,88,193                           // addps         %xmm1,%xmm0
@@ -17629,7 +17860,7 @@ _sk_seed_shader_sse41:
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,21,83,57,0,0                  // movaps        0x3953(%rip),%xmm2        # 39f0 <_sk_callback_sse41+0xf3>
+  .byte  15,40,21,51,58,0,0                  // movaps        0x3a33(%rip),%xmm2        # 3ad0 <_sk_callback_sse41+0xec>
   .byte  15,87,219                           // xorps         %xmm3,%xmm3
   .byte  15,87,228                           // xorps         %xmm4,%xmm4
   .byte  15,87,237                           // xorps         %xmm5,%xmm5
@@ -17637,6 +17868,56 @@ _sk_seed_shader_sse41:
   .byte  15,87,255                           // xorps         %xmm7,%xmm7
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_dither_sse41
+.globl _sk_dither_sse41
+FUNCTION(_sk_dither_sse41)
+_sk_dither_sse41:  // Dither stage (SSE4.1 build): bit-interleaves two integers X and X^Y (presumably pixel x and row y -- confirm against the stage source) into a per-lane value, scales/biases it, multiplies by the float at context+8 and by xmm3, and adds the result to xmm0, xmm1 and xmm2.
+  .byte  72,173                              // lods          %ds:(%rsi),%rax -- rax = next quadword read from (%rsi); dereferenced below as a context pointer
+  .byte  102,68,15,110,199                   // movd          %edi,%xmm8 -- lane 0 = %edi
+  .byte  102,69,15,112,192,0                 // pshufd        $0x0,%xmm8,%xmm8 -- broadcast lane 0 to all four lanes
+  .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8 -- int32 -> float
+  .byte  68,15,16,10                         // movups        (%rdx),%xmm9 -- unaligned load of four floats from (%rdx) (presumably per-lane offsets -- confirm)
+  .byte  69,15,88,200                        // addps         %xmm8,%xmm9 -- xmm9 = floats from (%rdx) + float(%edi)
+  .byte  243,69,15,91,201                    // cvttps2dq     %xmm9,%xmm9 -- truncate to int32: per-lane integer X
+  .byte  72,139,8                            // mov           (%rax),%rcx -- rcx = pointer stored at context offset 0
+  .byte  102,68,15,110,1                     // movd          (%rcx),%xmm8 -- load the 32-bit value it points to (presumably the current y -- confirm)
+  .byte  102,69,15,112,192,0                 // pshufd        $0x0,%xmm8,%xmm8 -- broadcast it: Y
+  .byte  102,69,15,239,193                   // pxor          %xmm9,%xmm8 -- xmm8 = X ^ Y
+  .byte  102,68,15,111,21,248,57,0,0         // movdqa        0x39f8(%rip),%xmm10        # 3ae0 <_sk_callback_sse41+0xfc> -- mask A from the literal pool (value not visible in this chunk; presumably 1)
+  .byte  102,69,15,111,216                   // movdqa        %xmm8,%xmm11 -- copy of X^Y
+  .byte  102,69,15,219,218                   // pand          %xmm10,%xmm11 -- (X^Y) & A
+  .byte  102,65,15,114,243,5                 // pslld         $0x5,%xmm11 -- xmm11 = ((X^Y) & A) << 5
+  .byte  102,69,15,219,209                   // pand          %xmm9,%xmm10 -- X & A
+  .byte  102,65,15,114,242,4                 // pslld         $0x4,%xmm10 -- xmm10 = (X & A) << 4
+  .byte  102,68,15,111,37,228,57,0,0         // movdqa        0x39e4(%rip),%xmm12        # 3af0 <_sk_callback_sse41+0x10c> -- mask C from the literal pool (value not visible here; presumably 4)
+  .byte  102,68,15,111,45,235,57,0,0         // movdqa        0x39eb(%rip),%xmm13        # 3b00 <_sk_callback_sse41+0x11c> -- mask B from the literal pool (value not visible here; presumably 2)
+  .byte  102,69,15,111,240                   // movdqa        %xmm8,%xmm14 -- copy of X^Y
+  .byte  102,69,15,219,245                   // pand          %xmm13,%xmm14 -- (X^Y) & B
+  .byte  102,65,15,114,246,2                 // pslld         $0x2,%xmm14 -- xmm14 = ((X^Y) & B) << 2
+  .byte  102,69,15,219,233                   // pand          %xmm9,%xmm13 -- X & B
+  .byte  102,69,15,254,237                   // paddd         %xmm13,%xmm13 -- doubled, i.e. xmm13 = (X & B) << 1
+  .byte  102,69,15,219,196                   // pand          %xmm12,%xmm8 -- (X^Y) & C
+  .byte  102,65,15,114,208,1                 // psrld         $0x1,%xmm8 -- xmm8 = ((X^Y) & C) >> 1
+  .byte  102,69,15,219,204                   // pand          %xmm12,%xmm9 -- X & C
+  .byte  102,65,15,114,209,2                 // psrld         $0x2,%xmm9 -- xmm9 = (X & C) >> 2
+  .byte  102,69,15,235,234                   // por           %xmm10,%xmm13 -- start OR-ing the six shifted pieces together
+  .byte  102,69,15,235,233                   // por           %xmm9,%xmm13
+  .byte  102,69,15,235,243                   // por           %xmm11,%xmm14
+  .byte  102,69,15,235,245                   // por           %xmm13,%xmm14
+  .byte  102,69,15,235,240                   // por           %xmm8,%xmm14 -- xmm14 = M, the interleaved integer
+  .byte  69,15,91,198                        // cvtdq2ps      %xmm14,%xmm8 -- float(M)
+  .byte  68,15,89,5,166,57,0,0               // mulps         0x39a6(%rip),%xmm8        # 3b10 <_sk_callback_sse41+0x12c> -- scale by a literal-pool constant (value not visible here)
+  .byte  68,15,88,5,174,57,0,0               // addps         0x39ae(%rip),%xmm8        # 3b20 <_sk_callback_sse41+0x13c> -- add a literal-pool bias (value not visible here)
+  .byte  243,68,15,16,72,8                   // movss         0x8(%rax),%xmm9 -- load the float at context offset 8
+  .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9 -- broadcast it to all four lanes
+  .byte  69,15,89,200                        // mulps         %xmm8,%xmm9 -- xmm9 = context float * scaled/biased M
+  .byte  68,15,89,203                        // mulps         %xmm3,%xmm9 -- further scaled by xmm3 (presumably alpha -- confirm)
+  .byte  65,15,88,193                        // addps         %xmm9,%xmm0 -- add the same offset to xmm0 ...
+  .byte  65,15,88,201                        // addps         %xmm9,%xmm1 -- ... xmm1 ...
+  .byte  65,15,88,209                        // addps         %xmm9,%xmm2 -- ... and xmm2; xmm3 itself is left unchanged
+  .byte  72,173                              // lods          %ds:(%rsi),%rax -- rax = next quadword read from (%rsi)
+  .byte  255,224                             // jmpq          *%rax -- jump to that address (presumably the next pipeline stage)
+
 HIDDEN _sk_constant_color_sse41
 .globl _sk_constant_color_sse41
 FUNCTION(_sk_constant_color_sse41)
@@ -17669,7 +17950,7 @@ HIDDEN _sk_srcatop_sse41
 FUNCTION(_sk_srcatop_sse41)
 _sk_srcatop_sse41:
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  68,15,40,5,14,57,0,0                // movaps        0x390e(%rip),%xmm8        # 3a00 <_sk_callback_sse41+0x103>
+  .byte  68,15,40,5,87,57,0,0                // movaps        0x3957(%rip),%xmm8        # 3b30 <_sk_callback_sse41+0x14c>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -17694,7 +17975,7 @@ FUNCTION(_sk_dstatop_sse41)
 _sk_dstatop_sse41:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
   .byte  68,15,89,196                        // mulps         %xmm4,%xmm8
-  .byte  68,15,40,13,209,56,0,0              // movaps        0x38d1(%rip),%xmm9        # 3a10 <_sk_callback_sse41+0x113>
+  .byte  68,15,40,13,26,57,0,0               // movaps        0x391a(%rip),%xmm9        # 3b40 <_sk_callback_sse41+0x15c>
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
@@ -17741,7 +18022,7 @@ HIDDEN _sk_srcout_sse41
 .globl _sk_srcout_sse41
 FUNCTION(_sk_srcout_sse41)
 _sk_srcout_sse41:
-  .byte  68,15,40,5,117,56,0,0               // movaps        0x3875(%rip),%xmm8        # 3a20 <_sk_callback_sse41+0x123>
+  .byte  68,15,40,5,190,56,0,0               // movaps        0x38be(%rip),%xmm8        # 3b50 <_sk_callback_sse41+0x16c>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
@@ -17754,7 +18035,7 @@ HIDDEN _sk_dstout_sse41
 .globl _sk_dstout_sse41
 FUNCTION(_sk_dstout_sse41)
 _sk_dstout_sse41:
-  .byte  68,15,40,5,101,56,0,0               // movaps        0x3865(%rip),%xmm8        # 3a30 <_sk_callback_sse41+0x133>
+  .byte  68,15,40,5,174,56,0,0               // movaps        0x38ae(%rip),%xmm8        # 3b60 <_sk_callback_sse41+0x17c>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  15,89,196                           // mulps         %xmm4,%xmm0
@@ -17771,7 +18052,7 @@ HIDDEN _sk_srcover_sse41
 .globl _sk_srcover_sse41
 FUNCTION(_sk_srcover_sse41)
 _sk_srcover_sse41:
-  .byte  68,15,40,5,72,56,0,0                // movaps        0x3848(%rip),%xmm8        # 3a40 <_sk_callback_sse41+0x143>
+  .byte  68,15,40,5,145,56,0,0               // movaps        0x3891(%rip),%xmm8        # 3b70 <_sk_callback_sse41+0x18c>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -17791,7 +18072,7 @@ HIDDEN _sk_dstover_sse41
 .globl _sk_dstover_sse41
 FUNCTION(_sk_dstover_sse41)
 _sk_dstover_sse41:
-  .byte  68,15,40,5,28,56,0,0                // movaps        0x381c(%rip),%xmm8        # 3a50 <_sk_callback_sse41+0x153>
+  .byte  68,15,40,5,101,56,0,0               // movaps        0x3865(%rip),%xmm8        # 3b80 <_sk_callback_sse41+0x19c>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -17819,7 +18100,7 @@ HIDDEN _sk_multiply_sse41
 .globl _sk_multiply_sse41
 FUNCTION(_sk_multiply_sse41)
 _sk_multiply_sse41:
-  .byte  68,15,40,5,240,55,0,0               // movaps        0x37f0(%rip),%xmm8        # 3a60 <_sk_callback_sse41+0x163>
+  .byte  68,15,40,5,57,56,0,0                // movaps        0x3839(%rip),%xmm8        # 3b90 <_sk_callback_sse41+0x1ac>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
@@ -17895,7 +18176,7 @@ HIDDEN _sk_xor__sse41
 FUNCTION(_sk_xor__sse41)
 _sk_xor__sse41:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
-  .byte  15,40,29,33,55,0,0                  // movaps        0x3721(%rip),%xmm3        # 3a70 <_sk_callback_sse41+0x173>
+  .byte  15,40,29,106,55,0,0                 // movaps        0x376a(%rip),%xmm3        # 3ba0 <_sk_callback_sse41+0x1bc>
   .byte  68,15,40,203                        // movaps        %xmm3,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
@@ -17943,7 +18224,7 @@ _sk_darken_sse41:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,95,209                        // maxps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,140,54,0,0                 // movaps        0x368c(%rip),%xmm2        # 3a80 <_sk_callback_sse41+0x183>
+  .byte  15,40,21,213,54,0,0                 // movaps        0x36d5(%rip),%xmm2        # 3bb0 <_sk_callback_sse41+0x1cc>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -17977,7 +18258,7 @@ _sk_lighten_sse41:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,49,54,0,0                  // movaps        0x3631(%rip),%xmm2        # 3a90 <_sk_callback_sse41+0x193>
+  .byte  15,40,21,122,54,0,0                 // movaps        0x367a(%rip),%xmm2        # 3bc0 <_sk_callback_sse41+0x1dc>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -18014,7 +18295,7 @@ _sk_difference_sse41:
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,203,53,0,0                 // movaps        0x35cb(%rip),%xmm2        # 3aa0 <_sk_callback_sse41+0x1a3>
+  .byte  15,40,21,20,54,0,0                  // movaps        0x3614(%rip),%xmm2        # 3bd0 <_sk_callback_sse41+0x1ec>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -18041,7 +18322,7 @@ _sk_exclusion_sse41:
   .byte  15,89,214                           // mulps         %xmm6,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,202                        // subps         %xmm2,%xmm9
-  .byte  15,40,13,140,53,0,0                 // movaps        0x358c(%rip),%xmm1        # 3ab0 <_sk_callback_sse41+0x1b3>
+  .byte  15,40,13,213,53,0,0                 // movaps        0x35d5(%rip),%xmm1        # 3be0 <_sk_callback_sse41+0x1fc>
   .byte  15,92,203                           // subps         %xmm3,%xmm1
   .byte  15,89,207                           // mulps         %xmm7,%xmm1
   .byte  15,88,217                           // addps         %xmm1,%xmm3
@@ -18055,7 +18336,7 @@ HIDDEN _sk_colorburn_sse41
 FUNCTION(_sk_colorburn_sse41)
 _sk_colorburn_sse41:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,40,21,123,53,0,0              // movaps        0x357b(%rip),%xmm10        # 3ac0 <_sk_callback_sse41+0x1c3>
+  .byte  68,15,40,21,196,53,0,0              // movaps        0x35c4(%rip),%xmm10        # 3bf0 <_sk_callback_sse41+0x20c>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  69,15,40,203                        // movaps        %xmm11,%xmm9
@@ -18137,7 +18418,7 @@ HIDDEN _sk_colordodge_sse41
 FUNCTION(_sk_colordodge_sse41)
 _sk_colordodge_sse41:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,40,21,89,52,0,0               // movaps        0x3459(%rip),%xmm10        # 3ad0 <_sk_callback_sse41+0x1d3>
+  .byte  68,15,40,21,162,52,0,0              // movaps        0x34a2(%rip),%xmm10        # 3c00 <_sk_callback_sse41+0x21c>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
@@ -18219,7 +18500,7 @@ _sk_hardlight_sse41:
   .byte  15,40,244                           // movaps        %xmm4,%xmm6
   .byte  15,40,227                           // movaps        %xmm3,%xmm4
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
-  .byte  68,15,40,21,50,51,0,0               // movaps        0x3332(%rip),%xmm10        # 3ae0 <_sk_callback_sse41+0x1e3>
+  .byte  68,15,40,21,123,51,0,0              // movaps        0x337b(%rip),%xmm10        # 3c10 <_sk_callback_sse41+0x22c>
   .byte  65,15,40,234                        // movaps        %xmm10,%xmm5
   .byte  15,92,239                           // subps         %xmm7,%xmm5
   .byte  15,40,197                           // movaps        %xmm5,%xmm0
@@ -18302,7 +18583,7 @@ FUNCTION(_sk_overlay_sse41)
 _sk_overlay_sse41:
   .byte  68,15,40,201                        // movaps        %xmm1,%xmm9
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
-  .byte  68,15,40,21,23,50,0,0               // movaps        0x3217(%rip),%xmm10        # 3af0 <_sk_callback_sse41+0x1f3>
+  .byte  68,15,40,21,96,50,0,0               // movaps        0x3260(%rip),%xmm10        # 3c20 <_sk_callback_sse41+0x23c>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
@@ -18387,7 +18668,7 @@ _sk_softlight_sse41:
   .byte  15,40,198                           // movaps        %xmm6,%xmm0
   .byte  15,94,199                           // divps         %xmm7,%xmm0
   .byte  65,15,84,193                        // andps         %xmm9,%xmm0
-  .byte  15,40,13,238,48,0,0                 // movaps        0x30ee(%rip),%xmm1        # 3b00 <_sk_callback_sse41+0x203>
+  .byte  15,40,13,55,49,0,0                  // movaps        0x3137(%rip),%xmm1        # 3c30 <_sk_callback_sse41+0x24c>
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
   .byte  68,15,92,208                        // subps         %xmm0,%xmm10
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
@@ -18400,10 +18681,10 @@ _sk_softlight_sse41:
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  15,89,210                           // mulps         %xmm2,%xmm2
   .byte  15,88,208                           // addps         %xmm0,%xmm2
-  .byte  68,15,40,45,204,48,0,0              // movaps        0x30cc(%rip),%xmm13        # 3b10 <_sk_callback_sse41+0x213>
+  .byte  68,15,40,45,21,49,0,0               // movaps        0x3115(%rip),%xmm13        # 3c40 <_sk_callback_sse41+0x25c>
   .byte  69,15,88,245                        // addps         %xmm13,%xmm14
   .byte  68,15,89,242                        // mulps         %xmm2,%xmm14
-  .byte  68,15,40,37,204,48,0,0              // movaps        0x30cc(%rip),%xmm12        # 3b20 <_sk_callback_sse41+0x223>
+  .byte  68,15,40,37,21,49,0,0               // movaps        0x3115(%rip),%xmm12        # 3c50 <_sk_callback_sse41+0x26c>
   .byte  69,15,89,252                        // mulps         %xmm12,%xmm15
   .byte  69,15,88,254                        // addps         %xmm14,%xmm15
   .byte  15,40,198                           // movaps        %xmm6,%xmm0
@@ -18551,7 +18832,7 @@ HIDDEN _sk_clamp_1_sse41
 .globl _sk_clamp_1_sse41
 FUNCTION(_sk_clamp_1_sse41)
 _sk_clamp_1_sse41:
-  .byte  68,15,40,5,222,46,0,0               // movaps        0x2ede(%rip),%xmm8        # 3b30 <_sk_callback_sse41+0x233>
+  .byte  68,15,40,5,39,47,0,0                // movaps        0x2f27(%rip),%xmm8        # 3c60 <_sk_callback_sse41+0x27c>
   .byte  65,15,93,192                        // minps         %xmm8,%xmm0
   .byte  65,15,93,200                        // minps         %xmm8,%xmm1
   .byte  65,15,93,208                        // minps         %xmm8,%xmm2
@@ -18563,7 +18844,7 @@ HIDDEN _sk_clamp_a_sse41
 .globl _sk_clamp_a_sse41
 FUNCTION(_sk_clamp_a_sse41)
 _sk_clamp_a_sse41:
-  .byte  15,93,29,211,46,0,0                 // minps         0x2ed3(%rip),%xmm3        # 3b40 <_sk_callback_sse41+0x243>
+  .byte  15,93,29,28,47,0,0                  // minps         0x2f1c(%rip),%xmm3        # 3c70 <_sk_callback_sse41+0x28c>
   .byte  15,93,195                           // minps         %xmm3,%xmm0
   .byte  15,93,203                           // minps         %xmm3,%xmm1
   .byte  15,93,211                           // minps         %xmm3,%xmm2
@@ -18650,7 +18931,7 @@ HIDDEN _sk_unpremul_sse41
 FUNCTION(_sk_unpremul_sse41)
 _sk_unpremul_sse41:
   .byte  69,15,87,192                        // xorps         %xmm8,%xmm8
-  .byte  68,15,40,13,62,46,0,0               // movaps        0x2e3e(%rip),%xmm9        # 3b50 <_sk_callback_sse41+0x253>
+  .byte  68,15,40,13,135,46,0,0              // movaps        0x2e87(%rip),%xmm9        # 3c80 <_sk_callback_sse41+0x29c>
   .byte  68,15,94,203                        // divps         %xmm3,%xmm9
   .byte  68,15,194,195,4                     // cmpneqps      %xmm3,%xmm8
   .byte  69,15,84,193                        // andps         %xmm9,%xmm8
@@ -18664,20 +18945,20 @@ HIDDEN _sk_from_srgb_sse41
 .globl _sk_from_srgb_sse41
 FUNCTION(_sk_from_srgb_sse41)
 _sk_from_srgb_sse41:
-  .byte  68,15,40,29,41,46,0,0               // movaps        0x2e29(%rip),%xmm11        # 3b60 <_sk_callback_sse41+0x263>
+  .byte  68,15,40,29,114,46,0,0              // movaps        0x2e72(%rip),%xmm11        # 3c90 <_sk_callback_sse41+0x2ac>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
   .byte  68,15,40,208                        // movaps        %xmm0,%xmm10
   .byte  69,15,89,210                        // mulps         %xmm10,%xmm10
-  .byte  68,15,40,37,33,46,0,0               // movaps        0x2e21(%rip),%xmm12        # 3b70 <_sk_callback_sse41+0x273>
+  .byte  68,15,40,37,106,46,0,0              // movaps        0x2e6a(%rip),%xmm12        # 3ca0 <_sk_callback_sse41+0x2bc>
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
   .byte  69,15,89,196                        // mulps         %xmm12,%xmm8
-  .byte  68,15,40,45,33,46,0,0               // movaps        0x2e21(%rip),%xmm13        # 3b80 <_sk_callback_sse41+0x283>
+  .byte  68,15,40,45,106,46,0,0              // movaps        0x2e6a(%rip),%xmm13        # 3cb0 <_sk_callback_sse41+0x2cc>
   .byte  69,15,88,197                        // addps         %xmm13,%xmm8
   .byte  69,15,89,194                        // mulps         %xmm10,%xmm8
-  .byte  68,15,40,53,33,46,0,0               // movaps        0x2e21(%rip),%xmm14        # 3b90 <_sk_callback_sse41+0x293>
+  .byte  68,15,40,53,106,46,0,0              // movaps        0x2e6a(%rip),%xmm14        # 3cc0 <_sk_callback_sse41+0x2dc>
   .byte  69,15,88,198                        // addps         %xmm14,%xmm8
-  .byte  68,15,40,61,37,46,0,0               // movaps        0x2e25(%rip),%xmm15        # 3ba0 <_sk_callback_sse41+0x2a3>
+  .byte  68,15,40,61,110,46,0,0              // movaps        0x2e6e(%rip),%xmm15        # 3cd0 <_sk_callback_sse41+0x2ec>
   .byte  65,15,194,199,1                     // cmpltps       %xmm15,%xmm0
   .byte  102,69,15,56,20,193                 // blendvps      %xmm0,%xmm9,%xmm8
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
@@ -18722,20 +19003,20 @@ _sk_to_srgb_sse41:
   .byte  68,15,82,192                        // rsqrtps       %xmm0,%xmm8
   .byte  69,15,83,200                        // rcpps         %xmm8,%xmm9
   .byte  69,15,82,208                        // rsqrtps       %xmm8,%xmm10
-  .byte  68,15,40,29,149,45,0,0              // movaps        0x2d95(%rip),%xmm11        # 3bb0 <_sk_callback_sse41+0x2b3>
+  .byte  68,15,40,29,222,45,0,0              // movaps        0x2dde(%rip),%xmm11        # 3ce0 <_sk_callback_sse41+0x2fc>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
-  .byte  68,15,40,37,150,45,0,0              // movaps        0x2d96(%rip),%xmm12        # 3bc0 <_sk_callback_sse41+0x2c3>
+  .byte  68,15,40,37,223,45,0,0              // movaps        0x2ddf(%rip),%xmm12        # 3cf0 <_sk_callback_sse41+0x30c>
   .byte  69,15,89,204                        // mulps         %xmm12,%xmm9
-  .byte  68,15,40,45,154,45,0,0              // movaps        0x2d9a(%rip),%xmm13        # 3bd0 <_sk_callback_sse41+0x2d3>
+  .byte  68,15,40,45,227,45,0,0              // movaps        0x2de3(%rip),%xmm13        # 3d00 <_sk_callback_sse41+0x31c>
   .byte  69,15,88,205                        // addps         %xmm13,%xmm9
-  .byte  68,15,40,53,158,45,0,0              // movaps        0x2d9e(%rip),%xmm14        # 3be0 <_sk_callback_sse41+0x2e3>
+  .byte  68,15,40,53,231,45,0,0              // movaps        0x2de7(%rip),%xmm14        # 3d10 <_sk_callback_sse41+0x32c>
   .byte  69,15,89,214                        // mulps         %xmm14,%xmm10
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
-  .byte  68,15,40,5,158,45,0,0               // movaps        0x2d9e(%rip),%xmm8        # 3bf0 <_sk_callback_sse41+0x2f3>
+  .byte  68,15,40,5,231,45,0,0               // movaps        0x2de7(%rip),%xmm8        # 3d20 <_sk_callback_sse41+0x33c>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,93,202                        // minps         %xmm10,%xmm9
-  .byte  68,15,40,61,158,45,0,0              // movaps        0x2d9e(%rip),%xmm15        # 3c00 <_sk_callback_sse41+0x303>
+  .byte  68,15,40,61,231,45,0,0              // movaps        0x2de7(%rip),%xmm15        # 3d30 <_sk_callback_sse41+0x34c>
   .byte  65,15,194,199,1                     // cmpltps       %xmm15,%xmm0
   .byte  102,68,15,56,20,201                 // blendvps      %xmm0,%xmm1,%xmm9
   .byte  15,82,194                           // rsqrtps       %xmm2,%xmm0
@@ -18789,7 +19070,7 @@ _sk_rgb_to_hsl_sse41:
   .byte  68,15,93,226                        // minps         %xmm2,%xmm12
   .byte  65,15,40,203                        // movaps        %xmm11,%xmm1
   .byte  65,15,92,204                        // subps         %xmm12,%xmm1
-  .byte  68,15,40,53,239,44,0,0              // movaps        0x2cef(%rip),%xmm14        # 3c10 <_sk_callback_sse41+0x313>
+  .byte  68,15,40,53,56,45,0,0               // movaps        0x2d38(%rip),%xmm14        # 3d40 <_sk_callback_sse41+0x35c>
   .byte  68,15,94,241                        // divps         %xmm1,%xmm14
   .byte  69,15,40,211                        // movaps        %xmm11,%xmm10
   .byte  69,15,194,208,0                     // cmpeqps       %xmm8,%xmm10
@@ -18798,27 +19079,27 @@ _sk_rgb_to_hsl_sse41:
   .byte  65,15,89,198                        // mulps         %xmm14,%xmm0
   .byte  69,15,40,249                        // movaps        %xmm9,%xmm15
   .byte  68,15,194,250,1                     // cmpltps       %xmm2,%xmm15
-  .byte  68,15,84,61,214,44,0,0              // andps         0x2cd6(%rip),%xmm15        # 3c20 <_sk_callback_sse41+0x323>
+  .byte  68,15,84,61,31,45,0,0               // andps         0x2d1f(%rip),%xmm15        # 3d50 <_sk_callback_sse41+0x36c>
   .byte  68,15,88,248                        // addps         %xmm0,%xmm15
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
   .byte  65,15,194,193,0                     // cmpeqps       %xmm9,%xmm0
   .byte  65,15,92,208                        // subps         %xmm8,%xmm2
   .byte  65,15,89,214                        // mulps         %xmm14,%xmm2
-  .byte  68,15,40,45,201,44,0,0              // movaps        0x2cc9(%rip),%xmm13        # 3c30 <_sk_callback_sse41+0x333>
+  .byte  68,15,40,45,18,45,0,0               // movaps        0x2d12(%rip),%xmm13        # 3d60 <_sk_callback_sse41+0x37c>
   .byte  65,15,88,213                        // addps         %xmm13,%xmm2
   .byte  69,15,92,193                        // subps         %xmm9,%xmm8
   .byte  69,15,89,198                        // mulps         %xmm14,%xmm8
-  .byte  68,15,88,5,197,44,0,0               // addps         0x2cc5(%rip),%xmm8        # 3c40 <_sk_callback_sse41+0x343>
+  .byte  68,15,88,5,14,45,0,0                // addps         0x2d0e(%rip),%xmm8        # 3d70 <_sk_callback_sse41+0x38c>
   .byte  102,68,15,56,20,194                 // blendvps      %xmm0,%xmm2,%xmm8
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  102,69,15,56,20,199                 // blendvps      %xmm0,%xmm15,%xmm8
-  .byte  68,15,89,5,189,44,0,0               // mulps         0x2cbd(%rip),%xmm8        # 3c50 <_sk_callback_sse41+0x353>
+  .byte  68,15,89,5,6,45,0,0                 // mulps         0x2d06(%rip),%xmm8        # 3d80 <_sk_callback_sse41+0x39c>
   .byte  69,15,40,203                        // movaps        %xmm11,%xmm9
   .byte  69,15,194,204,4                     // cmpneqps      %xmm12,%xmm9
   .byte  69,15,84,193                        // andps         %xmm9,%xmm8
   .byte  69,15,92,235                        // subps         %xmm11,%xmm13
   .byte  69,15,88,220                        // addps         %xmm12,%xmm11
-  .byte  15,40,5,177,44,0,0                  // movaps        0x2cb1(%rip),%xmm0        # 3c60 <_sk_callback_sse41+0x363>
+  .byte  15,40,5,250,44,0,0                  // movaps        0x2cfa(%rip),%xmm0        # 3d90 <_sk_callback_sse41+0x3ac>
   .byte  65,15,40,211                        // movaps        %xmm11,%xmm2
   .byte  15,89,208                           // mulps         %xmm0,%xmm2
   .byte  15,194,194,1                        // cmpltps       %xmm2,%xmm0
@@ -18840,7 +19121,7 @@ _sk_hsl_to_rgb_sse41:
   .byte  15,41,100,36,184                    // movaps        %xmm4,-0x48(%rsp)
   .byte  15,41,92,36,168                     // movaps        %xmm3,-0x58(%rsp)
   .byte  68,15,40,208                        // movaps        %xmm0,%xmm10
-  .byte  68,15,40,13,119,44,0,0              // movaps        0x2c77(%rip),%xmm9        # 3c70 <_sk_callback_sse41+0x373>
+  .byte  68,15,40,13,192,44,0,0              // movaps        0x2cc0(%rip),%xmm9        # 3da0 <_sk_callback_sse41+0x3bc>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,194,194,2                        // cmpleps       %xmm2,%xmm0
   .byte  15,40,217                           // movaps        %xmm1,%xmm3
@@ -18853,19 +19134,19 @@ _sk_hsl_to_rgb_sse41:
   .byte  15,41,84,36,152                     // movaps        %xmm2,-0x68(%rsp)
   .byte  69,15,88,192                        // addps         %xmm8,%xmm8
   .byte  68,15,92,197                        // subps         %xmm5,%xmm8
-  .byte  68,15,40,53,82,44,0,0               // movaps        0x2c52(%rip),%xmm14        # 3c80 <_sk_callback_sse41+0x383>
+  .byte  68,15,40,53,155,44,0,0              // movaps        0x2c9b(%rip),%xmm14        # 3db0 <_sk_callback_sse41+0x3cc>
   .byte  69,15,88,242                        // addps         %xmm10,%xmm14
   .byte  102,65,15,58,8,198,1                // roundps       $0x1,%xmm14,%xmm0
   .byte  68,15,92,240                        // subps         %xmm0,%xmm14
-  .byte  68,15,40,29,75,44,0,0               // movaps        0x2c4b(%rip),%xmm11        # 3c90 <_sk_callback_sse41+0x393>
+  .byte  68,15,40,29,148,44,0,0              // movaps        0x2c94(%rip),%xmm11        # 3dc0 <_sk_callback_sse41+0x3dc>
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
   .byte  65,15,194,198,2                     // cmpleps       %xmm14,%xmm0
   .byte  15,40,245                           // movaps        %xmm5,%xmm6
   .byte  65,15,92,240                        // subps         %xmm8,%xmm6
-  .byte  15,40,61,68,44,0,0                  // movaps        0x2c44(%rip),%xmm7        # 3ca0 <_sk_callback_sse41+0x3a3>
+  .byte  15,40,61,141,44,0,0                 // movaps        0x2c8d(%rip),%xmm7        # 3dd0 <_sk_callback_sse41+0x3ec>
   .byte  69,15,40,238                        // movaps        %xmm14,%xmm13
   .byte  68,15,89,239                        // mulps         %xmm7,%xmm13
-  .byte  15,40,29,69,44,0,0                  // movaps        0x2c45(%rip),%xmm3        # 3cb0 <_sk_callback_sse41+0x3b3>
+  .byte  15,40,29,142,44,0,0                 // movaps        0x2c8e(%rip),%xmm3        # 3de0 <_sk_callback_sse41+0x3fc>
   .byte  68,15,40,227                        // movaps        %xmm3,%xmm12
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  68,15,89,230                        // mulps         %xmm6,%xmm12
@@ -18875,7 +19156,7 @@ _sk_hsl_to_rgb_sse41:
   .byte  65,15,194,198,2                     // cmpleps       %xmm14,%xmm0
   .byte  68,15,40,253                        // movaps        %xmm5,%xmm15
   .byte  102,69,15,56,20,252                 // blendvps      %xmm0,%xmm12,%xmm15
-  .byte  68,15,40,37,36,44,0,0               // movaps        0x2c24(%rip),%xmm12        # 3cc0 <_sk_callback_sse41+0x3c3>
+  .byte  68,15,40,37,109,44,0,0              // movaps        0x2c6d(%rip),%xmm12        # 3df0 <_sk_callback_sse41+0x40c>
   .byte  65,15,40,196                        // movaps        %xmm12,%xmm0
   .byte  65,15,194,198,2                     // cmpleps       %xmm14,%xmm0
   .byte  68,15,89,238                        // mulps         %xmm6,%xmm13
@@ -18909,7 +19190,7 @@ _sk_hsl_to_rgb_sse41:
   .byte  65,15,40,198                        // movaps        %xmm14,%xmm0
   .byte  15,40,84,36,152                     // movaps        -0x68(%rsp),%xmm2
   .byte  102,15,56,20,202                    // blendvps      %xmm0,%xmm2,%xmm1
-  .byte  68,15,88,21,156,43,0,0              // addps         0x2b9c(%rip),%xmm10        # 3cd0 <_sk_callback_sse41+0x3d3>
+  .byte  68,15,88,21,229,43,0,0              // addps         0x2be5(%rip),%xmm10        # 3e00 <_sk_callback_sse41+0x41c>
   .byte  102,65,15,58,8,194,1                // roundps       $0x1,%xmm10,%xmm0
   .byte  68,15,92,208                        // subps         %xmm0,%xmm10
   .byte  69,15,194,218,2                     // cmpleps       %xmm10,%xmm11
@@ -18961,7 +19242,7 @@ _sk_scale_u8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,68,15,56,49,4,56                // pmovzxbd      (%rax,%rdi,1),%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,249,42,0,0               // mulps         0x2af9(%rip),%xmm8        # 3ce0 <_sk_callback_sse41+0x3e3>
+  .byte  68,15,89,5,66,43,0,0                // mulps         0x2b42(%rip),%xmm8        # 3e10 <_sk_callback_sse41+0x42c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
@@ -18999,7 +19280,7 @@ _sk_lerp_u8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,68,15,56,49,4,56                // pmovzxbd      (%rax,%rdi,1),%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,165,42,0,0               // mulps         0x2aa5(%rip),%xmm8        # 3cf0 <_sk_callback_sse41+0x3f3>
+  .byte  68,15,89,5,238,42,0,0               // mulps         0x2aee(%rip),%xmm8        # 3e20 <_sk_callback_sse41+0x43c>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -19022,17 +19303,17 @@ _sk_lerp_565_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,68,15,56,51,4,120               // pmovzxwd      (%rax,%rdi,2),%xmm8
-  .byte  102,15,111,29,117,42,0,0            // movdqa        0x2a75(%rip),%xmm3        # 3d00 <_sk_callback_sse41+0x403>
+  .byte  102,15,111,29,190,42,0,0            // movdqa        0x2abe(%rip),%xmm3        # 3e30 <_sk_callback_sse41+0x44c>
   .byte  102,65,15,219,216                   // pand          %xmm8,%xmm3
   .byte  68,15,91,203                        // cvtdq2ps      %xmm3,%xmm9
-  .byte  68,15,89,13,116,42,0,0              // mulps         0x2a74(%rip),%xmm9        # 3d10 <_sk_callback_sse41+0x413>
-  .byte  102,15,111,29,124,42,0,0            // movdqa        0x2a7c(%rip),%xmm3        # 3d20 <_sk_callback_sse41+0x423>
+  .byte  68,15,89,13,189,42,0,0              // mulps         0x2abd(%rip),%xmm9        # 3e40 <_sk_callback_sse41+0x45c>
+  .byte  102,15,111,29,197,42,0,0            // movdqa        0x2ac5(%rip),%xmm3        # 3e50 <_sk_callback_sse41+0x46c>
   .byte  102,65,15,219,216                   // pand          %xmm8,%xmm3
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,125,42,0,0                 // mulps         0x2a7d(%rip),%xmm3        # 3d30 <_sk_callback_sse41+0x433>
-  .byte  102,68,15,219,5,132,42,0,0          // pand          0x2a84(%rip),%xmm8        # 3d40 <_sk_callback_sse41+0x443>
+  .byte  15,89,29,198,42,0,0                 // mulps         0x2ac6(%rip),%xmm3        # 3e60 <_sk_callback_sse41+0x47c>
+  .byte  102,68,15,219,5,205,42,0,0          // pand          0x2acd(%rip),%xmm8        # 3e70 <_sk_callback_sse41+0x48c>
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,136,42,0,0               // mulps         0x2a88(%rip),%xmm8        # 3d50 <_sk_callback_sse41+0x453>
+  .byte  68,15,89,5,209,42,0,0               // mulps         0x2ad1(%rip),%xmm8        # 3e80 <_sk_callback_sse41+0x49c>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -19043,7 +19324,7 @@ _sk_lerp_565_sse41:
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  15,88,214                           // addps         %xmm6,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,114,42,0,0                 // movaps        0x2a72(%rip),%xmm3        # 3d60 <_sk_callback_sse41+0x463>
+  .byte  15,40,29,187,42,0,0                 // movaps        0x2abb(%rip),%xmm3        # 3e90 <_sk_callback_sse41+0x4ac>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_load_tables_sse41
@@ -19054,7 +19335,7 @@ _sk_load_tables_sse41:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  243,69,15,111,4,184                 // movdqu        (%r8,%rdi,4),%xmm8
-  .byte  102,15,111,5,105,42,0,0             // movdqa        0x2a69(%rip),%xmm0        # 3d70 <_sk_callback_sse41+0x473>
+  .byte  102,15,111,5,178,42,0,0             // movdqa        0x2ab2(%rip),%xmm0        # 3ea0 <_sk_callback_sse41+0x4bc>
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,73,15,58,22,192,1               // pextrq        $0x1,%xmm0,%r8
   .byte  102,72,15,126,193                   // movq          %xmm0,%rcx
@@ -19069,7 +19350,7 @@ _sk_load_tables_sse41:
   .byte  102,15,58,33,193,48                 // insertps      $0x30,%xmm1,%xmm0
   .byte  76,139,64,16                        // mov           0x10(%rax),%r8
   .byte  102,65,15,111,200                   // movdqa        %xmm8,%xmm1
-  .byte  102,15,56,0,13,36,42,0,0            // pshufb        0x2a24(%rip),%xmm1        # 3d80 <_sk_callback_sse41+0x483>
+  .byte  102,15,56,0,13,109,42,0,0           // pshufb        0x2a6d(%rip),%xmm1        # 3eb0 <_sk_callback_sse41+0x4cc>
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
   .byte  102,72,15,126,201                   // movq          %xmm1,%rcx
   .byte  68,15,182,209                       // movzbl        %cl,%r10d
@@ -19084,7 +19365,7 @@ _sk_load_tables_sse41:
   .byte  102,15,58,33,202,48                 // insertps      $0x30,%xmm2,%xmm1
   .byte  76,139,64,24                        // mov           0x18(%rax),%r8
   .byte  102,65,15,111,208                   // movdqa        %xmm8,%xmm2
-  .byte  102,15,56,0,21,224,41,0,0           // pshufb        0x29e0(%rip),%xmm2        # 3d90 <_sk_callback_sse41+0x493>
+  .byte  102,15,56,0,21,41,42,0,0            // pshufb        0x2a29(%rip),%xmm2        # 3ec0 <_sk_callback_sse41+0x4dc>
   .byte  102,72,15,58,22,209,1               // pextrq        $0x1,%xmm2,%rcx
   .byte  102,72,15,126,208                   // movq          %xmm2,%rax
   .byte  68,15,182,200                       // movzbl        %al,%r9d
@@ -19099,7 +19380,7 @@ _sk_load_tables_sse41:
   .byte  102,15,58,33,211,48                 // insertps      $0x30,%xmm3,%xmm2
   .byte  102,65,15,114,208,24                // psrld         $0x18,%xmm8
   .byte  65,15,91,216                        // cvtdq2ps      %xmm8,%xmm3
-  .byte  15,89,29,157,41,0,0                 // mulps         0x299d(%rip),%xmm3        # 3da0 <_sk_callback_sse41+0x4a3>
+  .byte  15,89,29,230,41,0,0                 // mulps         0x29e6(%rip),%xmm3        # 3ed0 <_sk_callback_sse41+0x4ec>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -19118,7 +19399,7 @@ _sk_load_tables_u16_be_sse41:
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,97,200                       // punpcklwd     %xmm0,%xmm1
   .byte  102,68,15,105,200                   // punpckhwd     %xmm0,%xmm9
-  .byte  102,68,15,111,5,112,41,0,0          // movdqa        0x2970(%rip),%xmm8        # 3db0 <_sk_callback_sse41+0x4b3>
+  .byte  102,68,15,111,5,185,41,0,0          // movdqa        0x29b9(%rip),%xmm8        # 3ee0 <_sk_callback_sse41+0x4fc>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,15,56,51,192                    // pmovzxwd      %xmm0,%xmm0
@@ -19135,7 +19416,7 @@ _sk_load_tables_u16_be_sse41:
   .byte  243,67,15,16,20,8                   // movss         (%r8,%r9,1),%xmm2
   .byte  102,15,58,33,194,48                 // insertps      $0x30,%xmm2,%xmm0
   .byte  76,139,64,16                        // mov           0x10(%rax),%r8
-  .byte  102,15,56,0,13,35,41,0,0            // pshufb        0x2923(%rip),%xmm1        # 3dc0 <_sk_callback_sse41+0x4c3>
+  .byte  102,15,56,0,13,108,41,0,0           // pshufb        0x296c(%rip),%xmm1        # 3ef0 <_sk_callback_sse41+0x50c>
   .byte  102,15,56,51,201                    // pmovzxwd      %xmm1,%xmm1
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
   .byte  102,72,15,126,201                   // movq          %xmm1,%rcx
@@ -19171,7 +19452,7 @@ _sk_load_tables_u16_be_sse41:
   .byte  102,65,15,235,216                   // por           %xmm8,%xmm3
   .byte  102,15,56,51,219                    // pmovzxwd      %xmm3,%xmm3
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,113,40,0,0                 // mulps         0x2871(%rip),%xmm3        # 3dd0 <_sk_callback_sse41+0x4d3>
+  .byte  15,89,29,186,40,0,0                 // mulps         0x28ba(%rip),%xmm3        # 3f00 <_sk_callback_sse41+0x51c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -19193,7 +19474,7 @@ _sk_load_tables_rgb_u16_be_sse41:
   .byte  102,68,15,97,200                    // punpcklwd     %xmm0,%xmm9
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
   .byte  102,65,15,97,201                    // punpcklwd     %xmm9,%xmm1
-  .byte  102,68,15,111,5,51,40,0,0           // movdqa        0x2833(%rip),%xmm8        # 3de0 <_sk_callback_sse41+0x4e3>
+  .byte  102,68,15,111,5,124,40,0,0          // movdqa        0x287c(%rip),%xmm8        # 3f10 <_sk_callback_sse41+0x52c>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,15,56,51,192                    // pmovzxwd      %xmm0,%xmm0
@@ -19210,7 +19491,7 @@ _sk_load_tables_rgb_u16_be_sse41:
   .byte  243,67,15,16,28,8                   // movss         (%r8,%r9,1),%xmm3
   .byte  102,15,58,33,195,48                 // insertps      $0x30,%xmm3,%xmm0
   .byte  76,139,64,16                        // mov           0x10(%rax),%r8
-  .byte  102,15,56,0,13,230,39,0,0           // pshufb        0x27e6(%rip),%xmm1        # 3df0 <_sk_callback_sse41+0x4f3>
+  .byte  102,15,56,0,13,47,40,0,0            // pshufb        0x282f(%rip),%xmm1        # 3f20 <_sk_callback_sse41+0x53c>
   .byte  102,15,56,51,201                    // pmovzxwd      %xmm1,%xmm1
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
   .byte  102,72,15,126,201                   // movq          %xmm1,%rcx
@@ -19241,7 +19522,7 @@ _sk_load_tables_rgb_u16_be_sse41:
   .byte  243,65,15,16,28,8                   // movss         (%r8,%rcx,1),%xmm3
   .byte  102,15,58,33,211,48                 // insertps      $0x30,%xmm3,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,81,39,0,0                  // movaps        0x2751(%rip),%xmm3        # 3e00 <_sk_callback_sse41+0x503>
+  .byte  15,40,29,154,39,0,0                 // movaps        0x279a(%rip),%xmm3        # 3f30 <_sk_callback_sse41+0x54c>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_byte_tables_sse41
@@ -19251,7 +19532,7 @@ _sk_byte_tables_sse41:
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,82,39,0,0                // movaps        0x2752(%rip),%xmm8        # 3e10 <_sk_callback_sse41+0x513>
+  .byte  68,15,40,5,155,39,0,0               // movaps        0x279b(%rip),%xmm8        # 3f40 <_sk_callback_sse41+0x55c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,91,192                       // cvtps2dq      %xmm0,%xmm0
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
@@ -19270,7 +19551,7 @@ _sk_byte_tables_sse41:
   .byte  102,15,58,32,193,3                  // pinsrb        $0x3,%ecx,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,13,3,39,0,0                // movaps        0x2703(%rip),%xmm9        # 3e20 <_sk_callback_sse41+0x523>
+  .byte  68,15,40,13,76,39,0,0               // movaps        0x274c(%rip),%xmm9        # 3f50 <_sk_callback_sse41+0x56c>
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -19361,7 +19642,7 @@ _sk_byte_tables_rgb_sse41:
   .byte  102,15,58,32,193,3                  // pinsrb        $0x3,%ecx,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,13,139,37,0,0              // movaps        0x258b(%rip),%xmm9        # 3e30 <_sk_callback_sse41+0x533>
+  .byte  68,15,40,13,212,37,0,0              // movaps        0x25d4(%rip),%xmm9        # 3f60 <_sk_callback_sse41+0x57c>
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -19538,31 +19819,31 @@ _sk_parametric_r_sse41:
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,194                        // cvtdq2ps      %xmm10,%xmm8
-  .byte  68,15,89,5,226,34,0,0               // mulps         0x22e2(%rip),%xmm8        # 3e40 <_sk_callback_sse41+0x543>
-  .byte  68,15,84,21,234,34,0,0              // andps         0x22ea(%rip),%xmm10        # 3e50 <_sk_callback_sse41+0x553>
-  .byte  68,15,86,21,242,34,0,0              // orps          0x22f2(%rip),%xmm10        # 3e60 <_sk_callback_sse41+0x563>
-  .byte  68,15,88,5,250,34,0,0               // addps         0x22fa(%rip),%xmm8        # 3e70 <_sk_callback_sse41+0x573>
-  .byte  68,15,40,37,2,35,0,0                // movaps        0x2302(%rip),%xmm12        # 3e80 <_sk_callback_sse41+0x583>
+  .byte  68,15,89,5,43,35,0,0                // mulps         0x232b(%rip),%xmm8        # 3f70 <_sk_callback_sse41+0x58c>
+  .byte  68,15,84,21,51,35,0,0               // andps         0x2333(%rip),%xmm10        # 3f80 <_sk_callback_sse41+0x59c>
+  .byte  68,15,86,21,59,35,0,0               // orps          0x233b(%rip),%xmm10        # 3f90 <_sk_callback_sse41+0x5ac>
+  .byte  68,15,88,5,67,35,0,0                // addps         0x2343(%rip),%xmm8        # 3fa0 <_sk_callback_sse41+0x5bc>
+  .byte  68,15,40,37,75,35,0,0               // movaps        0x234b(%rip),%xmm12        # 3fb0 <_sk_callback_sse41+0x5cc>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,196                        // subps         %xmm12,%xmm8
-  .byte  68,15,88,21,2,35,0,0                // addps         0x2302(%rip),%xmm10        # 3e90 <_sk_callback_sse41+0x593>
-  .byte  68,15,40,37,10,35,0,0               // movaps        0x230a(%rip),%xmm12        # 3ea0 <_sk_callback_sse41+0x5a3>
+  .byte  68,15,88,21,75,35,0,0               // addps         0x234b(%rip),%xmm10        # 3fc0 <_sk_callback_sse41+0x5dc>
+  .byte  68,15,40,37,83,35,0,0               // movaps        0x2353(%rip),%xmm12        # 3fd0 <_sk_callback_sse41+0x5ec>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,196                        // subps         %xmm12,%xmm8
   .byte  69,15,89,195                        // mulps         %xmm11,%xmm8
   .byte  102,69,15,58,8,208,1                // roundps       $0x1,%xmm8,%xmm10
   .byte  69,15,40,216                        // movaps        %xmm8,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,5,247,34,0,0               // addps         0x22f7(%rip),%xmm8        # 3eb0 <_sk_callback_sse41+0x5b3>
-  .byte  68,15,40,21,255,34,0,0              // movaps        0x22ff(%rip),%xmm10        # 3ec0 <_sk_callback_sse41+0x5c3>
+  .byte  68,15,88,5,64,35,0,0                // addps         0x2340(%rip),%xmm8        # 3fe0 <_sk_callback_sse41+0x5fc>
+  .byte  68,15,40,21,72,35,0,0               // movaps        0x2348(%rip),%xmm10        # 3ff0 <_sk_callback_sse41+0x60c>
   .byte  69,15,89,211                        // mulps         %xmm11,%xmm10
   .byte  69,15,92,194                        // subps         %xmm10,%xmm8
-  .byte  68,15,40,21,255,34,0,0              // movaps        0x22ff(%rip),%xmm10        # 3ed0 <_sk_callback_sse41+0x5d3>
+  .byte  68,15,40,21,72,35,0,0               // movaps        0x2348(%rip),%xmm10        # 4000 <_sk_callback_sse41+0x61c>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  68,15,40,29,3,35,0,0                // movaps        0x2303(%rip),%xmm11        # 3ee0 <_sk_callback_sse41+0x5e3>
+  .byte  68,15,40,29,76,35,0,0               // movaps        0x234c(%rip),%xmm11        # 4010 <_sk_callback_sse41+0x62c>
   .byte  69,15,94,218                        // divps         %xmm10,%xmm11
   .byte  69,15,88,216                        // addps         %xmm8,%xmm11
-  .byte  68,15,89,29,3,35,0,0                // mulps         0x2303(%rip),%xmm11        # 3ef0 <_sk_callback_sse41+0x5f3>
+  .byte  68,15,89,29,76,35,0,0               // mulps         0x234c(%rip),%xmm11        # 4020 <_sk_callback_sse41+0x63c>
   .byte  102,69,15,91,211                    // cvtps2dq      %xmm11,%xmm10
   .byte  243,68,15,16,64,20                  // movss         0x14(%rax),%xmm8
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
@@ -19570,7 +19851,7 @@ _sk_parametric_r_sse41:
   .byte  102,69,15,56,20,193                 // blendvps      %xmm0,%xmm9,%xmm8
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  68,15,95,192                        // maxps         %xmm0,%xmm8
-  .byte  68,15,93,5,234,34,0,0               // minps         0x22ea(%rip),%xmm8        # 3f00 <_sk_callback_sse41+0x603>
+  .byte  68,15,93,5,51,35,0,0                // minps         0x2333(%rip),%xmm8        # 4030 <_sk_callback_sse41+0x64c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -19600,31 +19881,31 @@ _sk_parametric_g_sse41:
   .byte  68,15,88,217                        // addps         %xmm1,%xmm11
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,91,227                        // cvtdq2ps      %xmm11,%xmm12
-  .byte  68,15,89,37,139,34,0,0              // mulps         0x228b(%rip),%xmm12        # 3f10 <_sk_callback_sse41+0x613>
-  .byte  68,15,84,29,147,34,0,0              // andps         0x2293(%rip),%xmm11        # 3f20 <_sk_callback_sse41+0x623>
-  .byte  68,15,86,29,155,34,0,0              // orps          0x229b(%rip),%xmm11        # 3f30 <_sk_callback_sse41+0x633>
-  .byte  68,15,88,37,163,34,0,0              // addps         0x22a3(%rip),%xmm12        # 3f40 <_sk_callback_sse41+0x643>
-  .byte  15,40,13,172,34,0,0                 // movaps        0x22ac(%rip),%xmm1        # 3f50 <_sk_callback_sse41+0x653>
+  .byte  68,15,89,37,212,34,0,0              // mulps         0x22d4(%rip),%xmm12        # 4040 <_sk_callback_sse41+0x65c>
+  .byte  68,15,84,29,220,34,0,0              // andps         0x22dc(%rip),%xmm11        # 4050 <_sk_callback_sse41+0x66c>
+  .byte  68,15,86,29,228,34,0,0              // orps          0x22e4(%rip),%xmm11        # 4060 <_sk_callback_sse41+0x67c>
+  .byte  68,15,88,37,236,34,0,0              // addps         0x22ec(%rip),%xmm12        # 4070 <_sk_callback_sse41+0x68c>
+  .byte  15,40,13,245,34,0,0                 // movaps        0x22f5(%rip),%xmm1        # 4080 <_sk_callback_sse41+0x69c>
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
   .byte  68,15,92,225                        // subps         %xmm1,%xmm12
-  .byte  68,15,88,29,172,34,0,0              // addps         0x22ac(%rip),%xmm11        # 3f60 <_sk_callback_sse41+0x663>
-  .byte  15,40,13,181,34,0,0                 // movaps        0x22b5(%rip),%xmm1        # 3f70 <_sk_callback_sse41+0x673>
+  .byte  68,15,88,29,245,34,0,0              // addps         0x22f5(%rip),%xmm11        # 4090 <_sk_callback_sse41+0x6ac>
+  .byte  15,40,13,254,34,0,0                 // movaps        0x22fe(%rip),%xmm1        # 40a0 <_sk_callback_sse41+0x6bc>
   .byte  65,15,94,203                        // divps         %xmm11,%xmm1
   .byte  68,15,92,225                        // subps         %xmm1,%xmm12
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  102,69,15,58,8,212,1                // roundps       $0x1,%xmm12,%xmm10
   .byte  69,15,40,220                        // movaps        %xmm12,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,37,162,34,0,0              // addps         0x22a2(%rip),%xmm12        # 3f80 <_sk_callback_sse41+0x683>
-  .byte  15,40,13,171,34,0,0                 // movaps        0x22ab(%rip),%xmm1        # 3f90 <_sk_callback_sse41+0x693>
+  .byte  68,15,88,37,235,34,0,0              // addps         0x22eb(%rip),%xmm12        # 40b0 <_sk_callback_sse41+0x6cc>
+  .byte  15,40,13,244,34,0,0                 // movaps        0x22f4(%rip),%xmm1        # 40c0 <_sk_callback_sse41+0x6dc>
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
   .byte  68,15,92,225                        // subps         %xmm1,%xmm12
-  .byte  68,15,40,21,171,34,0,0              // movaps        0x22ab(%rip),%xmm10        # 3fa0 <_sk_callback_sse41+0x6a3>
+  .byte  68,15,40,21,244,34,0,0              // movaps        0x22f4(%rip),%xmm10        # 40d0 <_sk_callback_sse41+0x6ec>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  15,40,13,176,34,0,0                 // movaps        0x22b0(%rip),%xmm1        # 3fb0 <_sk_callback_sse41+0x6b3>
+  .byte  15,40,13,249,34,0,0                 // movaps        0x22f9(%rip),%xmm1        # 40e0 <_sk_callback_sse41+0x6fc>
   .byte  65,15,94,202                        // divps         %xmm10,%xmm1
   .byte  65,15,88,204                        // addps         %xmm12,%xmm1
-  .byte  15,89,13,177,34,0,0                 // mulps         0x22b1(%rip),%xmm1        # 3fc0 <_sk_callback_sse41+0x6c3>
+  .byte  15,89,13,250,34,0,0                 // mulps         0x22fa(%rip),%xmm1        # 40f0 <_sk_callback_sse41+0x70c>
   .byte  102,68,15,91,209                    // cvtps2dq      %xmm1,%xmm10
   .byte  243,15,16,72,20                     // movss         0x14(%rax),%xmm1
   .byte  15,198,201,0                        // shufps        $0x0,%xmm1,%xmm1
@@ -19632,7 +19913,7 @@ _sk_parametric_g_sse41:
   .byte  102,65,15,56,20,201                 // blendvps      %xmm0,%xmm9,%xmm1
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,95,200                           // maxps         %xmm0,%xmm1
-  .byte  15,93,13,156,34,0,0                 // minps         0x229c(%rip),%xmm1        # 3fd0 <_sk_callback_sse41+0x6d3>
+  .byte  15,93,13,229,34,0,0                 // minps         0x22e5(%rip),%xmm1        # 4100 <_sk_callback_sse41+0x71c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -19662,31 +19943,31 @@ _sk_parametric_b_sse41:
   .byte  68,15,88,218                        // addps         %xmm2,%xmm11
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,91,227                        // cvtdq2ps      %xmm11,%xmm12
-  .byte  68,15,89,37,61,34,0,0               // mulps         0x223d(%rip),%xmm12        # 3fe0 <_sk_callback_sse41+0x6e3>
-  .byte  68,15,84,29,69,34,0,0               // andps         0x2245(%rip),%xmm11        # 3ff0 <_sk_callback_sse41+0x6f3>
-  .byte  68,15,86,29,77,34,0,0               // orps          0x224d(%rip),%xmm11        # 4000 <_sk_callback_sse41+0x703>
-  .byte  68,15,88,37,85,34,0,0               // addps         0x2255(%rip),%xmm12        # 4010 <_sk_callback_sse41+0x713>
-  .byte  15,40,21,94,34,0,0                  // movaps        0x225e(%rip),%xmm2        # 4020 <_sk_callback_sse41+0x723>
+  .byte  68,15,89,37,134,34,0,0              // mulps         0x2286(%rip),%xmm12        # 4110 <_sk_callback_sse41+0x72c>
+  .byte  68,15,84,29,142,34,0,0              // andps         0x228e(%rip),%xmm11        # 4120 <_sk_callback_sse41+0x73c>
+  .byte  68,15,86,29,150,34,0,0              // orps          0x2296(%rip),%xmm11        # 4130 <_sk_callback_sse41+0x74c>
+  .byte  68,15,88,37,158,34,0,0              // addps         0x229e(%rip),%xmm12        # 4140 <_sk_callback_sse41+0x75c>
+  .byte  15,40,21,167,34,0,0                 // movaps        0x22a7(%rip),%xmm2        # 4150 <_sk_callback_sse41+0x76c>
   .byte  65,15,89,211                        // mulps         %xmm11,%xmm2
   .byte  68,15,92,226                        // subps         %xmm2,%xmm12
-  .byte  68,15,88,29,94,34,0,0               // addps         0x225e(%rip),%xmm11        # 4030 <_sk_callback_sse41+0x733>
-  .byte  15,40,21,103,34,0,0                 // movaps        0x2267(%rip),%xmm2        # 4040 <_sk_callback_sse41+0x743>
+  .byte  68,15,88,29,167,34,0,0              // addps         0x22a7(%rip),%xmm11        # 4160 <_sk_callback_sse41+0x77c>
+  .byte  15,40,21,176,34,0,0                 // movaps        0x22b0(%rip),%xmm2        # 4170 <_sk_callback_sse41+0x78c>
   .byte  65,15,94,211                        // divps         %xmm11,%xmm2
   .byte  68,15,92,226                        // subps         %xmm2,%xmm12
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  102,69,15,58,8,212,1                // roundps       $0x1,%xmm12,%xmm10
   .byte  69,15,40,220                        // movaps        %xmm12,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,37,84,34,0,0               // addps         0x2254(%rip),%xmm12        # 4050 <_sk_callback_sse41+0x753>
-  .byte  15,40,21,93,34,0,0                  // movaps        0x225d(%rip),%xmm2        # 4060 <_sk_callback_sse41+0x763>
+  .byte  68,15,88,37,157,34,0,0              // addps         0x229d(%rip),%xmm12        # 4180 <_sk_callback_sse41+0x79c>
+  .byte  15,40,21,166,34,0,0                 // movaps        0x22a6(%rip),%xmm2        # 4190 <_sk_callback_sse41+0x7ac>
   .byte  65,15,89,211                        // mulps         %xmm11,%xmm2
   .byte  68,15,92,226                        // subps         %xmm2,%xmm12
-  .byte  68,15,40,21,93,34,0,0               // movaps        0x225d(%rip),%xmm10        # 4070 <_sk_callback_sse41+0x773>
+  .byte  68,15,40,21,166,34,0,0              // movaps        0x22a6(%rip),%xmm10        # 41a0 <_sk_callback_sse41+0x7bc>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  15,40,21,98,34,0,0                  // movaps        0x2262(%rip),%xmm2        # 4080 <_sk_callback_sse41+0x783>
+  .byte  15,40,21,171,34,0,0                 // movaps        0x22ab(%rip),%xmm2        # 41b0 <_sk_callback_sse41+0x7cc>
   .byte  65,15,94,210                        // divps         %xmm10,%xmm2
   .byte  65,15,88,212                        // addps         %xmm12,%xmm2
-  .byte  15,89,21,99,34,0,0                  // mulps         0x2263(%rip),%xmm2        # 4090 <_sk_callback_sse41+0x793>
+  .byte  15,89,21,172,34,0,0                 // mulps         0x22ac(%rip),%xmm2        # 41c0 <_sk_callback_sse41+0x7dc>
   .byte  102,68,15,91,210                    // cvtps2dq      %xmm2,%xmm10
   .byte  243,15,16,80,20                     // movss         0x14(%rax),%xmm2
   .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2
@@ -19694,7 +19975,7 @@ _sk_parametric_b_sse41:
   .byte  102,65,15,56,20,209                 // blendvps      %xmm0,%xmm9,%xmm2
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,95,208                           // maxps         %xmm0,%xmm2
-  .byte  15,93,21,78,34,0,0                  // minps         0x224e(%rip),%xmm2        # 40a0 <_sk_callback_sse41+0x7a3>
+  .byte  15,93,21,151,34,0,0                 // minps         0x2297(%rip),%xmm2        # 41d0 <_sk_callback_sse41+0x7ec>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -19724,31 +20005,31 @@ _sk_parametric_a_sse41:
   .byte  68,15,88,219                        // addps         %xmm3,%xmm11
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,91,227                        // cvtdq2ps      %xmm11,%xmm12
-  .byte  68,15,89,37,239,33,0,0              // mulps         0x21ef(%rip),%xmm12        # 40b0 <_sk_callback_sse41+0x7b3>
-  .byte  68,15,84,29,247,33,0,0              // andps         0x21f7(%rip),%xmm11        # 40c0 <_sk_callback_sse41+0x7c3>
-  .byte  68,15,86,29,255,33,0,0              // orps          0x21ff(%rip),%xmm11        # 40d0 <_sk_callback_sse41+0x7d3>
-  .byte  68,15,88,37,7,34,0,0                // addps         0x2207(%rip),%xmm12        # 40e0 <_sk_callback_sse41+0x7e3>
-  .byte  15,40,29,16,34,0,0                  // movaps        0x2210(%rip),%xmm3        # 40f0 <_sk_callback_sse41+0x7f3>
+  .byte  68,15,89,37,56,34,0,0               // mulps         0x2238(%rip),%xmm12        # 41e0 <_sk_callback_sse41+0x7fc>
+  .byte  68,15,84,29,64,34,0,0               // andps         0x2240(%rip),%xmm11        # 41f0 <_sk_callback_sse41+0x80c>
+  .byte  68,15,86,29,72,34,0,0               // orps          0x2248(%rip),%xmm11        # 4200 <_sk_callback_sse41+0x81c>
+  .byte  68,15,88,37,80,34,0,0               // addps         0x2250(%rip),%xmm12        # 4210 <_sk_callback_sse41+0x82c>
+  .byte  15,40,29,89,34,0,0                  // movaps        0x2259(%rip),%xmm3        # 4220 <_sk_callback_sse41+0x83c>
   .byte  65,15,89,219                        // mulps         %xmm11,%xmm3
   .byte  68,15,92,227                        // subps         %xmm3,%xmm12
-  .byte  68,15,88,29,16,34,0,0               // addps         0x2210(%rip),%xmm11        # 4100 <_sk_callback_sse41+0x803>
-  .byte  15,40,29,25,34,0,0                  // movaps        0x2219(%rip),%xmm3        # 4110 <_sk_callback_sse41+0x813>
+  .byte  68,15,88,29,89,34,0,0               // addps         0x2259(%rip),%xmm11        # 4230 <_sk_callback_sse41+0x84c>
+  .byte  15,40,29,98,34,0,0                  // movaps        0x2262(%rip),%xmm3        # 4240 <_sk_callback_sse41+0x85c>
   .byte  65,15,94,219                        // divps         %xmm11,%xmm3
   .byte  68,15,92,227                        // subps         %xmm3,%xmm12
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  102,69,15,58,8,212,1                // roundps       $0x1,%xmm12,%xmm10
   .byte  69,15,40,220                        // movaps        %xmm12,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,37,6,34,0,0                // addps         0x2206(%rip),%xmm12        # 4120 <_sk_callback_sse41+0x823>
-  .byte  15,40,29,15,34,0,0                  // movaps        0x220f(%rip),%xmm3        # 4130 <_sk_callback_sse41+0x833>
+  .byte  68,15,88,37,79,34,0,0               // addps         0x224f(%rip),%xmm12        # 4250 <_sk_callback_sse41+0x86c>
+  .byte  15,40,29,88,34,0,0                  // movaps        0x2258(%rip),%xmm3        # 4260 <_sk_callback_sse41+0x87c>
   .byte  65,15,89,219                        // mulps         %xmm11,%xmm3
   .byte  68,15,92,227                        // subps         %xmm3,%xmm12
-  .byte  68,15,40,21,15,34,0,0               // movaps        0x220f(%rip),%xmm10        # 4140 <_sk_callback_sse41+0x843>
+  .byte  68,15,40,21,88,34,0,0               // movaps        0x2258(%rip),%xmm10        # 4270 <_sk_callback_sse41+0x88c>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  15,40,29,20,34,0,0                  // movaps        0x2214(%rip),%xmm3        # 4150 <_sk_callback_sse41+0x853>
+  .byte  15,40,29,93,34,0,0                  // movaps        0x225d(%rip),%xmm3        # 4280 <_sk_callback_sse41+0x89c>
   .byte  65,15,94,218                        // divps         %xmm10,%xmm3
   .byte  65,15,88,220                        // addps         %xmm12,%xmm3
-  .byte  15,89,29,21,34,0,0                  // mulps         0x2215(%rip),%xmm3        # 4160 <_sk_callback_sse41+0x863>
+  .byte  15,89,29,94,34,0,0                  // mulps         0x225e(%rip),%xmm3        # 4290 <_sk_callback_sse41+0x8ac>
   .byte  102,68,15,91,211                    // cvtps2dq      %xmm3,%xmm10
   .byte  243,15,16,88,20                     // movss         0x14(%rax),%xmm3
   .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
@@ -19756,7 +20037,7 @@ _sk_parametric_a_sse41:
   .byte  102,65,15,56,20,217                 // blendvps      %xmm0,%xmm9,%xmm3
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,95,216                           // maxps         %xmm0,%xmm3
-  .byte  15,93,29,0,34,0,0                   // minps         0x2200(%rip),%xmm3        # 4170 <_sk_callback_sse41+0x873>
+  .byte  15,93,29,73,34,0,0                  // minps         0x2249(%rip),%xmm3        # 42a0 <_sk_callback_sse41+0x8bc>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -19766,29 +20047,29 @@ HIDDEN _sk_lab_to_xyz_sse41
 FUNCTION(_sk_lab_to_xyz_sse41)
 _sk_lab_to_xyz_sse41:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,89,5,252,33,0,0               // mulps         0x21fc(%rip),%xmm8        # 4180 <_sk_callback_sse41+0x883>
-  .byte  68,15,40,13,4,34,0,0                // movaps        0x2204(%rip),%xmm9        # 4190 <_sk_callback_sse41+0x893>
+  .byte  68,15,89,5,69,34,0,0                // mulps         0x2245(%rip),%xmm8        # 42b0 <_sk_callback_sse41+0x8cc>
+  .byte  68,15,40,13,77,34,0,0               // movaps        0x224d(%rip),%xmm9        # 42c0 <_sk_callback_sse41+0x8dc>
   .byte  65,15,89,201                        // mulps         %xmm9,%xmm1
-  .byte  15,40,5,9,34,0,0                    // movaps        0x2209(%rip),%xmm0        # 41a0 <_sk_callback_sse41+0x8a3>
+  .byte  15,40,5,82,34,0,0                   // movaps        0x2252(%rip),%xmm0        # 42d0 <_sk_callback_sse41+0x8ec>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
   .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
   .byte  15,88,208                           // addps         %xmm0,%xmm2
-  .byte  68,15,88,5,7,34,0,0                 // addps         0x2207(%rip),%xmm8        # 41b0 <_sk_callback_sse41+0x8b3>
-  .byte  68,15,89,5,15,34,0,0                // mulps         0x220f(%rip),%xmm8        # 41c0 <_sk_callback_sse41+0x8c3>
-  .byte  15,89,13,24,34,0,0                  // mulps         0x2218(%rip),%xmm1        # 41d0 <_sk_callback_sse41+0x8d3>
+  .byte  68,15,88,5,80,34,0,0                // addps         0x2250(%rip),%xmm8        # 42e0 <_sk_callback_sse41+0x8fc>
+  .byte  68,15,89,5,88,34,0,0                // mulps         0x2258(%rip),%xmm8        # 42f0 <_sk_callback_sse41+0x90c>
+  .byte  15,89,13,97,34,0,0                  // mulps         0x2261(%rip),%xmm1        # 4300 <_sk_callback_sse41+0x91c>
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  15,89,21,29,34,0,0                  // mulps         0x221d(%rip),%xmm2        # 41e0 <_sk_callback_sse41+0x8e3>
+  .byte  15,89,21,102,34,0,0                 // mulps         0x2266(%rip),%xmm2        # 4310 <_sk_callback_sse41+0x92c>
   .byte  69,15,40,208                        // movaps        %xmm8,%xmm10
   .byte  68,15,92,210                        // subps         %xmm2,%xmm10
   .byte  68,15,40,217                        // movaps        %xmm1,%xmm11
   .byte  69,15,89,219                        // mulps         %xmm11,%xmm11
   .byte  68,15,89,217                        // mulps         %xmm1,%xmm11
-  .byte  68,15,40,13,17,34,0,0               // movaps        0x2211(%rip),%xmm9        # 41f0 <_sk_callback_sse41+0x8f3>
+  .byte  68,15,40,13,90,34,0,0               // movaps        0x225a(%rip),%xmm9        # 4320 <_sk_callback_sse41+0x93c>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  65,15,194,195,1                     // cmpltps       %xmm11,%xmm0
-  .byte  15,40,21,17,34,0,0                  // movaps        0x2211(%rip),%xmm2        # 4200 <_sk_callback_sse41+0x903>
+  .byte  15,40,21,90,34,0,0                  // movaps        0x225a(%rip),%xmm2        # 4330 <_sk_callback_sse41+0x94c>
   .byte  15,88,202                           // addps         %xmm2,%xmm1
-  .byte  68,15,40,37,22,34,0,0               // movaps        0x2216(%rip),%xmm12        # 4210 <_sk_callback_sse41+0x913>
+  .byte  68,15,40,37,95,34,0,0               // movaps        0x225f(%rip),%xmm12        # 4340 <_sk_callback_sse41+0x95c>
   .byte  65,15,89,204                        // mulps         %xmm12,%xmm1
   .byte  102,65,15,56,20,203                 // blendvps      %xmm0,%xmm11,%xmm1
   .byte  69,15,40,216                        // movaps        %xmm8,%xmm11
@@ -19807,8 +20088,8 @@ _sk_lab_to_xyz_sse41:
   .byte  65,15,89,212                        // mulps         %xmm12,%xmm2
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  102,65,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm2
-  .byte  15,89,13,207,33,0,0                 // mulps         0x21cf(%rip),%xmm1        # 4220 <_sk_callback_sse41+0x923>
-  .byte  15,89,21,216,33,0,0                 // mulps         0x21d8(%rip),%xmm2        # 4230 <_sk_callback_sse41+0x933>
+  .byte  15,89,13,24,34,0,0                  // mulps         0x2218(%rip),%xmm1        # 4350 <_sk_callback_sse41+0x96c>
+  .byte  15,89,21,33,34,0,0                  // mulps         0x2221(%rip),%xmm2        # 4360 <_sk_callback_sse41+0x97c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
   .byte  65,15,40,200                        // movaps        %xmm8,%xmm1
@@ -19822,7 +20103,7 @@ _sk_load_a8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,49,4,56                   // pmovzxbd      (%rax,%rdi,1),%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,200,33,0,0                 // mulps         0x21c8(%rip),%xmm3        # 4240 <_sk_callback_sse41+0x943>
+  .byte  15,89,29,17,34,0,0                  // mulps         0x2211(%rip),%xmm3        # 4370 <_sk_callback_sse41+0x98c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,87,201                           // xorps         %xmm1,%xmm1
@@ -19855,7 +20136,7 @@ _sk_gather_a8_sse41:
   .byte  102,15,58,32,192,3                  // pinsrb        $0x3,%eax,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,92,33,0,0                  // mulps         0x215c(%rip),%xmm3        # 4250 <_sk_callback_sse41+0x953>
+  .byte  15,89,29,165,33,0,0                 // mulps         0x21a5(%rip),%xmm3        # 4380 <_sk_callback_sse41+0x99c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
@@ -19868,7 +20149,7 @@ FUNCTION(_sk_store_a8_sse41)
 _sk_store_a8_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,80,33,0,0                // movaps        0x2150(%rip),%xmm8        # 4260 <_sk_callback_sse41+0x963>
+  .byte  68,15,40,5,153,33,0,0               // movaps        0x2199(%rip),%xmm8        # 4390 <_sk_callback_sse41+0x9ac>
   .byte  68,15,89,195                        // mulps         %xmm3,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
   .byte  102,69,15,56,43,192                 // packusdw      %xmm8,%xmm8
@@ -19885,9 +20166,9 @@ _sk_load_g8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,49,4,56                   // pmovzxbd      (%rax,%rdi,1),%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,45,33,0,0                   // mulps         0x212d(%rip),%xmm0        # 4270 <_sk_callback_sse41+0x973>
+  .byte  15,89,5,118,33,0,0                  // mulps         0x2176(%rip),%xmm0        # 43a0 <_sk_callback_sse41+0x9bc>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,52,33,0,0                  // movaps        0x2134(%rip),%xmm3        # 4280 <_sk_callback_sse41+0x983>
+  .byte  15,40,29,125,33,0,0                 // movaps        0x217d(%rip),%xmm3        # 43b0 <_sk_callback_sse41+0x9cc>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -19918,9 +20199,9 @@ _sk_gather_g8_sse41:
   .byte  102,15,58,32,192,3                  // pinsrb        $0x3,%eax,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,205,32,0,0                  // mulps         0x20cd(%rip),%xmm0        # 4290 <_sk_callback_sse41+0x993>
+  .byte  15,89,5,22,33,0,0                   // mulps         0x2116(%rip),%xmm0        # 43c0 <_sk_callback_sse41+0x9dc>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,212,32,0,0                 // movaps        0x20d4(%rip),%xmm3        # 42a0 <_sk_callback_sse41+0x9a3>
+  .byte  15,40,29,29,33,0,0                  // movaps        0x211d(%rip),%xmm3        # 43d0 <_sk_callback_sse41+0x9ec>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -19932,9 +20213,9 @@ _sk_gather_i8_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  73,137,192                          // mov           %rax,%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  116,5                               // je            21e3 <_sk_gather_i8_sse41+0xf>
+  .byte  116,5                               // je            22ca <_sk_gather_i8_sse41+0xf>
   .byte  76,137,192                          // mov           %r8,%rax
-  .byte  235,2                               // jmp           21e5 <_sk_gather_i8_sse41+0x11>
+  .byte  235,2                               // jmp           22cc <_sk_gather_i8_sse41+0x11>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  243,15,91,201                       // cvttps2dq     %xmm1,%xmm1
@@ -19965,17 +20246,17 @@ _sk_gather_i8_sse41:
   .byte  102,15,58,34,28,8,1                 // pinsrd        $0x1,(%rax,%rcx,1),%xmm3
   .byte  102,66,15,58,34,28,144,2            // pinsrd        $0x2,(%rax,%r10,4),%xmm3
   .byte  102,66,15,58,34,28,8,3              // pinsrd        $0x3,(%rax,%r9,1),%xmm3
-  .byte  102,15,111,5,43,32,0,0              // movdqa        0x202b(%rip),%xmm0        # 42b0 <_sk_callback_sse41+0x9b3>
+  .byte  102,15,111,5,116,32,0,0             // movdqa        0x2074(%rip),%xmm0        # 43e0 <_sk_callback_sse41+0x9fc>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,44,32,0,0                // movaps        0x202c(%rip),%xmm8        # 42c0 <_sk_callback_sse41+0x9c3>
+  .byte  68,15,40,5,117,32,0,0               // movaps        0x2075(%rip),%xmm8        # 43f0 <_sk_callback_sse41+0xa0c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
-  .byte  102,15,56,0,13,43,32,0,0            // pshufb        0x202b(%rip),%xmm1        # 42d0 <_sk_callback_sse41+0x9d3>
+  .byte  102,15,56,0,13,116,32,0,0           // pshufb        0x2074(%rip),%xmm1        # 4400 <_sk_callback_sse41+0xa1c>
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,111,211                      // movdqa        %xmm3,%xmm2
-  .byte  102,15,56,0,21,39,32,0,0            // pshufb        0x2027(%rip),%xmm2        # 42e0 <_sk_callback_sse41+0x9e3>
+  .byte  102,15,56,0,21,112,32,0,0           // pshufb        0x2070(%rip),%xmm2        # 4410 <_sk_callback_sse41+0xa2c>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  102,15,114,211,24                   // psrld         $0x18,%xmm3
@@ -19991,19 +20272,19 @@ _sk_load_565_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,51,20,120                 // pmovzxwd      (%rax,%rdi,2),%xmm2
-  .byte  102,15,111,5,13,32,0,0              // movdqa        0x200d(%rip),%xmm0        # 42f0 <_sk_callback_sse41+0x9f3>
+  .byte  102,15,111,5,86,32,0,0              // movdqa        0x2056(%rip),%xmm0        # 4420 <_sk_callback_sse41+0xa3c>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,15,32,0,0                   // mulps         0x200f(%rip),%xmm0        # 4300 <_sk_callback_sse41+0xa03>
-  .byte  102,15,111,13,23,32,0,0             // movdqa        0x2017(%rip),%xmm1        # 4310 <_sk_callback_sse41+0xa13>
+  .byte  15,89,5,88,32,0,0                   // mulps         0x2058(%rip),%xmm0        # 4430 <_sk_callback_sse41+0xa4c>
+  .byte  102,15,111,13,96,32,0,0             // movdqa        0x2060(%rip),%xmm1        # 4440 <_sk_callback_sse41+0xa5c>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,25,32,0,0                  // mulps         0x2019(%rip),%xmm1        # 4320 <_sk_callback_sse41+0xa23>
-  .byte  102,15,219,21,33,32,0,0             // pand          0x2021(%rip),%xmm2        # 4330 <_sk_callback_sse41+0xa33>
+  .byte  15,89,13,98,32,0,0                  // mulps         0x2062(%rip),%xmm1        # 4450 <_sk_callback_sse41+0xa6c>
+  .byte  102,15,219,21,106,32,0,0            // pand          0x206a(%rip),%xmm2        # 4460 <_sk_callback_sse41+0xa7c>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,39,32,0,0                  // mulps         0x2027(%rip),%xmm2        # 4340 <_sk_callback_sse41+0xa43>
+  .byte  15,89,21,112,32,0,0                 // mulps         0x2070(%rip),%xmm2        # 4470 <_sk_callback_sse41+0xa8c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,46,32,0,0                  // movaps        0x202e(%rip),%xmm3        # 4350 <_sk_callback_sse41+0xa53>
+  .byte  15,40,29,119,32,0,0                 // movaps        0x2077(%rip),%xmm3        # 4480 <_sk_callback_sse41+0xa9c>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_gather_565_sse41
@@ -20031,19 +20312,19 @@ _sk_gather_565_sse41:
   .byte  65,15,183,4,65                      // movzwl        (%r9,%rax,2),%eax
   .byte  102,15,196,192,3                    // pinsrw        $0x3,%eax,%xmm0
   .byte  102,15,56,51,208                    // pmovzxwd      %xmm0,%xmm2
-  .byte  102,15,111,5,211,31,0,0             // movdqa        0x1fd3(%rip),%xmm0        # 4360 <_sk_callback_sse41+0xa63>
+  .byte  102,15,111,5,28,32,0,0              // movdqa        0x201c(%rip),%xmm0        # 4490 <_sk_callback_sse41+0xaac>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,213,31,0,0                  // mulps         0x1fd5(%rip),%xmm0        # 4370 <_sk_callback_sse41+0xa73>
-  .byte  102,15,111,13,221,31,0,0            // movdqa        0x1fdd(%rip),%xmm1        # 4380 <_sk_callback_sse41+0xa83>
+  .byte  15,89,5,30,32,0,0                   // mulps         0x201e(%rip),%xmm0        # 44a0 <_sk_callback_sse41+0xabc>
+  .byte  102,15,111,13,38,32,0,0             // movdqa        0x2026(%rip),%xmm1        # 44b0 <_sk_callback_sse41+0xacc>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,223,31,0,0                 // mulps         0x1fdf(%rip),%xmm1        # 4390 <_sk_callback_sse41+0xa93>
-  .byte  102,15,219,21,231,31,0,0            // pand          0x1fe7(%rip),%xmm2        # 43a0 <_sk_callback_sse41+0xaa3>
+  .byte  15,89,13,40,32,0,0                  // mulps         0x2028(%rip),%xmm1        # 44c0 <_sk_callback_sse41+0xadc>
+  .byte  102,15,219,21,48,32,0,0             // pand          0x2030(%rip),%xmm2        # 44d0 <_sk_callback_sse41+0xaec>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,237,31,0,0                 // mulps         0x1fed(%rip),%xmm2        # 43b0 <_sk_callback_sse41+0xab3>
+  .byte  15,89,21,54,32,0,0                  // mulps         0x2036(%rip),%xmm2        # 44e0 <_sk_callback_sse41+0xafc>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,244,31,0,0                 // movaps        0x1ff4(%rip),%xmm3        # 43c0 <_sk_callback_sse41+0xac3>
+  .byte  15,40,29,61,32,0,0                  // movaps        0x203d(%rip),%xmm3        # 44f0 <_sk_callback_sse41+0xb0c>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_565_sse41
@@ -20052,12 +20333,12 @@ FUNCTION(_sk_store_565_sse41)
 _sk_store_565_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,245,31,0,0               // movaps        0x1ff5(%rip),%xmm8        # 43d0 <_sk_callback_sse41+0xad3>
+  .byte  68,15,40,5,62,32,0,0                // movaps        0x203e(%rip),%xmm8        # 4500 <_sk_callback_sse41+0xb1c>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
   .byte  102,65,15,114,241,11                // pslld         $0xb,%xmm9
-  .byte  68,15,40,21,234,31,0,0              // movaps        0x1fea(%rip),%xmm10        # 43e0 <_sk_callback_sse41+0xae3>
+  .byte  68,15,40,21,51,32,0,0               // movaps        0x2033(%rip),%xmm10        # 4510 <_sk_callback_sse41+0xb2c>
   .byte  68,15,89,209                        // mulps         %xmm1,%xmm10
   .byte  102,69,15,91,210                    // cvtps2dq      %xmm10,%xmm10
   .byte  102,65,15,114,242,5                 // pslld         $0x5,%xmm10
@@ -20077,21 +20358,21 @@ _sk_load_4444_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,51,28,120                 // pmovzxwd      (%rax,%rdi,2),%xmm3
-  .byte  102,15,111,5,181,31,0,0             // movdqa        0x1fb5(%rip),%xmm0        # 43f0 <_sk_callback_sse41+0xaf3>
+  .byte  102,15,111,5,254,31,0,0             // movdqa        0x1ffe(%rip),%xmm0        # 4520 <_sk_callback_sse41+0xb3c>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,183,31,0,0                  // mulps         0x1fb7(%rip),%xmm0        # 4400 <_sk_callback_sse41+0xb03>
-  .byte  102,15,111,13,191,31,0,0            // movdqa        0x1fbf(%rip),%xmm1        # 4410 <_sk_callback_sse41+0xb13>
+  .byte  15,89,5,0,32,0,0                    // mulps         0x2000(%rip),%xmm0        # 4530 <_sk_callback_sse41+0xb4c>
+  .byte  102,15,111,13,8,32,0,0              // movdqa        0x2008(%rip),%xmm1        # 4540 <_sk_callback_sse41+0xb5c>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,193,31,0,0                 // mulps         0x1fc1(%rip),%xmm1        # 4420 <_sk_callback_sse41+0xb23>
-  .byte  102,15,111,21,201,31,0,0            // movdqa        0x1fc9(%rip),%xmm2        # 4430 <_sk_callback_sse41+0xb33>
+  .byte  15,89,13,10,32,0,0                  // mulps         0x200a(%rip),%xmm1        # 4550 <_sk_callback_sse41+0xb6c>
+  .byte  102,15,111,21,18,32,0,0             // movdqa        0x2012(%rip),%xmm2        # 4560 <_sk_callback_sse41+0xb7c>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,203,31,0,0                 // mulps         0x1fcb(%rip),%xmm2        # 4440 <_sk_callback_sse41+0xb43>
-  .byte  102,15,219,29,211,31,0,0            // pand          0x1fd3(%rip),%xmm3        # 4450 <_sk_callback_sse41+0xb53>
+  .byte  15,89,21,20,32,0,0                  // mulps         0x2014(%rip),%xmm2        # 4570 <_sk_callback_sse41+0xb8c>
+  .byte  102,15,219,29,28,32,0,0             // pand          0x201c(%rip),%xmm3        # 4580 <_sk_callback_sse41+0xb9c>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,217,31,0,0                 // mulps         0x1fd9(%rip),%xmm3        # 4460 <_sk_callback_sse41+0xb63>
+  .byte  15,89,29,34,32,0,0                  // mulps         0x2022(%rip),%xmm3        # 4590 <_sk_callback_sse41+0xbac>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -20120,21 +20401,21 @@ _sk_gather_4444_sse41:
   .byte  65,15,183,4,65                      // movzwl        (%r9,%rax,2),%eax
   .byte  102,15,196,192,3                    // pinsrw        $0x3,%eax,%xmm0
   .byte  102,15,56,51,216                    // pmovzxwd      %xmm0,%xmm3
-  .byte  102,15,111,5,124,31,0,0             // movdqa        0x1f7c(%rip),%xmm0        # 4470 <_sk_callback_sse41+0xb73>
+  .byte  102,15,111,5,197,31,0,0             // movdqa        0x1fc5(%rip),%xmm0        # 45a0 <_sk_callback_sse41+0xbbc>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,126,31,0,0                  // mulps         0x1f7e(%rip),%xmm0        # 4480 <_sk_callback_sse41+0xb83>
-  .byte  102,15,111,13,134,31,0,0            // movdqa        0x1f86(%rip),%xmm1        # 4490 <_sk_callback_sse41+0xb93>
+  .byte  15,89,5,199,31,0,0                  // mulps         0x1fc7(%rip),%xmm0        # 45b0 <_sk_callback_sse41+0xbcc>
+  .byte  102,15,111,13,207,31,0,0            // movdqa        0x1fcf(%rip),%xmm1        # 45c0 <_sk_callback_sse41+0xbdc>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,136,31,0,0                 // mulps         0x1f88(%rip),%xmm1        # 44a0 <_sk_callback_sse41+0xba3>
-  .byte  102,15,111,21,144,31,0,0            // movdqa        0x1f90(%rip),%xmm2        # 44b0 <_sk_callback_sse41+0xbb3>
+  .byte  15,89,13,209,31,0,0                 // mulps         0x1fd1(%rip),%xmm1        # 45d0 <_sk_callback_sse41+0xbec>
+  .byte  102,15,111,21,217,31,0,0            // movdqa        0x1fd9(%rip),%xmm2        # 45e0 <_sk_callback_sse41+0xbfc>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,146,31,0,0                 // mulps         0x1f92(%rip),%xmm2        # 44c0 <_sk_callback_sse41+0xbc3>
-  .byte  102,15,219,29,154,31,0,0            // pand          0x1f9a(%rip),%xmm3        # 44d0 <_sk_callback_sse41+0xbd3>
+  .byte  15,89,21,219,31,0,0                 // mulps         0x1fdb(%rip),%xmm2        # 45f0 <_sk_callback_sse41+0xc0c>
+  .byte  102,15,219,29,227,31,0,0            // pand          0x1fe3(%rip),%xmm3        # 4600 <_sk_callback_sse41+0xc1c>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,160,31,0,0                 // mulps         0x1fa0(%rip),%xmm3        # 44e0 <_sk_callback_sse41+0xbe3>
+  .byte  15,89,29,233,31,0,0                 // mulps         0x1fe9(%rip),%xmm3        # 4610 <_sk_callback_sse41+0xc2c>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -20144,7 +20425,7 @@ FUNCTION(_sk_store_4444_sse41)
 _sk_store_4444_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,159,31,0,0               // movaps        0x1f9f(%rip),%xmm8        # 44f0 <_sk_callback_sse41+0xbf3>
+  .byte  68,15,40,5,232,31,0,0               // movaps        0x1fe8(%rip),%xmm8        # 4620 <_sk_callback_sse41+0xc3c>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -20174,17 +20455,17 @@ _sk_load_8888_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  15,16,28,184                        // movups        (%rax,%rdi,4),%xmm3
-  .byte  15,40,5,62,31,0,0                   // movaps        0x1f3e(%rip),%xmm0        # 4500 <_sk_callback_sse41+0xc03>
+  .byte  15,40,5,135,31,0,0                  // movaps        0x1f87(%rip),%xmm0        # 4630 <_sk_callback_sse41+0xc4c>
   .byte  15,84,195                           // andps         %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,64,31,0,0                // movaps        0x1f40(%rip),%xmm8        # 4510 <_sk_callback_sse41+0xc13>
+  .byte  68,15,40,5,137,31,0,0               // movaps        0x1f89(%rip),%xmm8        # 4640 <_sk_callback_sse41+0xc5c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,40,203                           // movaps        %xmm3,%xmm1
-  .byte  102,15,56,0,13,64,31,0,0            // pshufb        0x1f40(%rip),%xmm1        # 4520 <_sk_callback_sse41+0xc23>
+  .byte  102,15,56,0,13,137,31,0,0           // pshufb        0x1f89(%rip),%xmm1        # 4650 <_sk_callback_sse41+0xc6c>
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  15,40,211                           // movaps        %xmm3,%xmm2
-  .byte  102,15,56,0,21,61,31,0,0            // pshufb        0x1f3d(%rip),%xmm2        # 4530 <_sk_callback_sse41+0xc33>
+  .byte  102,15,56,0,21,134,31,0,0           // pshufb        0x1f86(%rip),%xmm2        # 4660 <_sk_callback_sse41+0xc7c>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  102,15,114,211,24                   // psrld         $0x18,%xmm3
@@ -20215,17 +20496,17 @@ _sk_gather_8888_sse41:
   .byte  102,65,15,58,34,28,129,1            // pinsrd        $0x1,(%r9,%rax,4),%xmm3
   .byte  102,67,15,58,34,28,145,2            // pinsrd        $0x2,(%r9,%r10,4),%xmm3
   .byte  102,65,15,58,34,28,137,3            // pinsrd        $0x3,(%r9,%rcx,4),%xmm3
-  .byte  102,15,111,5,214,30,0,0             // movdqa        0x1ed6(%rip),%xmm0        # 4540 <_sk_callback_sse41+0xc43>
+  .byte  102,15,111,5,31,31,0,0              // movdqa        0x1f1f(%rip),%xmm0        # 4670 <_sk_callback_sse41+0xc8c>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,215,30,0,0               // movaps        0x1ed7(%rip),%xmm8        # 4550 <_sk_callback_sse41+0xc53>
+  .byte  68,15,40,5,32,31,0,0                // movaps        0x1f20(%rip),%xmm8        # 4680 <_sk_callback_sse41+0xc9c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
-  .byte  102,15,56,0,13,214,30,0,0           // pshufb        0x1ed6(%rip),%xmm1        # 4560 <_sk_callback_sse41+0xc63>
+  .byte  102,15,56,0,13,31,31,0,0            // pshufb        0x1f1f(%rip),%xmm1        # 4690 <_sk_callback_sse41+0xcac>
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,111,211                      // movdqa        %xmm3,%xmm2
-  .byte  102,15,56,0,21,210,30,0,0           // pshufb        0x1ed2(%rip),%xmm2        # 4570 <_sk_callback_sse41+0xc73>
+  .byte  102,15,56,0,21,27,31,0,0            // pshufb        0x1f1b(%rip),%xmm2        # 46a0 <_sk_callback_sse41+0xcbc>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  102,15,114,211,24                   // psrld         $0x18,%xmm3
@@ -20240,7 +20521,7 @@ FUNCTION(_sk_store_8888_sse41)
 _sk_store_8888_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,190,30,0,0               // movaps        0x1ebe(%rip),%xmm8        # 4580 <_sk_callback_sse41+0xc83>
+  .byte  68,15,40,5,7,31,0,0                 // movaps        0x1f07(%rip),%xmm8        # 46b0 <_sk_callback_sse41+0xccc>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -20277,18 +20558,18 @@ _sk_load_f16_sse41:
   .byte  102,68,15,97,216                    // punpcklwd     %xmm0,%xmm11
   .byte  102,68,15,105,200                   // punpckhwd     %xmm0,%xmm9
   .byte  102,65,15,56,51,203                 // pmovzxwd      %xmm11,%xmm1
-  .byte  102,68,15,111,5,55,30,0,0           // movdqa        0x1e37(%rip),%xmm8        # 4590 <_sk_callback_sse41+0xc93>
+  .byte  102,68,15,111,5,128,30,0,0          // movdqa        0x1e80(%rip),%xmm8        # 46c0 <_sk_callback_sse41+0xcdc>
   .byte  102,15,111,209                      // movdqa        %xmm1,%xmm2
   .byte  102,65,15,219,208                   // pand          %xmm8,%xmm2
   .byte  102,15,239,202                      // pxor          %xmm2,%xmm1
-  .byte  102,15,111,29,50,30,0,0             // movdqa        0x1e32(%rip),%xmm3        # 45a0 <_sk_callback_sse41+0xca3>
+  .byte  102,15,111,29,123,30,0,0            // movdqa        0x1e7b(%rip),%xmm3        # 46d0 <_sk_callback_sse41+0xcec>
   .byte  102,15,114,242,16                   // pslld         $0x10,%xmm2
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,15,56,63,195                    // pmaxud        %xmm3,%xmm0
   .byte  102,15,118,193                      // pcmpeqd       %xmm1,%xmm0
   .byte  102,15,114,241,13                   // pslld         $0xd,%xmm1
   .byte  102,15,235,202                      // por           %xmm2,%xmm1
-  .byte  102,68,15,111,21,30,30,0,0          // movdqa        0x1e1e(%rip),%xmm10        # 45b0 <_sk_callback_sse41+0xcb3>
+  .byte  102,68,15,111,21,103,30,0,0         // movdqa        0x1e67(%rip),%xmm10        # 46e0 <_sk_callback_sse41+0xcfc>
   .byte  102,65,15,254,202                   // paddd         %xmm10,%xmm1
   .byte  102,15,219,193                      // pand          %xmm1,%xmm0
   .byte  102,65,15,115,219,8                 // psrldq        $0x8,%xmm11
@@ -20361,18 +20642,18 @@ _sk_gather_f16_sse41:
   .byte  102,68,15,97,218                    // punpcklwd     %xmm2,%xmm11
   .byte  102,68,15,105,202                   // punpckhwd     %xmm2,%xmm9
   .byte  102,65,15,56,51,203                 // pmovzxwd      %xmm11,%xmm1
-  .byte  102,68,15,111,5,220,28,0,0          // movdqa        0x1cdc(%rip),%xmm8        # 45c0 <_sk_callback_sse41+0xcc3>
+  .byte  102,68,15,111,5,37,29,0,0           // movdqa        0x1d25(%rip),%xmm8        # 46f0 <_sk_callback_sse41+0xd0c>
   .byte  102,15,111,209                      // movdqa        %xmm1,%xmm2
   .byte  102,65,15,219,208                   // pand          %xmm8,%xmm2
   .byte  102,15,239,202                      // pxor          %xmm2,%xmm1
-  .byte  102,15,111,29,215,28,0,0            // movdqa        0x1cd7(%rip),%xmm3        # 45d0 <_sk_callback_sse41+0xcd3>
+  .byte  102,15,111,29,32,29,0,0             // movdqa        0x1d20(%rip),%xmm3        # 4700 <_sk_callback_sse41+0xd1c>
   .byte  102,15,114,242,16                   // pslld         $0x10,%xmm2
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,15,56,63,195                    // pmaxud        %xmm3,%xmm0
   .byte  102,15,118,193                      // pcmpeqd       %xmm1,%xmm0
   .byte  102,15,114,241,13                   // pslld         $0xd,%xmm1
   .byte  102,15,235,202                      // por           %xmm2,%xmm1
-  .byte  102,68,15,111,21,195,28,0,0         // movdqa        0x1cc3(%rip),%xmm10        # 45e0 <_sk_callback_sse41+0xce3>
+  .byte  102,68,15,111,21,12,29,0,0          // movdqa        0x1d0c(%rip),%xmm10        # 4710 <_sk_callback_sse41+0xd2c>
   .byte  102,65,15,254,202                   // paddd         %xmm10,%xmm1
   .byte  102,15,219,193                      // pand          %xmm1,%xmm0
   .byte  102,65,15,115,219,8                 // psrldq        $0x8,%xmm11
@@ -20420,17 +20701,17 @@ FUNCTION(_sk_store_f16_sse41)
 _sk_store_f16_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  102,68,15,111,21,249,27,0,0         // movdqa        0x1bf9(%rip),%xmm10        # 45f0 <_sk_callback_sse41+0xcf3>
+  .byte  102,68,15,111,21,66,28,0,0          // movdqa        0x1c42(%rip),%xmm10        # 4720 <_sk_callback_sse41+0xd3c>
   .byte  102,68,15,111,224                   // movdqa        %xmm0,%xmm12
   .byte  102,68,15,111,232                   // movdqa        %xmm0,%xmm13
   .byte  102,69,15,219,234                   // pand          %xmm10,%xmm13
   .byte  102,69,15,239,229                   // pxor          %xmm13,%xmm12
-  .byte  102,68,15,111,13,236,27,0,0         // movdqa        0x1bec(%rip),%xmm9        # 4600 <_sk_callback_sse41+0xd03>
+  .byte  102,68,15,111,13,53,28,0,0          // movdqa        0x1c35(%rip),%xmm9        # 4730 <_sk_callback_sse41+0xd4c>
   .byte  102,65,15,114,213,16                // psrld         $0x10,%xmm13
   .byte  102,69,15,111,193                   // movdqa        %xmm9,%xmm8
   .byte  102,69,15,102,196                   // pcmpgtd       %xmm12,%xmm8
   .byte  102,65,15,114,212,13                // psrld         $0xd,%xmm12
-  .byte  102,68,15,111,29,221,27,0,0         // movdqa        0x1bdd(%rip),%xmm11        # 4610 <_sk_callback_sse41+0xd13>
+  .byte  102,68,15,111,29,38,28,0,0          // movdqa        0x1c26(%rip),%xmm11        # 4740 <_sk_callback_sse41+0xd5c>
   .byte  102,69,15,235,235                   // por           %xmm11,%xmm13
   .byte  102,69,15,254,236                   // paddd         %xmm12,%xmm13
   .byte  102,69,15,223,197                   // pandn         %xmm13,%xmm8
@@ -20500,7 +20781,7 @@ _sk_load_u16_be_sse41:
   .byte  102,15,235,200                      // por           %xmm0,%xmm1
   .byte  102,15,56,51,193                    // pmovzxwd      %xmm1,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,172,26,0,0               // movaps        0x1aac(%rip),%xmm8        # 4620 <_sk_callback_sse41+0xd23>
+  .byte  68,15,40,5,245,26,0,0               // movaps        0x1af5(%rip),%xmm8        # 4750 <_sk_callback_sse41+0xd6c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -20552,7 +20833,7 @@ _sk_load_rgb_u16_be_sse41:
   .byte  102,15,235,193                      // por           %xmm1,%xmm0
   .byte  102,15,56,51,192                    // pmovzxwd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,237,25,0,0               // movaps        0x19ed(%rip),%xmm8        # 4630 <_sk_callback_sse41+0xd33>
+  .byte  68,15,40,5,54,26,0,0                // movaps        0x1a36(%rip),%xmm8        # 4760 <_sk_callback_sse41+0xd7c>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -20569,7 +20850,7 @@ _sk_load_rgb_u16_be_sse41:
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,180,25,0,0                 // movaps        0x19b4(%rip),%xmm3        # 4640 <_sk_callback_sse41+0xd43>
+  .byte  15,40,29,253,25,0,0                 // movaps        0x19fd(%rip),%xmm3        # 4770 <_sk_callback_sse41+0xd8c>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_u16_be_sse41
@@ -20578,7 +20859,7 @@ FUNCTION(_sk_store_u16_be_sse41)
 _sk_store_u16_be_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,13,181,25,0,0              // movaps        0x19b5(%rip),%xmm9        # 4650 <_sk_callback_sse41+0xd53>
+  .byte  68,15,40,13,254,25,0,0              // movaps        0x19fe(%rip),%xmm9        # 4780 <_sk_callback_sse41+0xd9c>
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
@@ -20801,10 +21082,10 @@ HIDDEN _sk_luminance_to_alpha_sse41
 FUNCTION(_sk_luminance_to_alpha_sse41)
 _sk_luminance_to_alpha_sse41:
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
-  .byte  15,89,5,211,22,0,0                  // mulps         0x16d3(%rip),%xmm0        # 4660 <_sk_callback_sse41+0xd63>
-  .byte  15,89,13,220,22,0,0                 // mulps         0x16dc(%rip),%xmm1        # 4670 <_sk_callback_sse41+0xd73>
+  .byte  15,89,5,28,23,0,0                   // mulps         0x171c(%rip),%xmm0        # 4790 <_sk_callback_sse41+0xdac>
+  .byte  15,89,13,37,23,0,0                  // mulps         0x1725(%rip),%xmm1        # 47a0 <_sk_callback_sse41+0xdbc>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
-  .byte  15,89,29,226,22,0,0                 // mulps         0x16e2(%rip),%xmm3        # 4680 <_sk_callback_sse41+0xd83>
+  .byte  15,89,29,43,23,0,0                  // mulps         0x172b(%rip),%xmm3        # 47b0 <_sk_callback_sse41+0xdcc>
   .byte  15,88,217                           // addps         %xmm1,%xmm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
@@ -21037,7 +21318,7 @@ _sk_linear_gradient_sse41:
   .byte  69,15,198,237,0                     // shufps        $0x0,%xmm13,%xmm13
   .byte  72,139,8                            // mov           (%rax),%rcx
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,132,254,0,0,0                    // je            3440 <_sk_linear_gradient_sse41+0x138>
+  .byte  15,132,254,0,0,0                    // je            3527 <_sk_linear_gradient_sse41+0x138>
   .byte  15,41,100,36,168                    // movaps        %xmm4,-0x58(%rsp)
   .byte  15,41,108,36,184                    // movaps        %xmm5,-0x48(%rsp)
   .byte  15,41,116,36,200                    // movaps        %xmm6,-0x38(%rsp)
@@ -21087,12 +21368,12 @@ _sk_linear_gradient_sse41:
   .byte  15,40,196                           // movaps        %xmm4,%xmm0
   .byte  72,131,192,36                       // add           $0x24,%rax
   .byte  72,255,201                          // dec           %rcx
-  .byte  15,133,65,255,255,255               // jne           336b <_sk_linear_gradient_sse41+0x63>
+  .byte  15,133,65,255,255,255               // jne           3452 <_sk_linear_gradient_sse41+0x63>
   .byte  15,40,124,36,216                    // movaps        -0x28(%rsp),%xmm7
   .byte  15,40,116,36,200                    // movaps        -0x38(%rsp),%xmm6
   .byte  15,40,108,36,184                    // movaps        -0x48(%rsp),%xmm5
   .byte  15,40,100,36,168                    // movaps        -0x58(%rsp),%xmm4
-  .byte  235,13                              // jmp           344d <_sk_linear_gradient_sse41+0x145>
+  .byte  235,13                              // jmp           3534 <_sk_linear_gradient_sse41+0x145>
   .byte  15,87,201                           // xorps         %xmm1,%xmm1
   .byte  15,87,210                           // xorps         %xmm2,%xmm2
   .byte  15,87,219                           // xorps         %xmm3,%xmm3
@@ -21161,26 +21442,26 @@ _sk_xy_to_polar_unit_sse41:
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,40,236                        // movaps        %xmm12,%xmm13
   .byte  69,15,89,237                        // mulps         %xmm13,%xmm13
-  .byte  68,15,40,21,107,17,0,0              // movaps        0x116b(%rip),%xmm10        # 4690 <_sk_callback_sse41+0xd93>
+  .byte  68,15,40,21,180,17,0,0              // movaps        0x11b4(%rip),%xmm10        # 47c0 <_sk_callback_sse41+0xddc>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,111,17,0,0              // addps         0x116f(%rip),%xmm10        # 46a0 <_sk_callback_sse41+0xda3>
+  .byte  68,15,88,21,184,17,0,0              // addps         0x11b8(%rip),%xmm10        # 47d0 <_sk_callback_sse41+0xdec>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,115,17,0,0              // addps         0x1173(%rip),%xmm10        # 46b0 <_sk_callback_sse41+0xdb3>
+  .byte  68,15,88,21,188,17,0,0              // addps         0x11bc(%rip),%xmm10        # 47e0 <_sk_callback_sse41+0xdfc>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,119,17,0,0              // addps         0x1177(%rip),%xmm10        # 46c0 <_sk_callback_sse41+0xdc3>
+  .byte  68,15,88,21,192,17,0,0              // addps         0x11c0(%rip),%xmm10        # 47f0 <_sk_callback_sse41+0xe0c>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,194,195,1                     // cmpltps       %xmm11,%xmm0
-  .byte  68,15,40,29,118,17,0,0              // movaps        0x1176(%rip),%xmm11        # 46d0 <_sk_callback_sse41+0xdd3>
+  .byte  68,15,40,29,191,17,0,0              // movaps        0x11bf(%rip),%xmm11        # 4800 <_sk_callback_sse41+0xe1c>
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  102,69,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm10
   .byte  69,15,194,200,1                     // cmpltps       %xmm8,%xmm9
-  .byte  68,15,40,29,111,17,0,0              // movaps        0x116f(%rip),%xmm11        # 46e0 <_sk_callback_sse41+0xde3>
+  .byte  68,15,40,29,184,17,0,0              // movaps        0x11b8(%rip),%xmm11        # 4810 <_sk_callback_sse41+0xe2c>
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  102,69,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm10
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
   .byte  65,15,194,192,1                     // cmpltps       %xmm8,%xmm0
-  .byte  68,15,40,13,97,17,0,0               // movaps        0x1161(%rip),%xmm9        # 46f0 <_sk_callback_sse41+0xdf3>
+  .byte  68,15,40,13,170,17,0,0              // movaps        0x11aa(%rip),%xmm9        # 4820 <_sk_callback_sse41+0xe3c>
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  102,69,15,56,20,209                 // blendvps      %xmm0,%xmm9,%xmm10
   .byte  69,15,194,194,7                     // cmpordps      %xmm10,%xmm8
@@ -21194,7 +21475,7 @@ HIDDEN _sk_save_xy_sse41
 FUNCTION(_sk_save_xy_sse41)
 _sk_save_xy_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,76,17,0,0                // movaps        0x114c(%rip),%xmm8        # 4700 <_sk_callback_sse41+0xe03>
+  .byte  68,15,40,5,149,17,0,0               // movaps        0x1195(%rip),%xmm8        # 4830 <_sk_callback_sse41+0xe4c>
   .byte  15,17,0                             // movups        %xmm0,(%rax)
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,88,200                        // addps         %xmm8,%xmm9
@@ -21238,8 +21519,8 @@ _sk_bilinear_nx_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,206,16,0,0                  // addps         0x10ce(%rip),%xmm0        # 4710 <_sk_callback_sse41+0xe13>
-  .byte  68,15,40,13,214,16,0,0              // movaps        0x10d6(%rip),%xmm9        # 4720 <_sk_callback_sse41+0xe23>
+  .byte  15,88,5,23,17,0,0                   // addps         0x1117(%rip),%xmm0        # 4840 <_sk_callback_sse41+0xe5c>
+  .byte  68,15,40,13,31,17,0,0               // movaps        0x111f(%rip),%xmm9        # 4850 <_sk_callback_sse41+0xe6c>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21252,7 +21533,7 @@ _sk_bilinear_px_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,197,16,0,0                  // addps         0x10c5(%rip),%xmm0        # 4730 <_sk_callback_sse41+0xe33>
+  .byte  15,88,5,14,17,0,0                   // addps         0x110e(%rip),%xmm0        # 4860 <_sk_callback_sse41+0xe7c>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -21264,8 +21545,8 @@ _sk_bilinear_ny_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,183,16,0,0                 // addps         0x10b7(%rip),%xmm1        # 4740 <_sk_callback_sse41+0xe43>
-  .byte  68,15,40,13,191,16,0,0              // movaps        0x10bf(%rip),%xmm9        # 4750 <_sk_callback_sse41+0xe53>
+  .byte  15,88,13,0,17,0,0                   // addps         0x1100(%rip),%xmm1        # 4870 <_sk_callback_sse41+0xe8c>
+  .byte  68,15,40,13,8,17,0,0                // movaps        0x1108(%rip),%xmm9        # 4880 <_sk_callback_sse41+0xe9c>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21278,7 +21559,7 @@ _sk_bilinear_py_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,173,16,0,0                 // addps         0x10ad(%rip),%xmm1        # 4760 <_sk_callback_sse41+0xe63>
+  .byte  15,88,13,246,16,0,0                 // addps         0x10f6(%rip),%xmm1        # 4890 <_sk_callback_sse41+0xeac>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -21290,13 +21571,13 @@ _sk_bicubic_n3x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,160,16,0,0                  // addps         0x10a0(%rip),%xmm0        # 4770 <_sk_callback_sse41+0xe73>
-  .byte  68,15,40,13,168,16,0,0              // movaps        0x10a8(%rip),%xmm9        # 4780 <_sk_callback_sse41+0xe83>
+  .byte  15,88,5,233,16,0,0                  // addps         0x10e9(%rip),%xmm0        # 48a0 <_sk_callback_sse41+0xebc>
+  .byte  68,15,40,13,241,16,0,0              // movaps        0x10f1(%rip),%xmm9        # 48b0 <_sk_callback_sse41+0xecc>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,164,16,0,0              // mulps         0x10a4(%rip),%xmm9        # 4790 <_sk_callback_sse41+0xe93>
-  .byte  68,15,88,13,172,16,0,0              // addps         0x10ac(%rip),%xmm9        # 47a0 <_sk_callback_sse41+0xea3>
+  .byte  68,15,89,13,237,16,0,0              // mulps         0x10ed(%rip),%xmm9        # 48c0 <_sk_callback_sse41+0xedc>
+  .byte  68,15,88,13,245,16,0,0              // addps         0x10f5(%rip),%xmm9        # 48d0 <_sk_callback_sse41+0xeec>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21309,16 +21590,16 @@ _sk_bicubic_n1x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,155,16,0,0                  // addps         0x109b(%rip),%xmm0        # 47b0 <_sk_callback_sse41+0xeb3>
-  .byte  68,15,40,13,163,16,0,0              // movaps        0x10a3(%rip),%xmm9        # 47c0 <_sk_callback_sse41+0xec3>
+  .byte  15,88,5,228,16,0,0                  // addps         0x10e4(%rip),%xmm0        # 48e0 <_sk_callback_sse41+0xefc>
+  .byte  68,15,40,13,236,16,0,0              // movaps        0x10ec(%rip),%xmm9        # 48f0 <_sk_callback_sse41+0xf0c>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,167,16,0,0               // movaps        0x10a7(%rip),%xmm8        # 47d0 <_sk_callback_sse41+0xed3>
+  .byte  68,15,40,5,240,16,0,0               // movaps        0x10f0(%rip),%xmm8        # 4900 <_sk_callback_sse41+0xf1c>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,171,16,0,0               // addps         0x10ab(%rip),%xmm8        # 47e0 <_sk_callback_sse41+0xee3>
+  .byte  68,15,88,5,244,16,0,0               // addps         0x10f4(%rip),%xmm8        # 4910 <_sk_callback_sse41+0xf2c>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,175,16,0,0               // addps         0x10af(%rip),%xmm8        # 47f0 <_sk_callback_sse41+0xef3>
+  .byte  68,15,88,5,248,16,0,0               // addps         0x10f8(%rip),%xmm8        # 4920 <_sk_callback_sse41+0xf3c>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,179,16,0,0               // addps         0x10b3(%rip),%xmm8        # 4800 <_sk_callback_sse41+0xf03>
+  .byte  68,15,88,5,252,16,0,0               // addps         0x10fc(%rip),%xmm8        # 4930 <_sk_callback_sse41+0xf4c>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -21328,17 +21609,17 @@ HIDDEN _sk_bicubic_p1x_sse41
 FUNCTION(_sk_bicubic_p1x_sse41)
 _sk_bicubic_p1x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,173,16,0,0               // movaps        0x10ad(%rip),%xmm8        # 4810 <_sk_callback_sse41+0xf13>
+  .byte  68,15,40,5,246,16,0,0               // movaps        0x10f6(%rip),%xmm8        # 4940 <_sk_callback_sse41+0xf5c>
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,72,64                      // movups        0x40(%rax),%xmm9
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,21,169,16,0,0              // movaps        0x10a9(%rip),%xmm10        # 4820 <_sk_callback_sse41+0xf23>
+  .byte  68,15,40,21,242,16,0,0              // movaps        0x10f2(%rip),%xmm10        # 4950 <_sk_callback_sse41+0xf6c>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,173,16,0,0              // addps         0x10ad(%rip),%xmm10        # 4830 <_sk_callback_sse41+0xf33>
+  .byte  68,15,88,21,246,16,0,0              // addps         0x10f6(%rip),%xmm10        # 4960 <_sk_callback_sse41+0xf7c>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,169,16,0,0              // addps         0x10a9(%rip),%xmm10        # 4840 <_sk_callback_sse41+0xf43>
+  .byte  68,15,88,21,242,16,0,0              // addps         0x10f2(%rip),%xmm10        # 4970 <_sk_callback_sse41+0xf8c>
   .byte  68,15,17,144,128,0,0,0              // movups        %xmm10,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -21350,11 +21631,11 @@ _sk_bicubic_p3x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,156,16,0,0                  // addps         0x109c(%rip),%xmm0        # 4850 <_sk_callback_sse41+0xf53>
+  .byte  15,88,5,229,16,0,0                  // addps         0x10e5(%rip),%xmm0        # 4980 <_sk_callback_sse41+0xf9c>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,156,16,0,0               // mulps         0x109c(%rip),%xmm8        # 4860 <_sk_callback_sse41+0xf63>
-  .byte  68,15,88,5,164,16,0,0               // addps         0x10a4(%rip),%xmm8        # 4870 <_sk_callback_sse41+0xf73>
+  .byte  68,15,89,5,229,16,0,0               // mulps         0x10e5(%rip),%xmm8        # 4990 <_sk_callback_sse41+0xfac>
+  .byte  68,15,88,5,237,16,0,0               // addps         0x10ed(%rip),%xmm8        # 49a0 <_sk_callback_sse41+0xfbc>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21367,13 +21648,13 @@ _sk_bicubic_n3y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,146,16,0,0                 // addps         0x1092(%rip),%xmm1        # 4880 <_sk_callback_sse41+0xf83>
-  .byte  68,15,40,13,154,16,0,0              // movaps        0x109a(%rip),%xmm9        # 4890 <_sk_callback_sse41+0xf93>
+  .byte  15,88,13,219,16,0,0                 // addps         0x10db(%rip),%xmm1        # 49b0 <_sk_callback_sse41+0xfcc>
+  .byte  68,15,40,13,227,16,0,0              // movaps        0x10e3(%rip),%xmm9        # 49c0 <_sk_callback_sse41+0xfdc>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,150,16,0,0              // mulps         0x1096(%rip),%xmm9        # 48a0 <_sk_callback_sse41+0xfa3>
-  .byte  68,15,88,13,158,16,0,0              // addps         0x109e(%rip),%xmm9        # 48b0 <_sk_callback_sse41+0xfb3>
+  .byte  68,15,89,13,223,16,0,0              // mulps         0x10df(%rip),%xmm9        # 49d0 <_sk_callback_sse41+0xfec>
+  .byte  68,15,88,13,231,16,0,0              // addps         0x10e7(%rip),%xmm9        # 49e0 <_sk_callback_sse41+0xffc>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21386,16 +21667,16 @@ _sk_bicubic_n1y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,140,16,0,0                 // addps         0x108c(%rip),%xmm1        # 48c0 <_sk_callback_sse41+0xfc3>
-  .byte  68,15,40,13,148,16,0,0              // movaps        0x1094(%rip),%xmm9        # 48d0 <_sk_callback_sse41+0xfd3>
+  .byte  15,88,13,213,16,0,0                 // addps         0x10d5(%rip),%xmm1        # 49f0 <_sk_callback_sse41+0x100c>
+  .byte  68,15,40,13,221,16,0,0              // movaps        0x10dd(%rip),%xmm9        # 4a00 <_sk_callback_sse41+0x101c>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,152,16,0,0               // movaps        0x1098(%rip),%xmm8        # 48e0 <_sk_callback_sse41+0xfe3>
+  .byte  68,15,40,5,225,16,0,0               // movaps        0x10e1(%rip),%xmm8        # 4a10 <_sk_callback_sse41+0x102c>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,156,16,0,0               // addps         0x109c(%rip),%xmm8        # 48f0 <_sk_callback_sse41+0xff3>
+  .byte  68,15,88,5,229,16,0,0               // addps         0x10e5(%rip),%xmm8        # 4a20 <_sk_callback_sse41+0x103c>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,160,16,0,0               // addps         0x10a0(%rip),%xmm8        # 4900 <_sk_callback_sse41+0x1003>
+  .byte  68,15,88,5,233,16,0,0               // addps         0x10e9(%rip),%xmm8        # 4a30 <_sk_callback_sse41+0x104c>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,164,16,0,0               // addps         0x10a4(%rip),%xmm8        # 4910 <_sk_callback_sse41+0x1013>
+  .byte  68,15,88,5,237,16,0,0               // addps         0x10ed(%rip),%xmm8        # 4a40 <_sk_callback_sse41+0x105c>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -21405,17 +21686,17 @@ HIDDEN _sk_bicubic_p1y_sse41
 FUNCTION(_sk_bicubic_p1y_sse41)
 _sk_bicubic_p1y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,158,16,0,0               // movaps        0x109e(%rip),%xmm8        # 4920 <_sk_callback_sse41+0x1023>
+  .byte  68,15,40,5,231,16,0,0               // movaps        0x10e7(%rip),%xmm8        # 4a50 <_sk_callback_sse41+0x106c>
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,72,96                      // movups        0x60(%rax),%xmm9
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  68,15,40,21,153,16,0,0              // movaps        0x1099(%rip),%xmm10        # 4930 <_sk_callback_sse41+0x1033>
+  .byte  68,15,40,21,226,16,0,0              // movaps        0x10e2(%rip),%xmm10        # 4a60 <_sk_callback_sse41+0x107c>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,157,16,0,0              // addps         0x109d(%rip),%xmm10        # 4940 <_sk_callback_sse41+0x1043>
+  .byte  68,15,88,21,230,16,0,0              // addps         0x10e6(%rip),%xmm10        # 4a70 <_sk_callback_sse41+0x108c>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,153,16,0,0              // addps         0x1099(%rip),%xmm10        # 4950 <_sk_callback_sse41+0x1053>
+  .byte  68,15,88,21,226,16,0,0              // addps         0x10e2(%rip),%xmm10        # 4a80 <_sk_callback_sse41+0x109c>
   .byte  68,15,17,144,160,0,0,0              // movups        %xmm10,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -21427,11 +21708,11 @@ _sk_bicubic_p3y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,139,16,0,0                 // addps         0x108b(%rip),%xmm1        # 4960 <_sk_callback_sse41+0x1063>
+  .byte  15,88,13,212,16,0,0                 // addps         0x10d4(%rip),%xmm1        # 4a90 <_sk_callback_sse41+0x10ac>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,139,16,0,0               // mulps         0x108b(%rip),%xmm8        # 4970 <_sk_callback_sse41+0x1073>
-  .byte  68,15,88,5,147,16,0,0               // addps         0x1093(%rip),%xmm8        # 4980 <_sk_callback_sse41+0x1083>
+  .byte  68,15,89,5,212,16,0,0               // mulps         0x10d4(%rip),%xmm8        # 4aa0 <_sk_callback_sse41+0x10bc>
+  .byte  68,15,88,5,220,16,0,0               // addps         0x10dc(%rip),%xmm8        # 4ab0 <_sk_callback_sse41+0x10cc>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -21520,6 +21801,40 @@ BALIGN16
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
   .byte  0,0                                 // add           %al,(%rax)
+  .byte  128,63,1                            // cmpb          $0x1,(%rdi)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,1                                 // add           %al,(%rcx)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,1                                 // add           %al,(%rcx)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,1                                 // add           %al,(%rcx)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,4,0                               // add           %al,(%rax,%rax,1)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  4,0                                 // add           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  4,0                                 // add           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  4,0                                 // add           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  33,8                                // and           %ecx,(%rax)
+  .byte  130                                 // (bad)
+  .byte  60,33                               // cmp           $0x21,%al
+  .byte  8,130,60,33,8,130                   // or            %al,-0x7df7dec4(%rdx)
+  .byte  60,33                               // cmp           $0x21,%al
+  .byte  8,130,60,0,0,0                      // or            %al,0x3c(%rdx)
+  .byte  191,0,0,0,191                       // mov           $0xbf000000,%edi
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,191,0,0,0,191                     // add           %bh,-0x41000000(%rdi)
+  .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
@@ -21610,17 +21925,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
-  .byte  63                                  // (bad)
+  .byte  191,0,0,128,191                     // mov           $0xbf800000,%edi
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,191,0,0,128,191,0               // cmpb          $0x0,-0x40800000(%rdi)
-  .byte  0,128,191,0,0,128                   // add           %al,-0x7fffff41(%rax)
-  .byte  191,0,0,224,64                      // mov           $0x40e00000,%edi
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        3b68 <.literal16+0x188>
+  .byte  0,224                               // add           %ah,%al
+  .byte  64,0,0                              // add           %al,(%rax)
+  .byte  224,64                              // loopne        3c98 <.literal16+0x1d8>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        3b6c <.literal16+0x18c>
+  .byte  224,64                              // loopne        3c9c <.literal16+0x1dc>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        3b70 <.literal16+0x190>
+  .byte  224,64                              // loopne        3ca0 <.literal16+0x1e0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -21839,13 +22153,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        3d29 <.literal16+0x349>
+  .byte  224,7                               // loopne        3e59 <.literal16+0x399>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        3d2d <.literal16+0x34d>
+  .byte  224,7                               // loopne        3e5d <.literal16+0x39d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        3d31 <.literal16+0x351>
+  .byte  224,7                               // loopne        3e61 <.literal16+0x3a1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        3d35 <.literal16+0x355>
+  .byte  224,7                               // loopne        3e65 <.literal16+0x3a5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -21885,10 +22199,10 @@ BALIGN16
   .byte  0,1                                 // add           %al,(%rcx)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a003d88 <_sk_callback_sse41+0xa00048b>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a003eb8 <_sk_callback_sse41+0xa0004d4>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3003d90 <_sk_callback_sse41+0x3000493>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3003ec0 <_sk_callback_sse41+0x30004dc>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -21943,11 +22257,11 @@ BALIGN16
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            3e5b <.literal16+0x47b>
+  .byte  127,67                              // jg            3f8b <.literal16+0x4cb>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            3e5f <.literal16+0x47f>
+  .byte  127,67                              // jg            3f8f <.literal16+0x4cf>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            3e63 <.literal16+0x483>
+  .byte  127,67                              // jg            3f93 <.literal16+0x4d3>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,129,128,128,59           // addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -21962,16 +22276,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3e54 <.literal16+0x474>
+  .byte  127,0                               // jg            3f84 <.literal16+0x4c4>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3e58 <.literal16+0x478>
+  .byte  127,0                               // jg            3f88 <.literal16+0x4c8>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3e5c <.literal16+0x47c>
+  .byte  127,0                               // jg            3f8c <.literal16+0x4cc>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3e60 <.literal16+0x480>
+  .byte  127,0                               // jg            3f90 <.literal16+0x4d0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -21980,7 +22294,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            3ee5 <.literal16+0x505>
+  .byte  119,115                             // ja            4015 <.literal16+0x555>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -21991,7 +22305,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           3e49 <.literal16+0x469>
+  .byte  117,191                             // jne           3f79 <.literal16+0x4b9>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -22003,7 +22317,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a37e8a <_sk_callback_sse41+0xffffffffe9a3458d>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a37fba <_sk_callback_sse41+0xffffffffe9a345d6>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -22058,16 +22372,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3f24 <.literal16+0x544>
+  .byte  127,0                               // jg            4054 <.literal16+0x594>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3f28 <.literal16+0x548>
+  .byte  127,0                               // jg            4058 <.literal16+0x598>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3f2c <.literal16+0x54c>
+  .byte  127,0                               // jg            405c <.literal16+0x59c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3f30 <.literal16+0x550>
+  .byte  127,0                               // jg            4060 <.literal16+0x5a0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -22076,7 +22390,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            3fb5 <.literal16+0x5d5>
+  .byte  119,115                             // ja            40e5 <.literal16+0x625>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -22087,7 +22401,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           3f19 <.literal16+0x539>
+  .byte  117,191                             // jne           4049 <.literal16+0x589>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -22099,7 +22413,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a37f5a <_sk_callback_sse41+0xffffffffe9a3465d>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3808a <_sk_callback_sse41+0xffffffffe9a346a6>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -22154,16 +22468,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3ff4 <.literal16+0x614>
+  .byte  127,0                               // jg            4124 <.literal16+0x664>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3ff8 <.literal16+0x618>
+  .byte  127,0                               // jg            4128 <.literal16+0x668>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            3ffc <.literal16+0x61c>
+  .byte  127,0                               // jg            412c <.literal16+0x66c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4000 <.literal16+0x620>
+  .byte  127,0                               // jg            4130 <.literal16+0x670>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -22172,7 +22486,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4085 <.literal16+0x6a5>
+  .byte  119,115                             // ja            41b5 <.literal16+0x6f5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -22183,7 +22497,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           3fe9 <.literal16+0x609>
+  .byte  117,191                             // jne           4119 <.literal16+0x659>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -22195,7 +22509,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3802a <_sk_callback_sse41+0xffffffffe9a3472d>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3815a <_sk_callback_sse41+0xffffffffe9a34776>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -22250,16 +22564,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            40c4 <.literal16+0x6e4>
+  .byte  127,0                               // jg            41f4 <.literal16+0x734>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            40c8 <.literal16+0x6e8>
+  .byte  127,0                               // jg            41f8 <.literal16+0x738>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            40cc <.literal16+0x6ec>
+  .byte  127,0                               // jg            41fc <.literal16+0x73c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            40d0 <.literal16+0x6f0>
+  .byte  127,0                               // jg            4200 <.literal16+0x740>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -22268,7 +22582,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4155 <.literal16+0x775>
+  .byte  119,115                             // ja            4285 <.literal16+0x7c5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -22279,7 +22593,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           40b9 <.literal16+0x6d9>
+  .byte  117,191                             // jne           41e9 <.literal16+0x729>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -22291,7 +22605,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a380fa <_sk_callback_sse41+0xffffffffe9a347fd>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3822a <_sk_callback_sse41+0xffffffffe9a34846>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -22342,13 +22656,13 @@ BALIGN16
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
-  .byte  127,67                              // jg            41d7 <.literal16+0x7f7>
+  .byte  127,67                              // jg            4307 <.literal16+0x847>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            41db <.literal16+0x7fb>
+  .byte  127,67                              // jg            430b <.literal16+0x84b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            41df <.literal16+0x7ff>
+  .byte  127,67                              // jg            430f <.literal16+0x84f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            41e3 <.literal16+0x803>
+  .byte  127,67                              // jg            4313 <.literal16+0x853>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -22395,16 +22709,16 @@ BALIGN16
   .byte  128,3,62                            // addb          $0x3e,(%rbx)
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           4263 <.literal16+0x883>
+  .byte  118,63                              // jbe           4393 <.literal16+0x8d3>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           4267 <.literal16+0x887>
+  .byte  118,63                              // jbe           4397 <.literal16+0x8d7>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           426b <.literal16+0x88b>
+  .byte  118,63                              // jbe           439b <.literal16+0x8db>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           426f <.literal16+0x88f>
+  .byte  118,63                              // jbe           439f <.literal16+0x8df>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
@@ -22416,11 +22730,11 @@ BALIGN16
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            42ab <.literal16+0x8cb>
+  .byte  127,67                              // jg            43db <.literal16+0x91b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            42af <.literal16+0x8cf>
+  .byte  127,67                              // jg            43df <.literal16+0x91f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            42b3 <.literal16+0x8d3>
+  .byte  127,67                              // jg            43e3 <.literal16+0x923>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,0,0,128,63               // addb          $0x3f,-0x7fffffc5(%rax)
@@ -22449,7 +22763,7 @@ BALIGN16
   .byte  5,255,255,255,9                     // add           $0x9ffffff,%eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 30042e0 <_sk_callback_sse41+0x30009e3>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3004410 <_sk_callback_sse41+0x3000a2c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -22478,13 +22792,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        4319 <.literal16+0x939>
+  .byte  224,7                               // loopne        4449 <.literal16+0x989>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        431d <.literal16+0x93d>
+  .byte  224,7                               // loopne        444d <.literal16+0x98d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4321 <.literal16+0x941>
+  .byte  224,7                               // loopne        4451 <.literal16+0x991>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4325 <.literal16+0x945>
+  .byte  224,7                               // loopne        4455 <.literal16+0x995>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -22530,13 +22844,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        4389 <.literal16+0x9a9>
+  .byte  224,7                               // loopne        44b9 <.literal16+0x9f9>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        438d <.literal16+0x9ad>
+  .byte  224,7                               // loopne        44bd <.literal16+0x9fd>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4391 <.literal16+0x9b1>
+  .byte  224,7                               // loopne        44c1 <.literal16+0xa01>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4395 <.literal16+0x9b5>
+  .byte  224,7                               // loopne        44c5 <.literal16+0xa05>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -22574,13 +22888,13 @@ BALIGN16
   .byte  65,0,0                              // add           %al,(%r8)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            4426 <.literal16+0xa46>
+  .byte  124,66                              // jl            4556 <.literal16+0xa96>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            442a <.literal16+0xa4a>
+  .byte  124,66                              // jl            455a <.literal16+0xa9a>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            442e <.literal16+0xa4e>
+  .byte  124,66                              // jl            455e <.literal16+0xa9e>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            4432 <.literal16+0xa52>
+  .byte  124,66                              // jl            4562 <.literal16+0xaa2>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,240                               // add           %dh,%al
@@ -22670,13 +22984,13 @@ BALIGN16
   .byte  136,136,61,137,136,136              // mov           %cl,-0x777776c3(%rax)
   .byte  61,137,136,136,61                   // cmp           $0x3d888889,%eax
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            4535 <.literal16+0xb55>
+  .byte  112,65                              // jo            4665 <.literal16+0xba5>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            4539 <.literal16+0xb59>
+  .byte  112,65                              // jo            4669 <.literal16+0xba9>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            453d <.literal16+0xb5d>
+  .byte  112,65                              // jo            466d <.literal16+0xbad>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            4541 <.literal16+0xb61>
+  .byte  112,65                              // jo            4671 <.literal16+0xbb1>
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  255,0                               // incl          (%rax)
@@ -22691,7 +23005,7 @@ BALIGN16
   .byte  5,255,255,255,9                     // add           $0x9ffffff,%eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3004530 <_sk_callback_sse41+0x3000c33>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3004660 <_sk_callback_sse41+0x3000c7c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -22718,7 +23032,7 @@ BALIGN16
   .byte  5,255,255,255,9                     // add           $0x9ffffff,%eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3004570 <_sk_callback_sse41+0x3000c73>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 30046a0 <_sk_callback_sse41+0x3000cbc>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -22733,11 +23047,11 @@ BALIGN16
   .byte  255,0                               // incl          (%rax)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            45cb <.literal16+0xbeb>
+  .byte  127,67                              // jg            46fb <.literal16+0xc3b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            45cf <.literal16+0xbef>
+  .byte  127,67                              // jg            46ff <.literal16+0xc3f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            45d3 <.literal16+0xbf3>
+  .byte  127,67                              // jg            4703 <.literal16+0xc43>
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
@@ -22813,13 +23127,13 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  255                                 // (bad)
-  .byte  127,71                              // jg            469b <.literal16+0xcbb>
+  .byte  127,71                              // jg            47cb <.literal16+0xd0b>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            469f <.literal16+0xcbf>
+  .byte  127,71                              // jg            47cf <.literal16+0xd0f>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            46a3 <.literal16+0xcc3>
+  .byte  127,71                              // jg            47d3 <.literal16+0xd13>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            46a7 <.literal16+0xcc7>
+  .byte  127,71                              // jg            47d7 <.literal16+0xd17>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,208                              // ds            (bad)
@@ -22945,11 +23259,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         47b2 <.literal16+0xdd2>
+  .byte  62,114,28                           // jb,pt         48e2 <.literal16+0xe22>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         47b6 <.literal16+0xdd6>
+  .byte  62,114,28                           // jb,pt         48e6 <.literal16+0xe26>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         47ba <.literal16+0xdda>
+  .byte  62,114,28                           // jb,pt         48ea <.literal16+0xe2a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -22993,7 +23307,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d645 <_sk_callback_sse41+0x3d639d48>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d775 <_sk_callback_sse41+0x3d639d91>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -23019,7 +23333,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d685 <_sk_callback_sse41+0x3d639d88>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d7b5 <_sk_callback_sse41+0x3d639dd1>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -23028,13 +23342,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            487e <.literal16+0xe9e>
+  .byte  114,28                              // jb            49ae <.literal16+0xeee>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4882 <.literal16+0xea2>
+  .byte  62,114,28                           // jb,pt         49b2 <.literal16+0xef2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4886 <.literal16+0xea6>
+  .byte  62,114,28                           // jb,pt         49b6 <.literal16+0xef6>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         488a <.literal16+0xeaa>
+  .byte  62,114,28                           // jb,pt         49ba <.literal16+0xefa>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -23055,11 +23369,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         48c2 <.literal16+0xee2>
+  .byte  62,114,28                           // jb,pt         49f2 <.literal16+0xf32>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         48c6 <.literal16+0xee6>
+  .byte  62,114,28                           // jb,pt         49f6 <.literal16+0xf36>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         48ca <.literal16+0xeea>
+  .byte  62,114,28                           // jb,pt         49fa <.literal16+0xf3a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -23103,7 +23417,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d755 <_sk_callback_sse41+0x3d639e58>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d885 <_sk_callback_sse41+0x3d639ea1>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -23129,7 +23443,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d795 <_sk_callback_sse41+0x3d639e98>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63d8c5 <_sk_callback_sse41+0x3d639ee1>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -23138,13 +23452,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            498e <.literal16+0xfae>
+  .byte  114,28                              // jb            4abe <.literal16+0xffe>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4992 <_sk_callback_sse41+0x1095>
+  .byte  62,114,28                           // jb,pt         4ac2 <_sk_callback_sse41+0x10de>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4996 <_sk_callback_sse41+0x1099>
+  .byte  62,114,28                           // jb,pt         4ac6 <_sk_callback_sse41+0x10e2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         499a <_sk_callback_sse41+0x109d>
+  .byte  62,114,28                           // jb,pt         4aca <_sk_callback_sse41+0x10e6>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -23214,7 +23528,7 @@ _sk_seed_shader_sse2:
   .byte  102,15,110,199                      // movd          %edi,%xmm0
   .byte  102,15,112,192,0                    // pshufd        $0x0,%xmm0,%xmm0
   .byte  15,91,200                           // cvtdq2ps      %xmm0,%xmm1
-  .byte  15,40,21,212,61,0,0                 // movaps        0x3dd4(%rip),%xmm2        # 3e50 <_sk_callback_sse2+0xd7>
+  .byte  15,40,21,196,62,0,0                 // movaps        0x3ec4(%rip),%xmm2        # 3f40 <_sk_callback_sse2+0xe0>
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  15,16,2                             // movups        (%rdx),%xmm0
   .byte  15,88,193                           // addps         %xmm1,%xmm0
@@ -23223,7 +23537,7 @@ _sk_seed_shader_sse2:
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,21,195,61,0,0                 // movaps        0x3dc3(%rip),%xmm2        # 3e60 <_sk_callback_sse2+0xe7>
+  .byte  15,40,21,179,62,0,0                 // movaps        0x3eb3(%rip),%xmm2        # 3f50 <_sk_callback_sse2+0xf0>
   .byte  15,87,219                           // xorps         %xmm3,%xmm3
   .byte  15,87,228                           // xorps         %xmm4,%xmm4
   .byte  15,87,237                           // xorps         %xmm5,%xmm5
@@ -23231,6 +23545,56 @@ _sk_seed_shader_sse2:
   .byte  15,87,255                           // xorps         %xmm7,%xmm7
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_dither_sse2
+.globl _sk_dither_sse2
+FUNCTION(_sk_dither_sse2)
+_sk_dither_sse2:
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  102,68,15,110,199                   // movd          %edi,%xmm8
+  .byte  102,69,15,112,192,0                 // pshufd        $0x0,%xmm8,%xmm8
+  .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
+  .byte  68,15,16,10                         // movups        (%rdx),%xmm9
+  .byte  69,15,88,200                        // addps         %xmm8,%xmm9
+  .byte  243,69,15,91,201                    // cvttps2dq     %xmm9,%xmm9
+  .byte  72,139,8                            // mov           (%rax),%rcx
+  .byte  102,68,15,110,1                     // movd          (%rcx),%xmm8
+  .byte  102,69,15,112,192,0                 // pshufd        $0x0,%xmm8,%xmm8
+  .byte  102,69,15,239,193                   // pxor          %xmm9,%xmm8
+  .byte  102,68,15,111,21,120,62,0,0         // movdqa        0x3e78(%rip),%xmm10        # 3f60 <_sk_callback_sse2+0x100>
+  .byte  102,69,15,111,216                   // movdqa        %xmm8,%xmm11
+  .byte  102,69,15,219,218                   // pand          %xmm10,%xmm11
+  .byte  102,65,15,114,243,5                 // pslld         $0x5,%xmm11
+  .byte  102,69,15,219,209                   // pand          %xmm9,%xmm10
+  .byte  102,65,15,114,242,4                 // pslld         $0x4,%xmm10
+  .byte  102,68,15,111,37,100,62,0,0         // movdqa        0x3e64(%rip),%xmm12        # 3f70 <_sk_callback_sse2+0x110>
+  .byte  102,68,15,111,45,107,62,0,0         // movdqa        0x3e6b(%rip),%xmm13        # 3f80 <_sk_callback_sse2+0x120>
+  .byte  102,69,15,111,240                   // movdqa        %xmm8,%xmm14
+  .byte  102,69,15,219,245                   // pand          %xmm13,%xmm14
+  .byte  102,65,15,114,246,2                 // pslld         $0x2,%xmm14
+  .byte  102,69,15,219,233                   // pand          %xmm9,%xmm13
+  .byte  102,69,15,254,237                   // paddd         %xmm13,%xmm13
+  .byte  102,69,15,219,196                   // pand          %xmm12,%xmm8
+  .byte  102,65,15,114,208,1                 // psrld         $0x1,%xmm8
+  .byte  102,69,15,219,204                   // pand          %xmm12,%xmm9
+  .byte  102,65,15,114,209,2                 // psrld         $0x2,%xmm9
+  .byte  102,69,15,235,234                   // por           %xmm10,%xmm13
+  .byte  102,69,15,235,233                   // por           %xmm9,%xmm13
+  .byte  102,69,15,235,243                   // por           %xmm11,%xmm14
+  .byte  102,69,15,235,245                   // por           %xmm13,%xmm14
+  .byte  102,69,15,235,240                   // por           %xmm8,%xmm14
+  .byte  69,15,91,198                        // cvtdq2ps      %xmm14,%xmm8
+  .byte  68,15,89,5,38,62,0,0                // mulps         0x3e26(%rip),%xmm8        # 3f90 <_sk_callback_sse2+0x130>
+  .byte  68,15,88,5,46,62,0,0                // addps         0x3e2e(%rip),%xmm8        # 3fa0 <_sk_callback_sse2+0x140>
+  .byte  243,68,15,16,72,8                   // movss         0x8(%rax),%xmm9
+  .byte  69,15,198,201,0                     // shufps        $0x0,%xmm9,%xmm9
+  .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
+  .byte  68,15,89,203                        // mulps         %xmm3,%xmm9
+  .byte  65,15,88,193                        // addps         %xmm9,%xmm0
+  .byte  65,15,88,201                        // addps         %xmm9,%xmm1
+  .byte  65,15,88,209                        // addps         %xmm9,%xmm2
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_constant_color_sse2
 .globl _sk_constant_color_sse2
 FUNCTION(_sk_constant_color_sse2)
@@ -23263,7 +23627,7 @@ HIDDEN _sk_srcatop_sse2
 FUNCTION(_sk_srcatop_sse2)
 _sk_srcatop_sse2:
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  68,15,40,5,126,61,0,0               // movaps        0x3d7e(%rip),%xmm8        # 3e70 <_sk_callback_sse2+0xf7>
+  .byte  68,15,40,5,215,61,0,0               // movaps        0x3dd7(%rip),%xmm8        # 3fb0 <_sk_callback_sse2+0x150>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -23288,7 +23652,7 @@ FUNCTION(_sk_dstatop_sse2)
 _sk_dstatop_sse2:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
   .byte  68,15,89,196                        // mulps         %xmm4,%xmm8
-  .byte  68,15,40,13,65,61,0,0               // movaps        0x3d41(%rip),%xmm9        # 3e80 <_sk_callback_sse2+0x107>
+  .byte  68,15,40,13,154,61,0,0              // movaps        0x3d9a(%rip),%xmm9        # 3fc0 <_sk_callback_sse2+0x160>
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
@@ -23335,7 +23699,7 @@ HIDDEN _sk_srcout_sse2
 .globl _sk_srcout_sse2
 FUNCTION(_sk_srcout_sse2)
 _sk_srcout_sse2:
-  .byte  68,15,40,5,229,60,0,0               // movaps        0x3ce5(%rip),%xmm8        # 3e90 <_sk_callback_sse2+0x117>
+  .byte  68,15,40,5,62,61,0,0                // movaps        0x3d3e(%rip),%xmm8        # 3fd0 <_sk_callback_sse2+0x170>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
@@ -23348,7 +23712,7 @@ HIDDEN _sk_dstout_sse2
 .globl _sk_dstout_sse2
 FUNCTION(_sk_dstout_sse2)
 _sk_dstout_sse2:
-  .byte  68,15,40,5,213,60,0,0               // movaps        0x3cd5(%rip),%xmm8        # 3ea0 <_sk_callback_sse2+0x127>
+  .byte  68,15,40,5,46,61,0,0                // movaps        0x3d2e(%rip),%xmm8        # 3fe0 <_sk_callback_sse2+0x180>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  15,89,196                           // mulps         %xmm4,%xmm0
@@ -23365,7 +23729,7 @@ HIDDEN _sk_srcover_sse2
 .globl _sk_srcover_sse2
 FUNCTION(_sk_srcover_sse2)
 _sk_srcover_sse2:
-  .byte  68,15,40,5,184,60,0,0               // movaps        0x3cb8(%rip),%xmm8        # 3eb0 <_sk_callback_sse2+0x137>
+  .byte  68,15,40,5,17,61,0,0                // movaps        0x3d11(%rip),%xmm8        # 3ff0 <_sk_callback_sse2+0x190>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -23385,7 +23749,7 @@ HIDDEN _sk_dstover_sse2
 .globl _sk_dstover_sse2
 FUNCTION(_sk_dstover_sse2)
 _sk_dstover_sse2:
-  .byte  68,15,40,5,140,60,0,0               // movaps        0x3c8c(%rip),%xmm8        # 3ec0 <_sk_callback_sse2+0x147>
+  .byte  68,15,40,5,229,60,0,0               // movaps        0x3ce5(%rip),%xmm8        # 4000 <_sk_callback_sse2+0x1a0>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -23413,7 +23777,7 @@ HIDDEN _sk_multiply_sse2
 .globl _sk_multiply_sse2
 FUNCTION(_sk_multiply_sse2)
 _sk_multiply_sse2:
-  .byte  68,15,40,5,96,60,0,0                // movaps        0x3c60(%rip),%xmm8        # 3ed0 <_sk_callback_sse2+0x157>
+  .byte  68,15,40,5,185,60,0,0               // movaps        0x3cb9(%rip),%xmm8        # 4010 <_sk_callback_sse2+0x1b0>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
@@ -23489,7 +23853,7 @@ HIDDEN _sk_xor__sse2
 FUNCTION(_sk_xor__sse2)
 _sk_xor__sse2:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
-  .byte  15,40,29,145,59,0,0                 // movaps        0x3b91(%rip),%xmm3        # 3ee0 <_sk_callback_sse2+0x167>
+  .byte  15,40,29,234,59,0,0                 // movaps        0x3bea(%rip),%xmm3        # 4020 <_sk_callback_sse2+0x1c0>
   .byte  68,15,40,203                        // movaps        %xmm3,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
@@ -23537,7 +23901,7 @@ _sk_darken_sse2:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,95,209                        // maxps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,252,58,0,0                 // movaps        0x3afc(%rip),%xmm2        # 3ef0 <_sk_callback_sse2+0x177>
+  .byte  15,40,21,85,59,0,0                  // movaps        0x3b55(%rip),%xmm2        # 4030 <_sk_callback_sse2+0x1d0>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -23571,7 +23935,7 @@ _sk_lighten_sse2:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,161,58,0,0                 // movaps        0x3aa1(%rip),%xmm2        # 3f00 <_sk_callback_sse2+0x187>
+  .byte  15,40,21,250,58,0,0                 // movaps        0x3afa(%rip),%xmm2        # 4040 <_sk_callback_sse2+0x1e0>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -23608,7 +23972,7 @@ _sk_difference_sse2:
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,59,58,0,0                  // movaps        0x3a3b(%rip),%xmm2        # 3f10 <_sk_callback_sse2+0x197>
+  .byte  15,40,21,148,58,0,0                 // movaps        0x3a94(%rip),%xmm2        # 4050 <_sk_callback_sse2+0x1f0>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -23635,7 +23999,7 @@ _sk_exclusion_sse2:
   .byte  15,89,214                           // mulps         %xmm6,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,202                        // subps         %xmm2,%xmm9
-  .byte  15,40,13,252,57,0,0                 // movaps        0x39fc(%rip),%xmm1        # 3f20 <_sk_callback_sse2+0x1a7>
+  .byte  15,40,13,85,58,0,0                  // movaps        0x3a55(%rip),%xmm1        # 4060 <_sk_callback_sse2+0x200>
   .byte  15,92,203                           // subps         %xmm3,%xmm1
   .byte  15,89,207                           // mulps         %xmm7,%xmm1
   .byte  15,88,217                           // addps         %xmm1,%xmm3
@@ -23649,7 +24013,7 @@ HIDDEN _sk_colorburn_sse2
 FUNCTION(_sk_colorburn_sse2)
 _sk_colorburn_sse2:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,40,21,235,57,0,0              // movaps        0x39eb(%rip),%xmm10        # 3f30 <_sk_callback_sse2+0x1b7>
+  .byte  68,15,40,21,68,58,0,0               // movaps        0x3a44(%rip),%xmm10        # 4070 <_sk_callback_sse2+0x210>
   .byte  69,15,40,202                        // movaps        %xmm10,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  69,15,40,217                        // movaps        %xmm9,%xmm11
@@ -23743,7 +24107,7 @@ HIDDEN _sk_colordodge_sse2
 FUNCTION(_sk_colordodge_sse2)
 _sk_colordodge_sse2:
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
-  .byte  68,15,40,21,161,56,0,0              // movaps        0x38a1(%rip),%xmm10        # 3f40 <_sk_callback_sse2+0x1c7>
+  .byte  68,15,40,21,250,56,0,0              // movaps        0x38fa(%rip),%xmm10        # 4080 <_sk_callback_sse2+0x220>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
@@ -23837,7 +24201,7 @@ _sk_hardlight_sse2:
   .byte  15,41,116,36,232                    // movaps        %xmm6,-0x18(%rsp)
   .byte  15,40,245                           // movaps        %xmm5,%xmm6
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
-  .byte  68,15,40,29,86,55,0,0               // movaps        0x3756(%rip),%xmm11        # 3f50 <_sk_callback_sse2+0x1d7>
+  .byte  68,15,40,29,175,55,0,0              // movaps        0x37af(%rip),%xmm11        # 4090 <_sk_callback_sse2+0x230>
   .byte  69,15,40,211                        // movaps        %xmm11,%xmm10
   .byte  68,15,92,215                        // subps         %xmm7,%xmm10
   .byte  69,15,40,194                        // movaps        %xmm10,%xmm8
@@ -23925,7 +24289,7 @@ FUNCTION(_sk_overlay_sse2)
 _sk_overlay_sse2:
   .byte  68,15,40,193                        // movaps        %xmm1,%xmm8
   .byte  68,15,40,232                        // movaps        %xmm0,%xmm13
-  .byte  68,15,40,13,36,54,0,0               // movaps        0x3624(%rip),%xmm9        # 3f60 <_sk_callback_sse2+0x1e7>
+  .byte  68,15,40,13,125,54,0,0              // movaps        0x367d(%rip),%xmm9        # 40a0 <_sk_callback_sse2+0x240>
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
   .byte  68,15,92,215                        // subps         %xmm7,%xmm10
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
@@ -24016,7 +24380,7 @@ _sk_softlight_sse2:
   .byte  68,15,40,213                        // movaps        %xmm5,%xmm10
   .byte  68,15,94,215                        // divps         %xmm7,%xmm10
   .byte  69,15,84,212                        // andps         %xmm12,%xmm10
-  .byte  68,15,40,13,225,52,0,0              // movaps        0x34e1(%rip),%xmm9        # 3f70 <_sk_callback_sse2+0x1f7>
+  .byte  68,15,40,13,58,53,0,0               // movaps        0x353a(%rip),%xmm9        # 40b0 <_sk_callback_sse2+0x250>
   .byte  69,15,40,249                        // movaps        %xmm9,%xmm15
   .byte  69,15,92,250                        // subps         %xmm10,%xmm15
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
@@ -24029,10 +24393,10 @@ _sk_softlight_sse2:
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  65,15,88,194                        // addps         %xmm10,%xmm0
-  .byte  68,15,40,53,187,52,0,0              // movaps        0x34bb(%rip),%xmm14        # 3f80 <_sk_callback_sse2+0x207>
+  .byte  68,15,40,53,20,53,0,0               // movaps        0x3514(%rip),%xmm14        # 40c0 <_sk_callback_sse2+0x260>
   .byte  69,15,88,222                        // addps         %xmm14,%xmm11
   .byte  68,15,89,216                        // mulps         %xmm0,%xmm11
-  .byte  68,15,40,21,187,52,0,0              // movaps        0x34bb(%rip),%xmm10        # 3f90 <_sk_callback_sse2+0x217>
+  .byte  68,15,40,21,20,53,0,0               // movaps        0x3514(%rip),%xmm10        # 40d0 <_sk_callback_sse2+0x270>
   .byte  69,15,89,234                        // mulps         %xmm10,%xmm13
   .byte  69,15,88,235                        // addps         %xmm11,%xmm13
   .byte  15,88,228                           // addps         %xmm4,%xmm4
@@ -24184,7 +24548,7 @@ HIDDEN _sk_clamp_1_sse2
 .globl _sk_clamp_1_sse2
 FUNCTION(_sk_clamp_1_sse2)
 _sk_clamp_1_sse2:
-  .byte  68,15,40,5,202,50,0,0               // movaps        0x32ca(%rip),%xmm8        # 3fa0 <_sk_callback_sse2+0x227>
+  .byte  68,15,40,5,35,51,0,0                // movaps        0x3323(%rip),%xmm8        # 40e0 <_sk_callback_sse2+0x280>
   .byte  65,15,93,192                        // minps         %xmm8,%xmm0
   .byte  65,15,93,200                        // minps         %xmm8,%xmm1
   .byte  65,15,93,208                        // minps         %xmm8,%xmm2
@@ -24196,7 +24560,7 @@ HIDDEN _sk_clamp_a_sse2
 .globl _sk_clamp_a_sse2
 FUNCTION(_sk_clamp_a_sse2)
 _sk_clamp_a_sse2:
-  .byte  15,93,29,191,50,0,0                 // minps         0x32bf(%rip),%xmm3        # 3fb0 <_sk_callback_sse2+0x237>
+  .byte  15,93,29,24,51,0,0                  // minps         0x3318(%rip),%xmm3        # 40f0 <_sk_callback_sse2+0x290>
   .byte  15,93,195                           // minps         %xmm3,%xmm0
   .byte  15,93,203                           // minps         %xmm3,%xmm1
   .byte  15,93,211                           // minps         %xmm3,%xmm2
@@ -24283,7 +24647,7 @@ HIDDEN _sk_unpremul_sse2
 FUNCTION(_sk_unpremul_sse2)
 _sk_unpremul_sse2:
   .byte  69,15,87,192                        // xorps         %xmm8,%xmm8
-  .byte  68,15,40,13,42,50,0,0               // movaps        0x322a(%rip),%xmm9        # 3fc0 <_sk_callback_sse2+0x247>
+  .byte  68,15,40,13,131,50,0,0              // movaps        0x3283(%rip),%xmm9        # 4100 <_sk_callback_sse2+0x2a0>
   .byte  68,15,94,203                        // divps         %xmm3,%xmm9
   .byte  68,15,194,195,4                     // cmpneqps      %xmm3,%xmm8
   .byte  69,15,84,193                        // andps         %xmm9,%xmm8
@@ -24297,20 +24661,20 @@ HIDDEN _sk_from_srgb_sse2
 .globl _sk_from_srgb_sse2
 FUNCTION(_sk_from_srgb_sse2)
 _sk_from_srgb_sse2:
-  .byte  68,15,40,5,21,50,0,0                // movaps        0x3215(%rip),%xmm8        # 3fd0 <_sk_callback_sse2+0x257>
+  .byte  68,15,40,5,110,50,0,0               // movaps        0x326e(%rip),%xmm8        # 4110 <_sk_callback_sse2+0x2b0>
   .byte  68,15,40,232                        // movaps        %xmm0,%xmm13
   .byte  69,15,89,232                        // mulps         %xmm8,%xmm13
   .byte  68,15,40,216                        // movaps        %xmm0,%xmm11
   .byte  69,15,89,219                        // mulps         %xmm11,%xmm11
-  .byte  68,15,40,13,13,50,0,0               // movaps        0x320d(%rip),%xmm9        # 3fe0 <_sk_callback_sse2+0x267>
+  .byte  68,15,40,13,102,50,0,0              // movaps        0x3266(%rip),%xmm9        # 4120 <_sk_callback_sse2+0x2c0>
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
   .byte  69,15,89,241                        // mulps         %xmm9,%xmm14
-  .byte  68,15,40,21,13,50,0,0               // movaps        0x320d(%rip),%xmm10        # 3ff0 <_sk_callback_sse2+0x277>
+  .byte  68,15,40,21,102,50,0,0              // movaps        0x3266(%rip),%xmm10        # 4130 <_sk_callback_sse2+0x2d0>
   .byte  69,15,88,242                        // addps         %xmm10,%xmm14
   .byte  69,15,89,243                        // mulps         %xmm11,%xmm14
-  .byte  68,15,40,29,13,50,0,0               // movaps        0x320d(%rip),%xmm11        # 4000 <_sk_callback_sse2+0x287>
+  .byte  68,15,40,29,102,50,0,0              // movaps        0x3266(%rip),%xmm11        # 4140 <_sk_callback_sse2+0x2e0>
   .byte  69,15,88,243                        // addps         %xmm11,%xmm14
-  .byte  68,15,40,37,17,50,0,0               // movaps        0x3211(%rip),%xmm12        # 4010 <_sk_callback_sse2+0x297>
+  .byte  68,15,40,37,106,50,0,0              // movaps        0x326a(%rip),%xmm12        # 4150 <_sk_callback_sse2+0x2f0>
   .byte  65,15,194,196,1                     // cmpltps       %xmm12,%xmm0
   .byte  68,15,84,232                        // andps         %xmm0,%xmm13
   .byte  65,15,85,198                        // andnps        %xmm14,%xmm0
@@ -24349,20 +24713,20 @@ _sk_to_srgb_sse2:
   .byte  68,15,82,192                        // rsqrtps       %xmm0,%xmm8
   .byte  69,15,83,200                        // rcpps         %xmm8,%xmm9
   .byte  69,15,82,232                        // rsqrtps       %xmm8,%xmm13
-  .byte  68,15,40,5,150,49,0,0               // movaps        0x3196(%rip),%xmm8        # 4020 <_sk_callback_sse2+0x2a7>
+  .byte  68,15,40,5,239,49,0,0               // movaps        0x31ef(%rip),%xmm8        # 4160 <_sk_callback_sse2+0x300>
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
   .byte  69,15,89,240                        // mulps         %xmm8,%xmm14
-  .byte  68,15,40,21,150,49,0,0              // movaps        0x3196(%rip),%xmm10        # 4030 <_sk_callback_sse2+0x2b7>
+  .byte  68,15,40,21,239,49,0,0              // movaps        0x31ef(%rip),%xmm10        # 4170 <_sk_callback_sse2+0x310>
   .byte  69,15,89,202                        // mulps         %xmm10,%xmm9
-  .byte  68,15,40,29,154,49,0,0              // movaps        0x319a(%rip),%xmm11        # 4040 <_sk_callback_sse2+0x2c7>
+  .byte  68,15,40,29,243,49,0,0              // movaps        0x31f3(%rip),%xmm11        # 4180 <_sk_callback_sse2+0x320>
   .byte  69,15,88,203                        // addps         %xmm11,%xmm9
-  .byte  68,15,40,37,158,49,0,0              // movaps        0x319e(%rip),%xmm12        # 4050 <_sk_callback_sse2+0x2d7>
+  .byte  68,15,40,37,247,49,0,0              // movaps        0x31f7(%rip),%xmm12        # 4190 <_sk_callback_sse2+0x330>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,40,13,158,49,0,0              // movaps        0x319e(%rip),%xmm9        # 4060 <_sk_callback_sse2+0x2e7>
+  .byte  68,15,40,13,247,49,0,0              // movaps        0x31f7(%rip),%xmm9        # 41a0 <_sk_callback_sse2+0x340>
   .byte  69,15,40,249                        // movaps        %xmm9,%xmm15
   .byte  69,15,93,253                        // minps         %xmm13,%xmm15
-  .byte  68,15,40,45,158,49,0,0              // movaps        0x319e(%rip),%xmm13        # 4070 <_sk_callback_sse2+0x2f7>
+  .byte  68,15,40,45,247,49,0,0              // movaps        0x31f7(%rip),%xmm13        # 41b0 <_sk_callback_sse2+0x350>
   .byte  65,15,194,197,1                     // cmpltps       %xmm13,%xmm0
   .byte  68,15,84,240                        // andps         %xmm0,%xmm14
   .byte  65,15,85,199                        // andnps        %xmm15,%xmm0
@@ -24412,7 +24776,7 @@ _sk_rgb_to_hsl_sse2:
   .byte  68,15,93,218                        // minps         %xmm2,%xmm11
   .byte  65,15,40,202                        // movaps        %xmm10,%xmm1
   .byte  65,15,92,203                        // subps         %xmm11,%xmm1
-  .byte  68,15,40,45,247,48,0,0              // movaps        0x30f7(%rip),%xmm13        # 4080 <_sk_callback_sse2+0x307>
+  .byte  68,15,40,45,80,49,0,0               // movaps        0x3150(%rip),%xmm13        # 41c0 <_sk_callback_sse2+0x360>
   .byte  68,15,94,233                        // divps         %xmm1,%xmm13
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  65,15,194,192,0                     // cmpeqps       %xmm8,%xmm0
@@ -24421,30 +24785,30 @@ _sk_rgb_to_hsl_sse2:
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,40,241                        // movaps        %xmm9,%xmm14
   .byte  68,15,194,242,1                     // cmpltps       %xmm2,%xmm14
-  .byte  68,15,84,53,221,48,0,0              // andps         0x30dd(%rip),%xmm14        # 4090 <_sk_callback_sse2+0x317>
+  .byte  68,15,84,53,54,49,0,0               // andps         0x3136(%rip),%xmm14        # 41d0 <_sk_callback_sse2+0x370>
   .byte  69,15,88,244                        // addps         %xmm12,%xmm14
   .byte  69,15,40,250                        // movaps        %xmm10,%xmm15
   .byte  69,15,194,249,0                     // cmpeqps       %xmm9,%xmm15
   .byte  65,15,92,208                        // subps         %xmm8,%xmm2
   .byte  65,15,89,213                        // mulps         %xmm13,%xmm2
-  .byte  68,15,40,37,208,48,0,0              // movaps        0x30d0(%rip),%xmm12        # 40a0 <_sk_callback_sse2+0x327>
+  .byte  68,15,40,37,41,49,0,0               // movaps        0x3129(%rip),%xmm12        # 41e0 <_sk_callback_sse2+0x380>
   .byte  65,15,88,212                        // addps         %xmm12,%xmm2
   .byte  69,15,92,193                        // subps         %xmm9,%xmm8
   .byte  69,15,89,197                        // mulps         %xmm13,%xmm8
-  .byte  68,15,88,5,204,48,0,0               // addps         0x30cc(%rip),%xmm8        # 40b0 <_sk_callback_sse2+0x337>
+  .byte  68,15,88,5,37,49,0,0                // addps         0x3125(%rip),%xmm8        # 41f0 <_sk_callback_sse2+0x390>
   .byte  65,15,84,215                        // andps         %xmm15,%xmm2
   .byte  69,15,85,248                        // andnps        %xmm8,%xmm15
   .byte  68,15,86,250                        // orps          %xmm2,%xmm15
   .byte  68,15,84,240                        // andps         %xmm0,%xmm14
   .byte  65,15,85,199                        // andnps        %xmm15,%xmm0
   .byte  65,15,86,198                        // orps          %xmm14,%xmm0
-  .byte  15,89,5,189,48,0,0                  // mulps         0x30bd(%rip),%xmm0        # 40c0 <_sk_callback_sse2+0x347>
+  .byte  15,89,5,22,49,0,0                   // mulps         0x3116(%rip),%xmm0        # 4200 <_sk_callback_sse2+0x3a0>
   .byte  69,15,40,194                        // movaps        %xmm10,%xmm8
   .byte  69,15,194,195,4                     // cmpneqps      %xmm11,%xmm8
   .byte  65,15,84,192                        // andps         %xmm8,%xmm0
   .byte  69,15,92,226                        // subps         %xmm10,%xmm12
   .byte  69,15,88,211                        // addps         %xmm11,%xmm10
-  .byte  68,15,40,13,176,48,0,0              // movaps        0x30b0(%rip),%xmm9        # 40d0 <_sk_callback_sse2+0x357>
+  .byte  68,15,40,13,9,49,0,0                // movaps        0x3109(%rip),%xmm9        # 4210 <_sk_callback_sse2+0x3b0>
   .byte  65,15,40,210                        // movaps        %xmm10,%xmm2
   .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
   .byte  68,15,194,202,1                     // cmpltps       %xmm2,%xmm9
@@ -24468,7 +24832,7 @@ _sk_hsl_to_rgb_sse2:
   .byte  15,41,92,36,168                     // movaps        %xmm3,-0x58(%rsp)
   .byte  68,15,40,218                        // movaps        %xmm2,%xmm11
   .byte  15,40,240                           // movaps        %xmm0,%xmm6
-  .byte  68,15,40,13,111,48,0,0              // movaps        0x306f(%rip),%xmm9        # 40e0 <_sk_callback_sse2+0x367>
+  .byte  68,15,40,13,200,48,0,0              // movaps        0x30c8(%rip),%xmm9        # 4220 <_sk_callback_sse2+0x3c0>
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
   .byte  69,15,194,211,2                     // cmpleps       %xmm11,%xmm10
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
@@ -24485,28 +24849,28 @@ _sk_hsl_to_rgb_sse2:
   .byte  69,15,88,211                        // addps         %xmm11,%xmm10
   .byte  69,15,88,219                        // addps         %xmm11,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  15,40,5,56,48,0,0                   // movaps        0x3038(%rip),%xmm0        # 40f0 <_sk_callback_sse2+0x377>
+  .byte  15,40,5,145,48,0,0                  // movaps        0x3091(%rip),%xmm0        # 4230 <_sk_callback_sse2+0x3d0>
   .byte  15,88,198                           // addps         %xmm6,%xmm0
   .byte  243,15,91,200                       // cvttps2dq     %xmm0,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  15,40,216                           // movaps        %xmm0,%xmm3
   .byte  15,194,217,1                        // cmpltps       %xmm1,%xmm3
-  .byte  15,84,29,48,48,0,0                  // andps         0x3030(%rip),%xmm3        # 4100 <_sk_callback_sse2+0x387>
+  .byte  15,84,29,137,48,0,0                 // andps         0x3089(%rip),%xmm3        # 4240 <_sk_callback_sse2+0x3e0>
   .byte  15,92,203                           // subps         %xmm3,%xmm1
   .byte  15,92,193                           // subps         %xmm1,%xmm0
-  .byte  68,15,40,45,50,48,0,0               // movaps        0x3032(%rip),%xmm13        # 4110 <_sk_callback_sse2+0x397>
+  .byte  68,15,40,45,139,48,0,0              // movaps        0x308b(%rip),%xmm13        # 4250 <_sk_callback_sse2+0x3f0>
   .byte  69,15,40,197                        // movaps        %xmm13,%xmm8
   .byte  68,15,194,192,2                     // cmpleps       %xmm0,%xmm8
   .byte  69,15,40,242                        // movaps        %xmm10,%xmm14
   .byte  69,15,92,243                        // subps         %xmm11,%xmm14
   .byte  65,15,40,217                        // movaps        %xmm9,%xmm3
   .byte  15,194,216,2                        // cmpleps       %xmm0,%xmm3
-  .byte  15,40,21,66,48,0,0                  // movaps        0x3042(%rip),%xmm2        # 4140 <_sk_callback_sse2+0x3c7>
+  .byte  15,40,21,155,48,0,0                 // movaps        0x309b(%rip),%xmm2        # 4280 <_sk_callback_sse2+0x420>
   .byte  68,15,40,250                        // movaps        %xmm2,%xmm15
   .byte  68,15,194,248,2                     // cmpleps       %xmm0,%xmm15
-  .byte  15,40,13,18,48,0,0                  // movaps        0x3012(%rip),%xmm1        # 4120 <_sk_callback_sse2+0x3a7>
+  .byte  15,40,13,107,48,0,0                 // movaps        0x306b(%rip),%xmm1        # 4260 <_sk_callback_sse2+0x400>
   .byte  15,89,193                           // mulps         %xmm1,%xmm0
-  .byte  15,40,45,24,48,0,0                  // movaps        0x3018(%rip),%xmm5        # 4130 <_sk_callback_sse2+0x3b7>
+  .byte  15,40,45,113,48,0,0                 // movaps        0x3071(%rip),%xmm5        # 4270 <_sk_callback_sse2+0x410>
   .byte  15,40,229                           // movaps        %xmm5,%xmm4
   .byte  15,92,224                           // subps         %xmm0,%xmm4
   .byte  65,15,89,230                        // mulps         %xmm14,%xmm4
@@ -24529,7 +24893,7 @@ _sk_hsl_to_rgb_sse2:
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
   .byte  15,40,222                           // movaps        %xmm6,%xmm3
   .byte  15,194,216,1                        // cmpltps       %xmm0,%xmm3
-  .byte  15,84,29,141,47,0,0                 // andps         0x2f8d(%rip),%xmm3        # 4100 <_sk_callback_sse2+0x387>
+  .byte  15,84,29,230,47,0,0                 // andps         0x2fe6(%rip),%xmm3        # 4240 <_sk_callback_sse2+0x3e0>
   .byte  15,92,195                           // subps         %xmm3,%xmm0
   .byte  68,15,40,230                        // movaps        %xmm6,%xmm12
   .byte  68,15,92,224                        // subps         %xmm0,%xmm12
@@ -24559,12 +24923,12 @@ _sk_hsl_to_rgb_sse2:
   .byte  15,40,124,36,136                    // movaps        -0x78(%rsp),%xmm7
   .byte  15,40,231                           // movaps        %xmm7,%xmm4
   .byte  15,85,227                           // andnps        %xmm3,%xmm4
-  .byte  15,88,53,101,47,0,0                 // addps         0x2f65(%rip),%xmm6        # 4150 <_sk_callback_sse2+0x3d7>
+  .byte  15,88,53,190,47,0,0                 // addps         0x2fbe(%rip),%xmm6        # 4290 <_sk_callback_sse2+0x430>
   .byte  243,15,91,198                       // cvttps2dq     %xmm6,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
   .byte  15,40,222                           // movaps        %xmm6,%xmm3
   .byte  15,194,216,1                        // cmpltps       %xmm0,%xmm3
-  .byte  15,84,29,0,47,0,0                   // andps         0x2f00(%rip),%xmm3        # 4100 <_sk_callback_sse2+0x387>
+  .byte  15,84,29,89,47,0,0                  // andps         0x2f59(%rip),%xmm3        # 4240 <_sk_callback_sse2+0x3e0>
   .byte  15,92,195                           // subps         %xmm3,%xmm0
   .byte  15,92,240                           // subps         %xmm0,%xmm6
   .byte  15,89,206                           // mulps         %xmm6,%xmm1
@@ -24628,7 +24992,7 @@ _sk_scale_u8_sse2:
   .byte  102,69,15,96,193                    // punpcklbw     %xmm9,%xmm8
   .byte  102,69,15,97,193                    // punpcklwd     %xmm9,%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,142,46,0,0               // mulps         0x2e8e(%rip),%xmm8        # 4160 <_sk_callback_sse2+0x3e7>
+  .byte  68,15,89,5,231,46,0,0               // mulps         0x2ee7(%rip),%xmm8        # 42a0 <_sk_callback_sse2+0x440>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
@@ -24669,7 +25033,7 @@ _sk_lerp_u8_sse2:
   .byte  102,69,15,96,193                    // punpcklbw     %xmm9,%xmm8
   .byte  102,69,15,97,193                    // punpcklwd     %xmm9,%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,44,46,0,0                // mulps         0x2e2c(%rip),%xmm8        # 4170 <_sk_callback_sse2+0x3f7>
+  .byte  68,15,89,5,133,46,0,0               // mulps         0x2e85(%rip),%xmm8        # 42b0 <_sk_callback_sse2+0x450>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -24694,17 +25058,17 @@ _sk_lerp_565_sse2:
   .byte  243,68,15,126,4,120                 // movq          (%rax,%rdi,2),%xmm8
   .byte  102,15,239,219                      // pxor          %xmm3,%xmm3
   .byte  102,68,15,97,195                    // punpcklwd     %xmm3,%xmm8
-  .byte  102,15,111,29,244,45,0,0            // movdqa        0x2df4(%rip),%xmm3        # 4180 <_sk_callback_sse2+0x407>
+  .byte  102,15,111,29,77,46,0,0             // movdqa        0x2e4d(%rip),%xmm3        # 42c0 <_sk_callback_sse2+0x460>
   .byte  102,65,15,219,216                   // pand          %xmm8,%xmm3
   .byte  68,15,91,203                        // cvtdq2ps      %xmm3,%xmm9
-  .byte  68,15,89,13,243,45,0,0              // mulps         0x2df3(%rip),%xmm9        # 4190 <_sk_callback_sse2+0x417>
-  .byte  102,15,111,29,251,45,0,0            // movdqa        0x2dfb(%rip),%xmm3        # 41a0 <_sk_callback_sse2+0x427>
+  .byte  68,15,89,13,76,46,0,0               // mulps         0x2e4c(%rip),%xmm9        # 42d0 <_sk_callback_sse2+0x470>
+  .byte  102,15,111,29,84,46,0,0             // movdqa        0x2e54(%rip),%xmm3        # 42e0 <_sk_callback_sse2+0x480>
   .byte  102,65,15,219,216                   // pand          %xmm8,%xmm3
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,252,45,0,0                 // mulps         0x2dfc(%rip),%xmm3        # 41b0 <_sk_callback_sse2+0x437>
-  .byte  102,68,15,219,5,3,46,0,0            // pand          0x2e03(%rip),%xmm8        # 41c0 <_sk_callback_sse2+0x447>
+  .byte  15,89,29,85,46,0,0                  // mulps         0x2e55(%rip),%xmm3        # 42f0 <_sk_callback_sse2+0x490>
+  .byte  102,68,15,219,5,92,46,0,0           // pand          0x2e5c(%rip),%xmm8        # 4300 <_sk_callback_sse2+0x4a0>
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,7,46,0,0                 // mulps         0x2e07(%rip),%xmm8        # 41d0 <_sk_callback_sse2+0x457>
+  .byte  68,15,89,5,96,46,0,0                // mulps         0x2e60(%rip),%xmm8        # 4310 <_sk_callback_sse2+0x4b0>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -24715,7 +25079,7 @@ _sk_lerp_565_sse2:
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  15,88,214                           // addps         %xmm6,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,241,45,0,0                 // movaps        0x2df1(%rip),%xmm3        # 41e0 <_sk_callback_sse2+0x467>
+  .byte  15,40,29,74,46,0,0                  // movaps        0x2e4a(%rip),%xmm3        # 4320 <_sk_callback_sse2+0x4c0>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_load_tables_sse2
@@ -24726,7 +25090,7 @@ _sk_load_tables_sse2:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  243,69,15,111,12,184                // movdqu        (%r8,%rdi,4),%xmm9
-  .byte  102,68,15,111,5,231,45,0,0          // movdqa        0x2de7(%rip),%xmm8        # 41f0 <_sk_callback_sse2+0x477>
+  .byte  102,68,15,111,5,64,46,0,0           // movdqa        0x2e40(%rip),%xmm8        # 4330 <_sk_callback_sse2+0x4d0>
   .byte  102,65,15,111,193                   // movdqa        %xmm9,%xmm0
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
@@ -24781,7 +25145,7 @@ _sk_load_tables_sse2:
   .byte  65,15,20,208                        // unpcklps      %xmm8,%xmm2
   .byte  102,65,15,114,209,24                // psrld         $0x18,%xmm9
   .byte  65,15,91,217                        // cvtdq2ps      %xmm9,%xmm3
-  .byte  15,89,29,244,44,0,0                 // mulps         0x2cf4(%rip),%xmm3        # 4200 <_sk_callback_sse2+0x487>
+  .byte  15,89,29,77,45,0,0                  // mulps         0x2d4d(%rip),%xmm3        # 4340 <_sk_callback_sse2+0x4e0>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -24800,7 +25164,7 @@ _sk_load_tables_u16_be_sse2:
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,97,200                       // punpcklwd     %xmm0,%xmm1
   .byte  102,68,15,105,200                   // punpckhwd     %xmm0,%xmm9
-  .byte  102,68,15,111,21,199,44,0,0         // movdqa        0x2cc7(%rip),%xmm10        # 4210 <_sk_callback_sse2+0x497>
+  .byte  102,68,15,111,21,32,45,0,0          // movdqa        0x2d20(%rip),%xmm10        # 4350 <_sk_callback_sse2+0x4f0>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,194                   // pand          %xmm10,%xmm0
   .byte  102,69,15,239,192                   // pxor          %xmm8,%xmm8
@@ -24861,7 +25225,7 @@ _sk_load_tables_u16_be_sse2:
   .byte  102,65,15,235,217                   // por           %xmm9,%xmm3
   .byte  102,65,15,97,216                    // punpcklwd     %xmm8,%xmm3
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,182,43,0,0                 // mulps         0x2bb6(%rip),%xmm3        # 4220 <_sk_callback_sse2+0x4a7>
+  .byte  15,89,29,15,44,0,0                  // mulps         0x2c0f(%rip),%xmm3        # 4360 <_sk_callback_sse2+0x500>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -24883,7 +25247,7 @@ _sk_load_tables_rgb_u16_be_sse2:
   .byte  102,68,15,97,208                    // punpcklwd     %xmm0,%xmm10
   .byte  102,65,15,111,195                   // movdqa        %xmm11,%xmm0
   .byte  102,65,15,97,194                    // punpcklwd     %xmm10,%xmm0
-  .byte  102,68,15,111,5,118,43,0,0          // movdqa        0x2b76(%rip),%xmm8        # 4230 <_sk_callback_sse2+0x4b7>
+  .byte  102,68,15,111,5,207,43,0,0          // movdqa        0x2bcf(%rip),%xmm8        # 4370 <_sk_callback_sse2+0x510>
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,69,15,239,201                   // pxor          %xmm9,%xmm9
@@ -24938,7 +25302,7 @@ _sk_load_tables_rgb_u16_be_sse2:
   .byte  15,20,211                           // unpcklps      %xmm3,%xmm2
   .byte  65,15,20,208                        // unpcklps      %xmm8,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,133,42,0,0                 // movaps        0x2a85(%rip),%xmm3        # 4240 <_sk_callback_sse2+0x4c7>
+  .byte  15,40,29,222,42,0,0                 // movaps        0x2ade(%rip),%xmm3        # 4380 <_sk_callback_sse2+0x520>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_byte_tables_sse2
@@ -24948,7 +25312,7 @@ _sk_byte_tables_sse2:
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,134,42,0,0               // movaps        0x2a86(%rip),%xmm8        # 4250 <_sk_callback_sse2+0x4d7>
+  .byte  68,15,40,5,223,42,0,0               // movaps        0x2adf(%rip),%xmm8        # 4390 <_sk_callback_sse2+0x530>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,91,192                       // cvtps2dq      %xmm0,%xmm0
   .byte  102,72,15,126,193                   // movq          %xmm0,%rcx
@@ -24975,7 +25339,7 @@ _sk_byte_tables_sse2:
   .byte  102,65,15,96,193                    // punpcklbw     %xmm9,%xmm0
   .byte  102,65,15,97,193                    // punpcklwd     %xmm9,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,21,35,42,0,0               // movaps        0x2a23(%rip),%xmm10        # 4260 <_sk_callback_sse2+0x4e7>
+  .byte  68,15,40,21,124,42,0,0              // movaps        0x2a7c(%rip),%xmm10        # 43a0 <_sk_callback_sse2+0x540>
   .byte  65,15,89,194                        // mulps         %xmm10,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -25091,7 +25455,7 @@ _sk_byte_tables_rgb_sse2:
   .byte  102,65,15,96,193                    // punpcklbw     %xmm9,%xmm0
   .byte  102,65,15,97,193                    // punpcklwd     %xmm9,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,21,118,40,0,0              // movaps        0x2876(%rip),%xmm10        # 4270 <_sk_callback_sse2+0x4f7>
+  .byte  68,15,40,21,207,40,0,0              // movaps        0x28cf(%rip),%xmm10        # 43b0 <_sk_callback_sse2+0x550>
   .byte  65,15,89,194                        // mulps         %xmm10,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -25288,15 +25652,15 @@ _sk_parametric_r_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,181,37,0,0              // mulps         0x25b5(%rip),%xmm9        # 4280 <_sk_callback_sse2+0x507>
-  .byte  68,15,84,21,189,37,0,0              // andps         0x25bd(%rip),%xmm10        # 4290 <_sk_callback_sse2+0x517>
-  .byte  68,15,86,21,197,37,0,0              // orps          0x25c5(%rip),%xmm10        # 42a0 <_sk_callback_sse2+0x527>
-  .byte  68,15,88,13,205,37,0,0              // addps         0x25cd(%rip),%xmm9        # 42b0 <_sk_callback_sse2+0x537>
-  .byte  68,15,40,37,213,37,0,0              // movaps        0x25d5(%rip),%xmm12        # 42c0 <_sk_callback_sse2+0x547>
+  .byte  68,15,89,13,14,38,0,0               // mulps         0x260e(%rip),%xmm9        # 43c0 <_sk_callback_sse2+0x560>
+  .byte  68,15,84,21,22,38,0,0               // andps         0x2616(%rip),%xmm10        # 43d0 <_sk_callback_sse2+0x570>
+  .byte  68,15,86,21,30,38,0,0               // orps          0x261e(%rip),%xmm10        # 43e0 <_sk_callback_sse2+0x580>
+  .byte  68,15,88,13,38,38,0,0               // addps         0x2626(%rip),%xmm9        # 43f0 <_sk_callback_sse2+0x590>
+  .byte  68,15,40,37,46,38,0,0               // movaps        0x262e(%rip),%xmm12        # 4400 <_sk_callback_sse2+0x5a0>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,213,37,0,0              // addps         0x25d5(%rip),%xmm10        # 42d0 <_sk_callback_sse2+0x557>
-  .byte  68,15,40,37,221,37,0,0              // movaps        0x25dd(%rip),%xmm12        # 42e0 <_sk_callback_sse2+0x567>
+  .byte  68,15,88,21,46,38,0,0               // addps         0x262e(%rip),%xmm10        # 4410 <_sk_callback_sse2+0x5b0>
+  .byte  68,15,40,37,54,38,0,0               // movaps        0x2636(%rip),%xmm12        # 4420 <_sk_callback_sse2+0x5c0>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -25304,22 +25668,22 @@ _sk_parametric_r_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,199,37,0,0              // movaps        0x25c7(%rip),%xmm10        # 42f0 <_sk_callback_sse2+0x577>
+  .byte  68,15,40,21,32,38,0,0               // movaps        0x2620(%rip),%xmm10        # 4430 <_sk_callback_sse2+0x5d0>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,187,37,0,0              // addps         0x25bb(%rip),%xmm9        # 4300 <_sk_callback_sse2+0x587>
-  .byte  68,15,40,37,195,37,0,0              // movaps        0x25c3(%rip),%xmm12        # 4310 <_sk_callback_sse2+0x597>
+  .byte  68,15,88,13,20,38,0,0               // addps         0x2614(%rip),%xmm9        # 4440 <_sk_callback_sse2+0x5e0>
+  .byte  68,15,40,37,28,38,0,0               // movaps        0x261c(%rip),%xmm12        # 4450 <_sk_callback_sse2+0x5f0>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,195,37,0,0              // movaps        0x25c3(%rip),%xmm12        # 4320 <_sk_callback_sse2+0x5a7>
+  .byte  68,15,40,37,28,38,0,0               // movaps        0x261c(%rip),%xmm12        # 4460 <_sk_callback_sse2+0x600>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,199,37,0,0              // movaps        0x25c7(%rip),%xmm13        # 4330 <_sk_callback_sse2+0x5b7>
+  .byte  68,15,40,45,32,38,0,0               // movaps        0x2620(%rip),%xmm13        # 4470 <_sk_callback_sse2+0x610>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,199,37,0,0              // mulps         0x25c7(%rip),%xmm13        # 4340 <_sk_callback_sse2+0x5c7>
+  .byte  68,15,89,45,32,38,0,0               // mulps         0x2620(%rip),%xmm13        # 4480 <_sk_callback_sse2+0x620>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -25355,15 +25719,15 @@ _sk_parametric_g_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,71,37,0,0               // mulps         0x2547(%rip),%xmm9        # 4350 <_sk_callback_sse2+0x5d7>
-  .byte  68,15,84,21,79,37,0,0               // andps         0x254f(%rip),%xmm10        # 4360 <_sk_callback_sse2+0x5e7>
-  .byte  68,15,86,21,87,37,0,0               // orps          0x2557(%rip),%xmm10        # 4370 <_sk_callback_sse2+0x5f7>
-  .byte  68,15,88,13,95,37,0,0               // addps         0x255f(%rip),%xmm9        # 4380 <_sk_callback_sse2+0x607>
-  .byte  68,15,40,37,103,37,0,0              // movaps        0x2567(%rip),%xmm12        # 4390 <_sk_callback_sse2+0x617>
+  .byte  68,15,89,13,160,37,0,0              // mulps         0x25a0(%rip),%xmm9        # 4490 <_sk_callback_sse2+0x630>
+  .byte  68,15,84,21,168,37,0,0              // andps         0x25a8(%rip),%xmm10        # 44a0 <_sk_callback_sse2+0x640>
+  .byte  68,15,86,21,176,37,0,0              // orps          0x25b0(%rip),%xmm10        # 44b0 <_sk_callback_sse2+0x650>
+  .byte  68,15,88,13,184,37,0,0              // addps         0x25b8(%rip),%xmm9        # 44c0 <_sk_callback_sse2+0x660>
+  .byte  68,15,40,37,192,37,0,0              // movaps        0x25c0(%rip),%xmm12        # 44d0 <_sk_callback_sse2+0x670>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,103,37,0,0              // addps         0x2567(%rip),%xmm10        # 43a0 <_sk_callback_sse2+0x627>
-  .byte  68,15,40,37,111,37,0,0              // movaps        0x256f(%rip),%xmm12        # 43b0 <_sk_callback_sse2+0x637>
+  .byte  68,15,88,21,192,37,0,0              // addps         0x25c0(%rip),%xmm10        # 44e0 <_sk_callback_sse2+0x680>
+  .byte  68,15,40,37,200,37,0,0              // movaps        0x25c8(%rip),%xmm12        # 44f0 <_sk_callback_sse2+0x690>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -25371,22 +25735,22 @@ _sk_parametric_g_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,89,37,0,0               // movaps        0x2559(%rip),%xmm10        # 43c0 <_sk_callback_sse2+0x647>
+  .byte  68,15,40,21,178,37,0,0              // movaps        0x25b2(%rip),%xmm10        # 4500 <_sk_callback_sse2+0x6a0>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,77,37,0,0               // addps         0x254d(%rip),%xmm9        # 43d0 <_sk_callback_sse2+0x657>
-  .byte  68,15,40,37,85,37,0,0               // movaps        0x2555(%rip),%xmm12        # 43e0 <_sk_callback_sse2+0x667>
+  .byte  68,15,88,13,166,37,0,0              // addps         0x25a6(%rip),%xmm9        # 4510 <_sk_callback_sse2+0x6b0>
+  .byte  68,15,40,37,174,37,0,0              // movaps        0x25ae(%rip),%xmm12        # 4520 <_sk_callback_sse2+0x6c0>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,85,37,0,0               // movaps        0x2555(%rip),%xmm12        # 43f0 <_sk_callback_sse2+0x677>
+  .byte  68,15,40,37,174,37,0,0              // movaps        0x25ae(%rip),%xmm12        # 4530 <_sk_callback_sse2+0x6d0>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,89,37,0,0               // movaps        0x2559(%rip),%xmm13        # 4400 <_sk_callback_sse2+0x687>
+  .byte  68,15,40,45,178,37,0,0              // movaps        0x25b2(%rip),%xmm13        # 4540 <_sk_callback_sse2+0x6e0>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,89,37,0,0               // mulps         0x2559(%rip),%xmm13        # 4410 <_sk_callback_sse2+0x697>
+  .byte  68,15,89,45,178,37,0,0              // mulps         0x25b2(%rip),%xmm13        # 4550 <_sk_callback_sse2+0x6f0>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -25422,15 +25786,15 @@ _sk_parametric_b_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,217,36,0,0              // mulps         0x24d9(%rip),%xmm9        # 4420 <_sk_callback_sse2+0x6a7>
-  .byte  68,15,84,21,225,36,0,0              // andps         0x24e1(%rip),%xmm10        # 4430 <_sk_callback_sse2+0x6b7>
-  .byte  68,15,86,21,233,36,0,0              // orps          0x24e9(%rip),%xmm10        # 4440 <_sk_callback_sse2+0x6c7>
-  .byte  68,15,88,13,241,36,0,0              // addps         0x24f1(%rip),%xmm9        # 4450 <_sk_callback_sse2+0x6d7>
-  .byte  68,15,40,37,249,36,0,0              // movaps        0x24f9(%rip),%xmm12        # 4460 <_sk_callback_sse2+0x6e7>
+  .byte  68,15,89,13,50,37,0,0               // mulps         0x2532(%rip),%xmm9        # 4560 <_sk_callback_sse2+0x700>
+  .byte  68,15,84,21,58,37,0,0               // andps         0x253a(%rip),%xmm10        # 4570 <_sk_callback_sse2+0x710>
+  .byte  68,15,86,21,66,37,0,0               // orps          0x2542(%rip),%xmm10        # 4580 <_sk_callback_sse2+0x720>
+  .byte  68,15,88,13,74,37,0,0               // addps         0x254a(%rip),%xmm9        # 4590 <_sk_callback_sse2+0x730>
+  .byte  68,15,40,37,82,37,0,0               // movaps        0x2552(%rip),%xmm12        # 45a0 <_sk_callback_sse2+0x740>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,249,36,0,0              // addps         0x24f9(%rip),%xmm10        # 4470 <_sk_callback_sse2+0x6f7>
-  .byte  68,15,40,37,1,37,0,0                // movaps        0x2501(%rip),%xmm12        # 4480 <_sk_callback_sse2+0x707>
+  .byte  68,15,88,21,82,37,0,0               // addps         0x2552(%rip),%xmm10        # 45b0 <_sk_callback_sse2+0x750>
+  .byte  68,15,40,37,90,37,0,0               // movaps        0x255a(%rip),%xmm12        # 45c0 <_sk_callback_sse2+0x760>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -25438,22 +25802,22 @@ _sk_parametric_b_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,235,36,0,0              // movaps        0x24eb(%rip),%xmm10        # 4490 <_sk_callback_sse2+0x717>
+  .byte  68,15,40,21,68,37,0,0               // movaps        0x2544(%rip),%xmm10        # 45d0 <_sk_callback_sse2+0x770>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,223,36,0,0              // addps         0x24df(%rip),%xmm9        # 44a0 <_sk_callback_sse2+0x727>
-  .byte  68,15,40,37,231,36,0,0              // movaps        0x24e7(%rip),%xmm12        # 44b0 <_sk_callback_sse2+0x737>
+  .byte  68,15,88,13,56,37,0,0               // addps         0x2538(%rip),%xmm9        # 45e0 <_sk_callback_sse2+0x780>
+  .byte  68,15,40,37,64,37,0,0               // movaps        0x2540(%rip),%xmm12        # 45f0 <_sk_callback_sse2+0x790>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,231,36,0,0              // movaps        0x24e7(%rip),%xmm12        # 44c0 <_sk_callback_sse2+0x747>
+  .byte  68,15,40,37,64,37,0,0               // movaps        0x2540(%rip),%xmm12        # 4600 <_sk_callback_sse2+0x7a0>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,235,36,0,0              // movaps        0x24eb(%rip),%xmm13        # 44d0 <_sk_callback_sse2+0x757>
+  .byte  68,15,40,45,68,37,0,0               // movaps        0x2544(%rip),%xmm13        # 4610 <_sk_callback_sse2+0x7b0>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,235,36,0,0              // mulps         0x24eb(%rip),%xmm13        # 44e0 <_sk_callback_sse2+0x767>
+  .byte  68,15,89,45,68,37,0,0               // mulps         0x2544(%rip),%xmm13        # 4620 <_sk_callback_sse2+0x7c0>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -25489,15 +25853,15 @@ _sk_parametric_a_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,107,36,0,0              // mulps         0x246b(%rip),%xmm9        # 44f0 <_sk_callback_sse2+0x777>
-  .byte  68,15,84,21,115,36,0,0              // andps         0x2473(%rip),%xmm10        # 4500 <_sk_callback_sse2+0x787>
-  .byte  68,15,86,21,123,36,0,0              // orps          0x247b(%rip),%xmm10        # 4510 <_sk_callback_sse2+0x797>
-  .byte  68,15,88,13,131,36,0,0              // addps         0x2483(%rip),%xmm9        # 4520 <_sk_callback_sse2+0x7a7>
-  .byte  68,15,40,37,139,36,0,0              // movaps        0x248b(%rip),%xmm12        # 4530 <_sk_callback_sse2+0x7b7>
+  .byte  68,15,89,13,196,36,0,0              // mulps         0x24c4(%rip),%xmm9        # 4630 <_sk_callback_sse2+0x7d0>
+  .byte  68,15,84,21,204,36,0,0              // andps         0x24cc(%rip),%xmm10        # 4640 <_sk_callback_sse2+0x7e0>
+  .byte  68,15,86,21,212,36,0,0              // orps          0x24d4(%rip),%xmm10        # 4650 <_sk_callback_sse2+0x7f0>
+  .byte  68,15,88,13,220,36,0,0              // addps         0x24dc(%rip),%xmm9        # 4660 <_sk_callback_sse2+0x800>
+  .byte  68,15,40,37,228,36,0,0              // movaps        0x24e4(%rip),%xmm12        # 4670 <_sk_callback_sse2+0x810>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,139,36,0,0              // addps         0x248b(%rip),%xmm10        # 4540 <_sk_callback_sse2+0x7c7>
-  .byte  68,15,40,37,147,36,0,0              // movaps        0x2493(%rip),%xmm12        # 4550 <_sk_callback_sse2+0x7d7>
+  .byte  68,15,88,21,228,36,0,0              // addps         0x24e4(%rip),%xmm10        # 4680 <_sk_callback_sse2+0x820>
+  .byte  68,15,40,37,236,36,0,0              // movaps        0x24ec(%rip),%xmm12        # 4690 <_sk_callback_sse2+0x830>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -25505,22 +25869,22 @@ _sk_parametric_a_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,125,36,0,0              // movaps        0x247d(%rip),%xmm10        # 4560 <_sk_callback_sse2+0x7e7>
+  .byte  68,15,40,21,214,36,0,0              // movaps        0x24d6(%rip),%xmm10        # 46a0 <_sk_callback_sse2+0x840>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,113,36,0,0              // addps         0x2471(%rip),%xmm9        # 4570 <_sk_callback_sse2+0x7f7>
-  .byte  68,15,40,37,121,36,0,0              // movaps        0x2479(%rip),%xmm12        # 4580 <_sk_callback_sse2+0x807>
+  .byte  68,15,88,13,202,36,0,0              // addps         0x24ca(%rip),%xmm9        # 46b0 <_sk_callback_sse2+0x850>
+  .byte  68,15,40,37,210,36,0,0              // movaps        0x24d2(%rip),%xmm12        # 46c0 <_sk_callback_sse2+0x860>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,121,36,0,0              // movaps        0x2479(%rip),%xmm12        # 4590 <_sk_callback_sse2+0x817>
+  .byte  68,15,40,37,210,36,0,0              // movaps        0x24d2(%rip),%xmm12        # 46d0 <_sk_callback_sse2+0x870>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,125,36,0,0              // movaps        0x247d(%rip),%xmm13        # 45a0 <_sk_callback_sse2+0x827>
+  .byte  68,15,40,45,214,36,0,0              // movaps        0x24d6(%rip),%xmm13        # 46e0 <_sk_callback_sse2+0x880>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,125,36,0,0              // mulps         0x247d(%rip),%xmm13        # 45b0 <_sk_callback_sse2+0x837>
+  .byte  68,15,89,45,214,36,0,0              // mulps         0x24d6(%rip),%xmm13        # 46f0 <_sk_callback_sse2+0x890>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -25537,29 +25901,29 @@ HIDDEN _sk_lab_to_xyz_sse2
 .globl _sk_lab_to_xyz_sse2
 FUNCTION(_sk_lab_to_xyz_sse2)
 _sk_lab_to_xyz_sse2:
-  .byte  15,89,5,90,36,0,0                   // mulps         0x245a(%rip),%xmm0        # 45c0 <_sk_callback_sse2+0x847>
-  .byte  68,15,40,5,98,36,0,0                // movaps        0x2462(%rip),%xmm8        # 45d0 <_sk_callback_sse2+0x857>
+  .byte  15,89,5,179,36,0,0                  // mulps         0x24b3(%rip),%xmm0        # 4700 <_sk_callback_sse2+0x8a0>
+  .byte  68,15,40,5,187,36,0,0               // movaps        0x24bb(%rip),%xmm8        # 4710 <_sk_callback_sse2+0x8b0>
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
-  .byte  68,15,40,13,102,36,0,0              // movaps        0x2466(%rip),%xmm9        # 45e0 <_sk_callback_sse2+0x867>
+  .byte  68,15,40,13,191,36,0,0              // movaps        0x24bf(%rip),%xmm9        # 4720 <_sk_callback_sse2+0x8c0>
   .byte  65,15,88,201                        // addps         %xmm9,%xmm1
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  65,15,88,209                        // addps         %xmm9,%xmm2
-  .byte  15,88,5,99,36,0,0                   // addps         0x2463(%rip),%xmm0        # 45f0 <_sk_callback_sse2+0x877>
-  .byte  15,89,5,108,36,0,0                  // mulps         0x246c(%rip),%xmm0        # 4600 <_sk_callback_sse2+0x887>
-  .byte  15,89,13,117,36,0,0                 // mulps         0x2475(%rip),%xmm1        # 4610 <_sk_callback_sse2+0x897>
+  .byte  15,88,5,188,36,0,0                  // addps         0x24bc(%rip),%xmm0        # 4730 <_sk_callback_sse2+0x8d0>
+  .byte  15,89,5,197,36,0,0                  // mulps         0x24c5(%rip),%xmm0        # 4740 <_sk_callback_sse2+0x8e0>
+  .byte  15,89,13,206,36,0,0                 // mulps         0x24ce(%rip),%xmm1        # 4750 <_sk_callback_sse2+0x8f0>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
-  .byte  15,89,21,123,36,0,0                 // mulps         0x247b(%rip),%xmm2        # 4620 <_sk_callback_sse2+0x8a7>
+  .byte  15,89,21,212,36,0,0                 // mulps         0x24d4(%rip),%xmm2        # 4760 <_sk_callback_sse2+0x900>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  68,15,92,202                        // subps         %xmm2,%xmm9
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
   .byte  68,15,89,225                        // mulps         %xmm1,%xmm12
-  .byte  15,40,21,112,36,0,0                 // movaps        0x2470(%rip),%xmm2        # 4630 <_sk_callback_sse2+0x8b7>
+  .byte  15,40,21,201,36,0,0                 // movaps        0x24c9(%rip),%xmm2        # 4770 <_sk_callback_sse2+0x910>
   .byte  68,15,40,194                        // movaps        %xmm2,%xmm8
   .byte  69,15,194,196,1                     // cmpltps       %xmm12,%xmm8
-  .byte  68,15,40,21,111,36,0,0              // movaps        0x246f(%rip),%xmm10        # 4640 <_sk_callback_sse2+0x8c7>
+  .byte  68,15,40,21,200,36,0,0              // movaps        0x24c8(%rip),%xmm10        # 4780 <_sk_callback_sse2+0x920>
   .byte  65,15,88,202                        // addps         %xmm10,%xmm1
-  .byte  68,15,40,29,115,36,0,0              // movaps        0x2473(%rip),%xmm11        # 4650 <_sk_callback_sse2+0x8d7>
+  .byte  68,15,40,29,204,36,0,0              // movaps        0x24cc(%rip),%xmm11        # 4790 <_sk_callback_sse2+0x930>
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
   .byte  69,15,84,224                        // andps         %xmm8,%xmm12
   .byte  68,15,85,193                        // andnps        %xmm1,%xmm8
@@ -25583,8 +25947,8 @@ _sk_lab_to_xyz_sse2:
   .byte  15,84,194                           // andps         %xmm2,%xmm0
   .byte  65,15,85,209                        // andnps        %xmm9,%xmm2
   .byte  15,86,208                           // orps          %xmm0,%xmm2
-  .byte  68,15,89,5,35,36,0,0                // mulps         0x2423(%rip),%xmm8        # 4660 <_sk_callback_sse2+0x8e7>
-  .byte  15,89,21,44,36,0,0                  // mulps         0x242c(%rip),%xmm2        # 4670 <_sk_callback_sse2+0x8f7>
+  .byte  68,15,89,5,124,36,0,0               // mulps         0x247c(%rip),%xmm8        # 47a0 <_sk_callback_sse2+0x940>
+  .byte  15,89,21,133,36,0,0                 // mulps         0x2485(%rip),%xmm2        # 47b0 <_sk_callback_sse2+0x950>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -25600,7 +25964,7 @@ _sk_load_a8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,20,36,0,0                  // mulps         0x2414(%rip),%xmm3        # 4680 <_sk_callback_sse2+0x907>
+  .byte  15,89,29,109,36,0,0                 // mulps         0x246d(%rip),%xmm3        # 47c0 <_sk_callback_sse2+0x960>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
@@ -25645,7 +26009,7 @@ _sk_gather_a8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,131,35,0,0                 // mulps         0x2383(%rip),%xmm3        # 4690 <_sk_callback_sse2+0x917>
+  .byte  15,89,29,220,35,0,0                 // mulps         0x23dc(%rip),%xmm3        # 47d0 <_sk_callback_sse2+0x970>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
@@ -25658,7 +26022,7 @@ FUNCTION(_sk_store_a8_sse2)
 _sk_store_a8_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,119,35,0,0               // movaps        0x2377(%rip),%xmm8        # 46a0 <_sk_callback_sse2+0x927>
+  .byte  68,15,40,5,208,35,0,0               // movaps        0x23d0(%rip),%xmm8        # 47e0 <_sk_callback_sse2+0x980>
   .byte  68,15,89,195                        // mulps         %xmm3,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
   .byte  102,65,15,114,240,16                // pslld         $0x10,%xmm8
@@ -25680,9 +26044,9 @@ _sk_load_g8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,62,35,0,0                   // mulps         0x233e(%rip),%xmm0        # 46b0 <_sk_callback_sse2+0x937>
+  .byte  15,89,5,151,35,0,0                  // mulps         0x2397(%rip),%xmm0        # 47f0 <_sk_callback_sse2+0x990>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,69,35,0,0                  // movaps        0x2345(%rip),%xmm3        # 46c0 <_sk_callback_sse2+0x947>
+  .byte  15,40,29,158,35,0,0                 // movaps        0x239e(%rip),%xmm3        # 4800 <_sk_callback_sse2+0x9a0>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -25725,9 +26089,9 @@ _sk_gather_g8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,186,34,0,0                  // mulps         0x22ba(%rip),%xmm0        # 46d0 <_sk_callback_sse2+0x957>
+  .byte  15,89,5,19,35,0,0                   // mulps         0x2313(%rip),%xmm0        # 4810 <_sk_callback_sse2+0x9b0>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,193,34,0,0                 // movaps        0x22c1(%rip),%xmm3        # 46e0 <_sk_callback_sse2+0x967>
+  .byte  15,40,29,26,35,0,0                  // movaps        0x231a(%rip),%xmm3        # 4820 <_sk_callback_sse2+0x9c0>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -25739,9 +26103,9 @@ _sk_gather_i8_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  73,137,192                          // mov           %rax,%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  116,5                               // je            2436 <_sk_gather_i8_sse2+0xf>
+  .byte  116,5                               // je            251d <_sk_gather_i8_sse2+0xf>
   .byte  76,137,192                          // mov           %r8,%rax
-  .byte  235,2                               // jmp           2438 <_sk_gather_i8_sse2+0x11>
+  .byte  235,2                               // jmp           251f <_sk_gather_i8_sse2+0x11>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  243,15,91,201                       // cvttps2dq     %xmm1,%xmm1
@@ -25790,11 +26154,11 @@ _sk_gather_i8_sse2:
   .byte  102,67,15,110,12,136                // movd          (%r8,%r9,4),%xmm1
   .byte  102,68,15,98,201                    // punpckldq     %xmm1,%xmm9
   .byte  102,68,15,98,200                    // punpckldq     %xmm0,%xmm9
-  .byte  102,15,111,21,224,33,0,0            // movdqa        0x21e0(%rip),%xmm2        # 46f0 <_sk_callback_sse2+0x977>
+  .byte  102,15,111,21,57,34,0,0             // movdqa        0x2239(%rip),%xmm2        # 4830 <_sk_callback_sse2+0x9d0>
   .byte  102,65,15,111,193                   // movdqa        %xmm9,%xmm0
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,220,33,0,0               // movaps        0x21dc(%rip),%xmm8        # 4700 <_sk_callback_sse2+0x987>
+  .byte  68,15,40,5,53,34,0,0                // movaps        0x2235(%rip),%xmm8        # 4840 <_sk_callback_sse2+0x9e0>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,114,209,8                    // psrld         $0x8,%xmm1
@@ -25821,19 +26185,19 @@ _sk_load_565_sse2:
   .byte  243,15,126,20,120                   // movq          (%rax,%rdi,2),%xmm2
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,208                       // punpcklwd     %xmm0,%xmm2
-  .byte  102,15,111,5,146,33,0,0             // movdqa        0x2192(%rip),%xmm0        # 4710 <_sk_callback_sse2+0x997>
+  .byte  102,15,111,5,235,33,0,0             // movdqa        0x21eb(%rip),%xmm0        # 4850 <_sk_callback_sse2+0x9f0>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,148,33,0,0                  // mulps         0x2194(%rip),%xmm0        # 4720 <_sk_callback_sse2+0x9a7>
-  .byte  102,15,111,13,156,33,0,0            // movdqa        0x219c(%rip),%xmm1        # 4730 <_sk_callback_sse2+0x9b7>
+  .byte  15,89,5,237,33,0,0                  // mulps         0x21ed(%rip),%xmm0        # 4860 <_sk_callback_sse2+0xa00>
+  .byte  102,15,111,13,245,33,0,0            // movdqa        0x21f5(%rip),%xmm1        # 4870 <_sk_callback_sse2+0xa10>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,158,33,0,0                 // mulps         0x219e(%rip),%xmm1        # 4740 <_sk_callback_sse2+0x9c7>
-  .byte  102,15,219,21,166,33,0,0            // pand          0x21a6(%rip),%xmm2        # 4750 <_sk_callback_sse2+0x9d7>
+  .byte  15,89,13,247,33,0,0                 // mulps         0x21f7(%rip),%xmm1        # 4880 <_sk_callback_sse2+0xa20>
+  .byte  102,15,219,21,255,33,0,0            // pand          0x21ff(%rip),%xmm2        # 4890 <_sk_callback_sse2+0xa30>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,172,33,0,0                 // mulps         0x21ac(%rip),%xmm2        # 4760 <_sk_callback_sse2+0x9e7>
+  .byte  15,89,21,5,34,0,0                   // mulps         0x2205(%rip),%xmm2        # 48a0 <_sk_callback_sse2+0xa40>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,179,33,0,0                 // movaps        0x21b3(%rip),%xmm3        # 4770 <_sk_callback_sse2+0x9f7>
+  .byte  15,40,29,12,34,0,0                  // movaps        0x220c(%rip),%xmm3        # 48b0 <_sk_callback_sse2+0xa50>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_gather_565_sse2
@@ -25868,19 +26232,19 @@ _sk_gather_565_sse2:
   .byte  102,15,196,208,3                    // pinsrw        $0x3,%eax,%xmm2
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,208                       // punpcklwd     %xmm0,%xmm2
-  .byte  102,15,111,5,60,33,0,0              // movdqa        0x213c(%rip),%xmm0        # 4780 <_sk_callback_sse2+0xa07>
+  .byte  102,15,111,5,149,33,0,0             // movdqa        0x2195(%rip),%xmm0        # 48c0 <_sk_callback_sse2+0xa60>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,62,33,0,0                   // mulps         0x213e(%rip),%xmm0        # 4790 <_sk_callback_sse2+0xa17>
-  .byte  102,15,111,13,70,33,0,0             // movdqa        0x2146(%rip),%xmm1        # 47a0 <_sk_callback_sse2+0xa27>
+  .byte  15,89,5,151,33,0,0                  // mulps         0x2197(%rip),%xmm0        # 48d0 <_sk_callback_sse2+0xa70>
+  .byte  102,15,111,13,159,33,0,0            // movdqa        0x219f(%rip),%xmm1        # 48e0 <_sk_callback_sse2+0xa80>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,72,33,0,0                  // mulps         0x2148(%rip),%xmm1        # 47b0 <_sk_callback_sse2+0xa37>
-  .byte  102,15,219,21,80,33,0,0             // pand          0x2150(%rip),%xmm2        # 47c0 <_sk_callback_sse2+0xa47>
+  .byte  15,89,13,161,33,0,0                 // mulps         0x21a1(%rip),%xmm1        # 48f0 <_sk_callback_sse2+0xa90>
+  .byte  102,15,219,21,169,33,0,0            // pand          0x21a9(%rip),%xmm2        # 4900 <_sk_callback_sse2+0xaa0>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,86,33,0,0                  // mulps         0x2156(%rip),%xmm2        # 47d0 <_sk_callback_sse2+0xa57>
+  .byte  15,89,21,175,33,0,0                 // mulps         0x21af(%rip),%xmm2        # 4910 <_sk_callback_sse2+0xab0>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,93,33,0,0                  // movaps        0x215d(%rip),%xmm3        # 47e0 <_sk_callback_sse2+0xa67>
+  .byte  15,40,29,182,33,0,0                 // movaps        0x21b6(%rip),%xmm3        # 4920 <_sk_callback_sse2+0xac0>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_565_sse2
@@ -25889,12 +26253,12 @@ FUNCTION(_sk_store_565_sse2)
 _sk_store_565_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,94,33,0,0                // movaps        0x215e(%rip),%xmm8        # 47f0 <_sk_callback_sse2+0xa77>
+  .byte  68,15,40,5,183,33,0,0               // movaps        0x21b7(%rip),%xmm8        # 4930 <_sk_callback_sse2+0xad0>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
   .byte  102,65,15,114,241,11                // pslld         $0xb,%xmm9
-  .byte  68,15,40,21,83,33,0,0               // movaps        0x2153(%rip),%xmm10        # 4800 <_sk_callback_sse2+0xa87>
+  .byte  68,15,40,21,172,33,0,0              // movaps        0x21ac(%rip),%xmm10        # 4940 <_sk_callback_sse2+0xae0>
   .byte  68,15,89,209                        // mulps         %xmm1,%xmm10
   .byte  102,69,15,91,210                    // cvtps2dq      %xmm10,%xmm10
   .byte  102,65,15,114,242,5                 // pslld         $0x5,%xmm10
@@ -25918,21 +26282,21 @@ _sk_load_4444_sse2:
   .byte  243,15,126,28,120                   // movq          (%rax,%rdi,2),%xmm3
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,216                       // punpcklwd     %xmm0,%xmm3
-  .byte  102,15,111,5,12,33,0,0              // movdqa        0x210c(%rip),%xmm0        # 4810 <_sk_callback_sse2+0xa97>
+  .byte  102,15,111,5,101,33,0,0             // movdqa        0x2165(%rip),%xmm0        # 4950 <_sk_callback_sse2+0xaf0>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,14,33,0,0                   // mulps         0x210e(%rip),%xmm0        # 4820 <_sk_callback_sse2+0xaa7>
-  .byte  102,15,111,13,22,33,0,0             // movdqa        0x2116(%rip),%xmm1        # 4830 <_sk_callback_sse2+0xab7>
+  .byte  15,89,5,103,33,0,0                  // mulps         0x2167(%rip),%xmm0        # 4960 <_sk_callback_sse2+0xb00>
+  .byte  102,15,111,13,111,33,0,0            // movdqa        0x216f(%rip),%xmm1        # 4970 <_sk_callback_sse2+0xb10>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,24,33,0,0                  // mulps         0x2118(%rip),%xmm1        # 4840 <_sk_callback_sse2+0xac7>
-  .byte  102,15,111,21,32,33,0,0             // movdqa        0x2120(%rip),%xmm2        # 4850 <_sk_callback_sse2+0xad7>
+  .byte  15,89,13,113,33,0,0                 // mulps         0x2171(%rip),%xmm1        # 4980 <_sk_callback_sse2+0xb20>
+  .byte  102,15,111,21,121,33,0,0            // movdqa        0x2179(%rip),%xmm2        # 4990 <_sk_callback_sse2+0xb30>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,34,33,0,0                  // mulps         0x2122(%rip),%xmm2        # 4860 <_sk_callback_sse2+0xae7>
-  .byte  102,15,219,29,42,33,0,0             // pand          0x212a(%rip),%xmm3        # 4870 <_sk_callback_sse2+0xaf7>
+  .byte  15,89,21,123,33,0,0                 // mulps         0x217b(%rip),%xmm2        # 49a0 <_sk_callback_sse2+0xb40>
+  .byte  102,15,219,29,131,33,0,0            // pand          0x2183(%rip),%xmm3        # 49b0 <_sk_callback_sse2+0xb50>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,48,33,0,0                  // mulps         0x2130(%rip),%xmm3        # 4880 <_sk_callback_sse2+0xb07>
+  .byte  15,89,29,137,33,0,0                 // mulps         0x2189(%rip),%xmm3        # 49c0 <_sk_callback_sse2+0xb60>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -25968,21 +26332,21 @@ _sk_gather_4444_sse2:
   .byte  102,15,196,216,3                    // pinsrw        $0x3,%eax,%xmm3
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,216                       // punpcklwd     %xmm0,%xmm3
-  .byte  102,15,111,5,183,32,0,0             // movdqa        0x20b7(%rip),%xmm0        # 4890 <_sk_callback_sse2+0xb17>
+  .byte  102,15,111,5,16,33,0,0              // movdqa        0x2110(%rip),%xmm0        # 49d0 <_sk_callback_sse2+0xb70>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,185,32,0,0                  // mulps         0x20b9(%rip),%xmm0        # 48a0 <_sk_callback_sse2+0xb27>
-  .byte  102,15,111,13,193,32,0,0            // movdqa        0x20c1(%rip),%xmm1        # 48b0 <_sk_callback_sse2+0xb37>
+  .byte  15,89,5,18,33,0,0                   // mulps         0x2112(%rip),%xmm0        # 49e0 <_sk_callback_sse2+0xb80>
+  .byte  102,15,111,13,26,33,0,0             // movdqa        0x211a(%rip),%xmm1        # 49f0 <_sk_callback_sse2+0xb90>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,195,32,0,0                 // mulps         0x20c3(%rip),%xmm1        # 48c0 <_sk_callback_sse2+0xb47>
-  .byte  102,15,111,21,203,32,0,0            // movdqa        0x20cb(%rip),%xmm2        # 48d0 <_sk_callback_sse2+0xb57>
+  .byte  15,89,13,28,33,0,0                  // mulps         0x211c(%rip),%xmm1        # 4a00 <_sk_callback_sse2+0xba0>
+  .byte  102,15,111,21,36,33,0,0             // movdqa        0x2124(%rip),%xmm2        # 4a10 <_sk_callback_sse2+0xbb0>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,205,32,0,0                 // mulps         0x20cd(%rip),%xmm2        # 48e0 <_sk_callback_sse2+0xb67>
-  .byte  102,15,219,29,213,32,0,0            // pand          0x20d5(%rip),%xmm3        # 48f0 <_sk_callback_sse2+0xb77>
+  .byte  15,89,21,38,33,0,0                  // mulps         0x2126(%rip),%xmm2        # 4a20 <_sk_callback_sse2+0xbc0>
+  .byte  102,15,219,29,46,33,0,0             // pand          0x212e(%rip),%xmm3        # 4a30 <_sk_callback_sse2+0xbd0>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,219,32,0,0                 // mulps         0x20db(%rip),%xmm3        # 4900 <_sk_callback_sse2+0xb87>
+  .byte  15,89,29,52,33,0,0                  // mulps         0x2134(%rip),%xmm3        # 4a40 <_sk_callback_sse2+0xbe0>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -25992,7 +26356,7 @@ FUNCTION(_sk_store_4444_sse2)
 _sk_store_4444_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,218,32,0,0               // movaps        0x20da(%rip),%xmm8        # 4910 <_sk_callback_sse2+0xb97>
+  .byte  68,15,40,5,51,33,0,0                // movaps        0x2133(%rip),%xmm8        # 4a50 <_sk_callback_sse2+0xbf0>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -26024,11 +26388,11 @@ _sk_load_8888_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  68,15,16,12,184                     // movups        (%rax,%rdi,4),%xmm9
-  .byte  15,40,21,109,32,0,0                 // movaps        0x206d(%rip),%xmm2        # 4920 <_sk_callback_sse2+0xba7>
+  .byte  15,40,21,198,32,0,0                 // movaps        0x20c6(%rip),%xmm2        # 4a60 <_sk_callback_sse2+0xc00>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,84,194                           // andps         %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,107,32,0,0               // movaps        0x206b(%rip),%xmm8        # 4930 <_sk_callback_sse2+0xbb7>
+  .byte  68,15,40,5,196,32,0,0               // movaps        0x20c4(%rip),%xmm8        # 4a70 <_sk_callback_sse2+0xc10>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,40,201                        // movaps        %xmm9,%xmm1
   .byte  102,15,114,209,8                    // psrld         $0x8,%xmm1
@@ -26077,11 +26441,11 @@ _sk_gather_8888_sse2:
   .byte  102,67,15,110,12,129                // movd          (%r9,%r8,4),%xmm1
   .byte  102,68,15,98,201                    // punpckldq     %xmm1,%xmm9
   .byte  102,68,15,98,200                    // punpckldq     %xmm0,%xmm9
-  .byte  102,15,111,21,188,31,0,0            // movdqa        0x1fbc(%rip),%xmm2        # 4940 <_sk_callback_sse2+0xbc7>
+  .byte  102,15,111,21,21,32,0,0             // movdqa        0x2015(%rip),%xmm2        # 4a80 <_sk_callback_sse2+0xc20>
   .byte  102,65,15,111,193                   // movdqa        %xmm9,%xmm0
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,184,31,0,0               // movaps        0x1fb8(%rip),%xmm8        # 4950 <_sk_callback_sse2+0xbd7>
+  .byte  68,15,40,5,17,32,0,0                // movaps        0x2011(%rip),%xmm8        # 4a90 <_sk_callback_sse2+0xc30>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,114,209,8                    // psrld         $0x8,%xmm1
@@ -26105,7 +26469,7 @@ FUNCTION(_sk_store_8888_sse2)
 _sk_store_8888_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,123,31,0,0               // movaps        0x1f7b(%rip),%xmm8        # 4960 <_sk_callback_sse2+0xbe7>
+  .byte  68,15,40,5,212,31,0,0               // movaps        0x1fd4(%rip),%xmm8        # 4aa0 <_sk_callback_sse2+0xc40>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -26144,7 +26508,7 @@ _sk_load_f16_sse2:
   .byte  102,69,15,239,210                   // pxor          %xmm10,%xmm10
   .byte  102,65,15,111,206                   // movdqa        %xmm14,%xmm1
   .byte  102,65,15,97,202                    // punpcklwd     %xmm10,%xmm1
-  .byte  102,68,15,111,13,235,30,0,0         // movdqa        0x1eeb(%rip),%xmm9        # 4970 <_sk_callback_sse2+0xbf7>
+  .byte  102,68,15,111,13,68,31,0,0          // movdqa        0x1f44(%rip),%xmm9        # 4ab0 <_sk_callback_sse2+0xc50>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,193                   // pand          %xmm9,%xmm0
   .byte  102,15,239,200                      // pxor          %xmm0,%xmm1
@@ -26152,11 +26516,11 @@ _sk_load_f16_sse2:
   .byte  102,68,15,111,233                   // movdqa        %xmm1,%xmm13
   .byte  102,65,15,114,245,13                // pslld         $0xd,%xmm13
   .byte  102,68,15,235,232                   // por           %xmm0,%xmm13
-  .byte  102,68,15,111,29,208,30,0,0         // movdqa        0x1ed0(%rip),%xmm11        # 4980 <_sk_callback_sse2+0xc07>
+  .byte  102,68,15,111,29,41,31,0,0          // movdqa        0x1f29(%rip),%xmm11        # 4ac0 <_sk_callback_sse2+0xc60>
   .byte  102,69,15,254,235                   // paddd         %xmm11,%xmm13
-  .byte  102,68,15,111,37,210,30,0,0         // movdqa        0x1ed2(%rip),%xmm12        # 4990 <_sk_callback_sse2+0xc17>
+  .byte  102,68,15,111,37,43,31,0,0          // movdqa        0x1f2b(%rip),%xmm12        # 4ad0 <_sk_callback_sse2+0xc70>
   .byte  102,65,15,239,204                   // pxor          %xmm12,%xmm1
-  .byte  102,15,111,29,213,30,0,0            // movdqa        0x1ed5(%rip),%xmm3        # 49a0 <_sk_callback_sse2+0xc27>
+  .byte  102,15,111,29,46,31,0,0             // movdqa        0x1f2e(%rip),%xmm3        # 4ae0 <_sk_callback_sse2+0xc80>
   .byte  102,15,111,195                      // movdqa        %xmm3,%xmm0
   .byte  102,15,102,193                      // pcmpgtd       %xmm1,%xmm0
   .byte  102,65,15,223,197                   // pandn         %xmm13,%xmm0
@@ -26242,7 +26606,7 @@ _sk_gather_f16_sse2:
   .byte  102,69,15,239,210                   // pxor          %xmm10,%xmm10
   .byte  102,65,15,111,206                   // movdqa        %xmm14,%xmm1
   .byte  102,65,15,97,202                    // punpcklwd     %xmm10,%xmm1
-  .byte  102,68,15,111,13,99,29,0,0          // movdqa        0x1d63(%rip),%xmm9        # 49b0 <_sk_callback_sse2+0xc37>
+  .byte  102,68,15,111,13,188,29,0,0         // movdqa        0x1dbc(%rip),%xmm9        # 4af0 <_sk_callback_sse2+0xc90>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,193                   // pand          %xmm9,%xmm0
   .byte  102,15,239,200                      // pxor          %xmm0,%xmm1
@@ -26250,11 +26614,11 @@ _sk_gather_f16_sse2:
   .byte  102,68,15,111,233                   // movdqa        %xmm1,%xmm13
   .byte  102,65,15,114,245,13                // pslld         $0xd,%xmm13
   .byte  102,68,15,235,232                   // por           %xmm0,%xmm13
-  .byte  102,68,15,111,29,72,29,0,0          // movdqa        0x1d48(%rip),%xmm11        # 49c0 <_sk_callback_sse2+0xc47>
+  .byte  102,68,15,111,29,161,29,0,0         // movdqa        0x1da1(%rip),%xmm11        # 4b00 <_sk_callback_sse2+0xca0>
   .byte  102,69,15,254,235                   // paddd         %xmm11,%xmm13
-  .byte  102,68,15,111,37,74,29,0,0          // movdqa        0x1d4a(%rip),%xmm12        # 49d0 <_sk_callback_sse2+0xc57>
+  .byte  102,68,15,111,37,163,29,0,0         // movdqa        0x1da3(%rip),%xmm12        # 4b10 <_sk_callback_sse2+0xcb0>
   .byte  102,65,15,239,204                   // pxor          %xmm12,%xmm1
-  .byte  102,15,111,29,77,29,0,0             // movdqa        0x1d4d(%rip),%xmm3        # 49e0 <_sk_callback_sse2+0xc67>
+  .byte  102,15,111,29,166,29,0,0            // movdqa        0x1da6(%rip),%xmm3        # 4b20 <_sk_callback_sse2+0xcc0>
   .byte  102,15,111,195                      // movdqa        %xmm3,%xmm0
   .byte  102,15,102,193                      // pcmpgtd       %xmm1,%xmm0
   .byte  102,65,15,223,197                   // pandn         %xmm13,%xmm0
@@ -26307,17 +26671,17 @@ FUNCTION(_sk_store_f16_sse2)
 _sk_store_f16_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  102,68,15,111,21,117,28,0,0         // movdqa        0x1c75(%rip),%xmm10        # 49f0 <_sk_callback_sse2+0xc77>
+  .byte  102,68,15,111,21,206,28,0,0         // movdqa        0x1cce(%rip),%xmm10        # 4b30 <_sk_callback_sse2+0xcd0>
   .byte  102,68,15,111,224                   // movdqa        %xmm0,%xmm12
   .byte  102,68,15,111,232                   // movdqa        %xmm0,%xmm13
   .byte  102,69,15,219,234                   // pand          %xmm10,%xmm13
   .byte  102,69,15,239,229                   // pxor          %xmm13,%xmm12
-  .byte  102,68,15,111,13,104,28,0,0         // movdqa        0x1c68(%rip),%xmm9        # 4a00 <_sk_callback_sse2+0xc87>
+  .byte  102,68,15,111,13,193,28,0,0         // movdqa        0x1cc1(%rip),%xmm9        # 4b40 <_sk_callback_sse2+0xce0>
   .byte  102,65,15,114,213,16                // psrld         $0x10,%xmm13
   .byte  102,69,15,111,193                   // movdqa        %xmm9,%xmm8
   .byte  102,69,15,102,196                   // pcmpgtd       %xmm12,%xmm8
   .byte  102,65,15,114,212,13                // psrld         $0xd,%xmm12
-  .byte  102,68,15,111,29,89,28,0,0          // movdqa        0x1c59(%rip),%xmm11        # 4a10 <_sk_callback_sse2+0xc97>
+  .byte  102,68,15,111,29,178,28,0,0         // movdqa        0x1cb2(%rip),%xmm11        # 4b50 <_sk_callback_sse2+0xcf0>
   .byte  102,69,15,235,235                   // por           %xmm11,%xmm13
   .byte  102,69,15,254,236                   // paddd         %xmm12,%xmm13
   .byte  102,65,15,114,245,16                // pslld         $0x10,%xmm13
@@ -26396,7 +26760,7 @@ _sk_load_u16_be_sse2:
   .byte  102,69,15,239,201                   // pxor          %xmm9,%xmm9
   .byte  102,65,15,97,201                    // punpcklwd     %xmm9,%xmm1
   .byte  15,91,193                           // cvtdq2ps      %xmm1,%xmm0
-  .byte  68,15,40,5,247,26,0,0               // movaps        0x1af7(%rip),%xmm8        # 4a20 <_sk_callback_sse2+0xca7>
+  .byte  68,15,40,5,80,27,0,0                // movaps        0x1b50(%rip),%xmm8        # 4b60 <_sk_callback_sse2+0xd00>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -26449,7 +26813,7 @@ _sk_load_rgb_u16_be_sse2:
   .byte  102,69,15,239,192                   // pxor          %xmm8,%xmm8
   .byte  102,65,15,97,192                    // punpcklwd     %xmm8,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,13,51,26,0,0               // movaps        0x1a33(%rip),%xmm9        # 4a30 <_sk_callback_sse2+0xcb7>
+  .byte  68,15,40,13,140,26,0,0              // movaps        0x1a8c(%rip),%xmm9        # 4b70 <_sk_callback_sse2+0xd10>
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -26466,7 +26830,7 @@ _sk_load_rgb_u16_be_sse2:
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,250,25,0,0                 // movaps        0x19fa(%rip),%xmm3        # 4a40 <_sk_callback_sse2+0xcc7>
+  .byte  15,40,29,83,26,0,0                  // movaps        0x1a53(%rip),%xmm3        # 4b80 <_sk_callback_sse2+0xd20>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_u16_be_sse2
@@ -26475,7 +26839,7 @@ FUNCTION(_sk_store_u16_be_sse2)
 _sk_store_u16_be_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,13,251,25,0,0              // movaps        0x19fb(%rip),%xmm9        # 4a50 <_sk_callback_sse2+0xcd7>
+  .byte  68,15,40,13,84,26,0,0               // movaps        0x1a54(%rip),%xmm9        # 4b90 <_sk_callback_sse2+0xd30>
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
@@ -26625,7 +26989,7 @@ _sk_repeat_x_sse2:
   .byte  243,69,15,91,209                    // cvttps2dq     %xmm9,%xmm10
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,84,13,229,23,0,0              // andps         0x17e5(%rip),%xmm9        # 4a60 <_sk_callback_sse2+0xce7>
+  .byte  68,15,84,13,62,24,0,0               // andps         0x183e(%rip),%xmm9        # 4ba0 <_sk_callback_sse2+0xd40>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
   .byte  65,15,92,194                        // subps         %xmm10,%xmm0
@@ -26647,7 +27011,7 @@ _sk_repeat_y_sse2:
   .byte  243,69,15,91,209                    // cvttps2dq     %xmm9,%xmm10
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,84,13,173,23,0,0              // andps         0x17ad(%rip),%xmm9        # 4a70 <_sk_callback_sse2+0xcf7>
+  .byte  68,15,84,13,6,24,0,0                // andps         0x1806(%rip),%xmm9        # 4bb0 <_sk_callback_sse2+0xd50>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
   .byte  65,15,92,202                        // subps         %xmm10,%xmm1
@@ -26673,7 +27037,7 @@ _sk_mirror_x_sse2:
   .byte  243,69,15,91,218                    // cvttps2dq     %xmm10,%xmm11
   .byte  69,15,91,219                        // cvtdq2ps      %xmm11,%xmm11
   .byte  69,15,194,211,1                     // cmpltps       %xmm11,%xmm10
-  .byte  68,15,84,21,99,23,0,0               // andps         0x1763(%rip),%xmm10        # 4a80 <_sk_callback_sse2+0xd07>
+  .byte  68,15,84,21,188,23,0,0              // andps         0x17bc(%rip),%xmm10        # 4bc0 <_sk_callback_sse2+0xd60>
   .byte  69,15,87,228                        // xorps         %xmm12,%xmm12
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  69,15,89,216                        // mulps         %xmm8,%xmm11
@@ -26703,7 +27067,7 @@ _sk_mirror_y_sse2:
   .byte  243,69,15,91,218                    // cvttps2dq     %xmm10,%xmm11
   .byte  69,15,91,219                        // cvtdq2ps      %xmm11,%xmm11
   .byte  69,15,194,211,1                     // cmpltps       %xmm11,%xmm10
-  .byte  68,15,84,21,9,23,0,0                // andps         0x1709(%rip),%xmm10        # 4a90 <_sk_callback_sse2+0xd17>
+  .byte  68,15,84,21,98,23,0,0               // andps         0x1762(%rip),%xmm10        # 4bd0 <_sk_callback_sse2+0xd70>
   .byte  69,15,87,228                        // xorps         %xmm12,%xmm12
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  69,15,89,216                        // mulps         %xmm8,%xmm11
@@ -26722,10 +27086,10 @@ HIDDEN _sk_luminance_to_alpha_sse2
 FUNCTION(_sk_luminance_to_alpha_sse2)
 _sk_luminance_to_alpha_sse2:
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
-  .byte  15,89,5,225,22,0,0                  // mulps         0x16e1(%rip),%xmm0        # 4aa0 <_sk_callback_sse2+0xd27>
-  .byte  15,89,13,234,22,0,0                 // mulps         0x16ea(%rip),%xmm1        # 4ab0 <_sk_callback_sse2+0xd37>
+  .byte  15,89,5,58,23,0,0                   // mulps         0x173a(%rip),%xmm0        # 4be0 <_sk_callback_sse2+0xd80>
+  .byte  15,89,13,67,23,0,0                  // mulps         0x1743(%rip),%xmm1        # 4bf0 <_sk_callback_sse2+0xd90>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
-  .byte  15,89,29,240,22,0,0                 // mulps         0x16f0(%rip),%xmm3        # 4ac0 <_sk_callback_sse2+0xd47>
+  .byte  15,89,29,73,23,0,0                  // mulps         0x1749(%rip),%xmm3        # 4c00 <_sk_callback_sse2+0xda0>
   .byte  15,88,217                           // addps         %xmm1,%xmm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
@@ -26958,7 +27322,7 @@ _sk_linear_gradient_sse2:
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
   .byte  72,139,8                            // mov           (%rax),%rcx
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,132,15,1,0,0                     // je            3883 <_sk_linear_gradient_sse2+0x149>
+  .byte  15,132,15,1,0,0                     // je            396a <_sk_linear_gradient_sse2+0x149>
   .byte  72,139,64,8                         // mov           0x8(%rax),%rax
   .byte  72,131,192,32                       // add           $0x20,%rax
   .byte  69,15,87,192                        // xorps         %xmm8,%xmm8
@@ -27019,8 +27383,8 @@ _sk_linear_gradient_sse2:
   .byte  69,15,86,231                        // orps          %xmm15,%xmm12
   .byte  72,131,192,36                       // add           $0x24,%rax
   .byte  72,255,201                          // dec           %rcx
-  .byte  15,133,8,255,255,255                // jne           3789 <_sk_linear_gradient_sse2+0x4f>
-  .byte  235,13                              // jmp           3890 <_sk_linear_gradient_sse2+0x156>
+  .byte  15,133,8,255,255,255                // jne           3870 <_sk_linear_gradient_sse2+0x4f>
+  .byte  235,13                              // jmp           3977 <_sk_linear_gradient_sse2+0x156>
   .byte  15,87,201                           // xorps         %xmm1,%xmm1
   .byte  15,87,210                           // xorps         %xmm2,%xmm2
   .byte  15,87,219                           // xorps         %xmm3,%xmm3
@@ -27089,29 +27453,29 @@ _sk_xy_to_polar_unit_sse2:
   .byte  69,15,94,220                        // divps         %xmm12,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
-  .byte  68,15,40,45,104,17,0,0              // movaps        0x1168(%rip),%xmm13        # 4ad0 <_sk_callback_sse2+0xd57>
+  .byte  68,15,40,45,193,17,0,0              // movaps        0x11c1(%rip),%xmm13        # 4c10 <_sk_callback_sse2+0xdb0>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,108,17,0,0              // addps         0x116c(%rip),%xmm13        # 4ae0 <_sk_callback_sse2+0xd67>
+  .byte  68,15,88,45,197,17,0,0              // addps         0x11c5(%rip),%xmm13        # 4c20 <_sk_callback_sse2+0xdc0>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,112,17,0,0              // addps         0x1170(%rip),%xmm13        # 4af0 <_sk_callback_sse2+0xd77>
+  .byte  68,15,88,45,201,17,0,0              // addps         0x11c9(%rip),%xmm13        # 4c30 <_sk_callback_sse2+0xdd0>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,116,17,0,0              // addps         0x1174(%rip),%xmm13        # 4b00 <_sk_callback_sse2+0xd87>
+  .byte  68,15,88,45,205,17,0,0              // addps         0x11cd(%rip),%xmm13        # 4c40 <_sk_callback_sse2+0xde0>
   .byte  69,15,89,235                        // mulps         %xmm11,%xmm13
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,40,21,115,17,0,0              // movaps        0x1173(%rip),%xmm10        # 4b10 <_sk_callback_sse2+0xd97>
+  .byte  68,15,40,21,204,17,0,0              // movaps        0x11cc(%rip),%xmm10        # 4c50 <_sk_callback_sse2+0xdf0>
   .byte  69,15,92,213                        // subps         %xmm13,%xmm10
   .byte  69,15,84,209                        // andps         %xmm9,%xmm10
   .byte  69,15,85,205                        // andnps        %xmm13,%xmm9
   .byte  69,15,86,202                        // orps          %xmm10,%xmm9
   .byte  68,15,194,192,1                     // cmpltps       %xmm0,%xmm8
-  .byte  68,15,40,21,102,17,0,0              // movaps        0x1166(%rip),%xmm10        # 4b20 <_sk_callback_sse2+0xda7>
+  .byte  68,15,40,21,191,17,0,0              // movaps        0x11bf(%rip),%xmm10        # 4c60 <_sk_callback_sse2+0xe00>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,84,208                        // andps         %xmm8,%xmm10
   .byte  69,15,85,193                        // andnps        %xmm9,%xmm8
   .byte  69,15,86,194                        // orps          %xmm10,%xmm8
   .byte  68,15,40,201                        // movaps        %xmm1,%xmm9
   .byte  68,15,194,200,1                     // cmpltps       %xmm0,%xmm9
-  .byte  68,15,40,21,85,17,0,0               // movaps        0x1155(%rip),%xmm10        # 4b30 <_sk_callback_sse2+0xdb7>
+  .byte  68,15,40,21,174,17,0,0              // movaps        0x11ae(%rip),%xmm10        # 4c70 <_sk_callback_sse2+0xe10>
   .byte  69,15,92,208                        // subps         %xmm8,%xmm10
   .byte  69,15,84,209                        // andps         %xmm9,%xmm10
   .byte  69,15,85,200                        // andnps        %xmm8,%xmm9
@@ -27126,7 +27490,7 @@ HIDDEN _sk_save_xy_sse2
 FUNCTION(_sk_save_xy_sse2)
 _sk_save_xy_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,62,17,0,0                // movaps        0x113e(%rip),%xmm8        # 4b40 <_sk_callback_sse2+0xdc7>
+  .byte  68,15,40,5,151,17,0,0               // movaps        0x1197(%rip),%xmm8        # 4c80 <_sk_callback_sse2+0xe20>
   .byte  15,17,0                             // movups        %xmm0,(%rax)
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,88,200                        // addps         %xmm8,%xmm9
@@ -27134,7 +27498,7 @@ _sk_save_xy_sse2:
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,40,217                        // movaps        %xmm9,%xmm11
   .byte  69,15,194,218,1                     // cmpltps       %xmm10,%xmm11
-  .byte  68,15,40,37,41,17,0,0               // movaps        0x1129(%rip),%xmm12        # 4b50 <_sk_callback_sse2+0xdd7>
+  .byte  68,15,40,37,130,17,0,0              // movaps        0x1182(%rip),%xmm12        # 4c90 <_sk_callback_sse2+0xe30>
   .byte  69,15,84,220                        // andps         %xmm12,%xmm11
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
@@ -27181,8 +27545,8 @@ _sk_bilinear_nx_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,162,16,0,0                  // addps         0x10a2(%rip),%xmm0        # 4b60 <_sk_callback_sse2+0xde7>
-  .byte  68,15,40,13,170,16,0,0              // movaps        0x10aa(%rip),%xmm9        # 4b70 <_sk_callback_sse2+0xdf7>
+  .byte  15,88,5,251,16,0,0                  // addps         0x10fb(%rip),%xmm0        # 4ca0 <_sk_callback_sse2+0xe40>
+  .byte  68,15,40,13,3,17,0,0                // movaps        0x1103(%rip),%xmm9        # 4cb0 <_sk_callback_sse2+0xe50>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -27195,7 +27559,7 @@ _sk_bilinear_px_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,153,16,0,0                  // addps         0x1099(%rip),%xmm0        # 4b80 <_sk_callback_sse2+0xe07>
+  .byte  15,88,5,242,16,0,0                  // addps         0x10f2(%rip),%xmm0        # 4cc0 <_sk_callback_sse2+0xe60>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -27207,8 +27571,8 @@ _sk_bilinear_ny_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,139,16,0,0                 // addps         0x108b(%rip),%xmm1        # 4b90 <_sk_callback_sse2+0xe17>
-  .byte  68,15,40,13,147,16,0,0              // movaps        0x1093(%rip),%xmm9        # 4ba0 <_sk_callback_sse2+0xe27>
+  .byte  15,88,13,228,16,0,0                 // addps         0x10e4(%rip),%xmm1        # 4cd0 <_sk_callback_sse2+0xe70>
+  .byte  68,15,40,13,236,16,0,0              // movaps        0x10ec(%rip),%xmm9        # 4ce0 <_sk_callback_sse2+0xe80>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -27221,7 +27585,7 @@ _sk_bilinear_py_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,129,16,0,0                 // addps         0x1081(%rip),%xmm1        # 4bb0 <_sk_callback_sse2+0xe37>
+  .byte  15,88,13,218,16,0,0                 // addps         0x10da(%rip),%xmm1        # 4cf0 <_sk_callback_sse2+0xe90>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -27233,13 +27597,13 @@ _sk_bicubic_n3x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,116,16,0,0                  // addps         0x1074(%rip),%xmm0        # 4bc0 <_sk_callback_sse2+0xe47>
-  .byte  68,15,40,13,124,16,0,0              // movaps        0x107c(%rip),%xmm9        # 4bd0 <_sk_callback_sse2+0xe57>
+  .byte  15,88,5,205,16,0,0                  // addps         0x10cd(%rip),%xmm0        # 4d00 <_sk_callback_sse2+0xea0>
+  .byte  68,15,40,13,213,16,0,0              // movaps        0x10d5(%rip),%xmm9        # 4d10 <_sk_callback_sse2+0xeb0>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,120,16,0,0              // mulps         0x1078(%rip),%xmm9        # 4be0 <_sk_callback_sse2+0xe67>
-  .byte  68,15,88,13,128,16,0,0              // addps         0x1080(%rip),%xmm9        # 4bf0 <_sk_callback_sse2+0xe77>
+  .byte  68,15,89,13,209,16,0,0              // mulps         0x10d1(%rip),%xmm9        # 4d20 <_sk_callback_sse2+0xec0>
+  .byte  68,15,88,13,217,16,0,0              // addps         0x10d9(%rip),%xmm9        # 4d30 <_sk_callback_sse2+0xed0>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -27252,16 +27616,16 @@ _sk_bicubic_n1x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,111,16,0,0                  // addps         0x106f(%rip),%xmm0        # 4c00 <_sk_callback_sse2+0xe87>
-  .byte  68,15,40,13,119,16,0,0              // movaps        0x1077(%rip),%xmm9        # 4c10 <_sk_callback_sse2+0xe97>
+  .byte  15,88,5,200,16,0,0                  // addps         0x10c8(%rip),%xmm0        # 4d40 <_sk_callback_sse2+0xee0>
+  .byte  68,15,40,13,208,16,0,0              // movaps        0x10d0(%rip),%xmm9        # 4d50 <_sk_callback_sse2+0xef0>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,123,16,0,0               // movaps        0x107b(%rip),%xmm8        # 4c20 <_sk_callback_sse2+0xea7>
+  .byte  68,15,40,5,212,16,0,0               // movaps        0x10d4(%rip),%xmm8        # 4d60 <_sk_callback_sse2+0xf00>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,127,16,0,0               // addps         0x107f(%rip),%xmm8        # 4c30 <_sk_callback_sse2+0xeb7>
+  .byte  68,15,88,5,216,16,0,0               // addps         0x10d8(%rip),%xmm8        # 4d70 <_sk_callback_sse2+0xf10>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,131,16,0,0               // addps         0x1083(%rip),%xmm8        # 4c40 <_sk_callback_sse2+0xec7>
+  .byte  68,15,88,5,220,16,0,0               // addps         0x10dc(%rip),%xmm8        # 4d80 <_sk_callback_sse2+0xf20>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,135,16,0,0               // addps         0x1087(%rip),%xmm8        # 4c50 <_sk_callback_sse2+0xed7>
+  .byte  68,15,88,5,224,16,0,0               // addps         0x10e0(%rip),%xmm8        # 4d90 <_sk_callback_sse2+0xf30>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -27271,17 +27635,17 @@ HIDDEN _sk_bicubic_p1x_sse2
 FUNCTION(_sk_bicubic_p1x_sse2)
 _sk_bicubic_p1x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,129,16,0,0               // movaps        0x1081(%rip),%xmm8        # 4c60 <_sk_callback_sse2+0xee7>
+  .byte  68,15,40,5,218,16,0,0               // movaps        0x10da(%rip),%xmm8        # 4da0 <_sk_callback_sse2+0xf40>
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,72,64                      // movups        0x40(%rax),%xmm9
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,21,125,16,0,0              // movaps        0x107d(%rip),%xmm10        # 4c70 <_sk_callback_sse2+0xef7>
+  .byte  68,15,40,21,214,16,0,0              // movaps        0x10d6(%rip),%xmm10        # 4db0 <_sk_callback_sse2+0xf50>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,129,16,0,0              // addps         0x1081(%rip),%xmm10        # 4c80 <_sk_callback_sse2+0xf07>
+  .byte  68,15,88,21,218,16,0,0              // addps         0x10da(%rip),%xmm10        # 4dc0 <_sk_callback_sse2+0xf60>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,125,16,0,0              // addps         0x107d(%rip),%xmm10        # 4c90 <_sk_callback_sse2+0xf17>
+  .byte  68,15,88,21,214,16,0,0              // addps         0x10d6(%rip),%xmm10        # 4dd0 <_sk_callback_sse2+0xf70>
   .byte  68,15,17,144,128,0,0,0              // movups        %xmm10,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -27293,11 +27657,11 @@ _sk_bicubic_p3x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,112,16,0,0                  // addps         0x1070(%rip),%xmm0        # 4ca0 <_sk_callback_sse2+0xf27>
+  .byte  15,88,5,201,16,0,0                  // addps         0x10c9(%rip),%xmm0        # 4de0 <_sk_callback_sse2+0xf80>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,112,16,0,0               // mulps         0x1070(%rip),%xmm8        # 4cb0 <_sk_callback_sse2+0xf37>
-  .byte  68,15,88,5,120,16,0,0               // addps         0x1078(%rip),%xmm8        # 4cc0 <_sk_callback_sse2+0xf47>
+  .byte  68,15,89,5,201,16,0,0               // mulps         0x10c9(%rip),%xmm8        # 4df0 <_sk_callback_sse2+0xf90>
+  .byte  68,15,88,5,209,16,0,0               // addps         0x10d1(%rip),%xmm8        # 4e00 <_sk_callback_sse2+0xfa0>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -27310,13 +27674,13 @@ _sk_bicubic_n3y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,102,16,0,0                 // addps         0x1066(%rip),%xmm1        # 4cd0 <_sk_callback_sse2+0xf57>
-  .byte  68,15,40,13,110,16,0,0              // movaps        0x106e(%rip),%xmm9        # 4ce0 <_sk_callback_sse2+0xf67>
+  .byte  15,88,13,191,16,0,0                 // addps         0x10bf(%rip),%xmm1        # 4e10 <_sk_callback_sse2+0xfb0>
+  .byte  68,15,40,13,199,16,0,0              // movaps        0x10c7(%rip),%xmm9        # 4e20 <_sk_callback_sse2+0xfc0>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,106,16,0,0              // mulps         0x106a(%rip),%xmm9        # 4cf0 <_sk_callback_sse2+0xf77>
-  .byte  68,15,88,13,114,16,0,0              // addps         0x1072(%rip),%xmm9        # 4d00 <_sk_callback_sse2+0xf87>
+  .byte  68,15,89,13,195,16,0,0              // mulps         0x10c3(%rip),%xmm9        # 4e30 <_sk_callback_sse2+0xfd0>
+  .byte  68,15,88,13,203,16,0,0              // addps         0x10cb(%rip),%xmm9        # 4e40 <_sk_callback_sse2+0xfe0>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -27329,16 +27693,16 @@ _sk_bicubic_n1y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,96,16,0,0                  // addps         0x1060(%rip),%xmm1        # 4d10 <_sk_callback_sse2+0xf97>
-  .byte  68,15,40,13,104,16,0,0              // movaps        0x1068(%rip),%xmm9        # 4d20 <_sk_callback_sse2+0xfa7>
+  .byte  15,88,13,185,16,0,0                 // addps         0x10b9(%rip),%xmm1        # 4e50 <_sk_callback_sse2+0xff0>
+  .byte  68,15,40,13,193,16,0,0              // movaps        0x10c1(%rip),%xmm9        # 4e60 <_sk_callback_sse2+0x1000>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,108,16,0,0               // movaps        0x106c(%rip),%xmm8        # 4d30 <_sk_callback_sse2+0xfb7>
+  .byte  68,15,40,5,197,16,0,0               // movaps        0x10c5(%rip),%xmm8        # 4e70 <_sk_callback_sse2+0x1010>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,112,16,0,0               // addps         0x1070(%rip),%xmm8        # 4d40 <_sk_callback_sse2+0xfc7>
+  .byte  68,15,88,5,201,16,0,0               // addps         0x10c9(%rip),%xmm8        # 4e80 <_sk_callback_sse2+0x1020>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,116,16,0,0               // addps         0x1074(%rip),%xmm8        # 4d50 <_sk_callback_sse2+0xfd7>
+  .byte  68,15,88,5,205,16,0,0               // addps         0x10cd(%rip),%xmm8        # 4e90 <_sk_callback_sse2+0x1030>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,120,16,0,0               // addps         0x1078(%rip),%xmm8        # 4d60 <_sk_callback_sse2+0xfe7>
+  .byte  68,15,88,5,209,16,0,0               // addps         0x10d1(%rip),%xmm8        # 4ea0 <_sk_callback_sse2+0x1040>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -27348,17 +27712,17 @@ HIDDEN _sk_bicubic_p1y_sse2
 FUNCTION(_sk_bicubic_p1y_sse2)
 _sk_bicubic_p1y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,114,16,0,0               // movaps        0x1072(%rip),%xmm8        # 4d70 <_sk_callback_sse2+0xff7>
+  .byte  68,15,40,5,203,16,0,0               // movaps        0x10cb(%rip),%xmm8        # 4eb0 <_sk_callback_sse2+0x1050>
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,72,96                      // movups        0x60(%rax),%xmm9
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  68,15,40,21,109,16,0,0              // movaps        0x106d(%rip),%xmm10        # 4d80 <_sk_callback_sse2+0x1007>
+  .byte  68,15,40,21,198,16,0,0              // movaps        0x10c6(%rip),%xmm10        # 4ec0 <_sk_callback_sse2+0x1060>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,113,16,0,0              // addps         0x1071(%rip),%xmm10        # 4d90 <_sk_callback_sse2+0x1017>
+  .byte  68,15,88,21,202,16,0,0              // addps         0x10ca(%rip),%xmm10        # 4ed0 <_sk_callback_sse2+0x1070>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,109,16,0,0              // addps         0x106d(%rip),%xmm10        # 4da0 <_sk_callback_sse2+0x1027>
+  .byte  68,15,88,21,198,16,0,0              // addps         0x10c6(%rip),%xmm10        # 4ee0 <_sk_callback_sse2+0x1080>
   .byte  68,15,17,144,160,0,0,0              // movups        %xmm10,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -27370,11 +27734,11 @@ _sk_bicubic_p3y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,95,16,0,0                  // addps         0x105f(%rip),%xmm1        # 4db0 <_sk_callback_sse2+0x1037>
+  .byte  15,88,13,184,16,0,0                 // addps         0x10b8(%rip),%xmm1        # 4ef0 <_sk_callback_sse2+0x1090>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,95,16,0,0                // mulps         0x105f(%rip),%xmm8        # 4dc0 <_sk_callback_sse2+0x1047>
-  .byte  68,15,88,5,103,16,0,0               // addps         0x1067(%rip),%xmm8        # 4dd0 <_sk_callback_sse2+0x1057>
+  .byte  68,15,89,5,184,16,0,0               // mulps         0x10b8(%rip),%xmm8        # 4f00 <_sk_callback_sse2+0x10a0>
+  .byte  68,15,88,5,192,16,0,0               // addps         0x10c0(%rip),%xmm8        # 4f10 <_sk_callback_sse2+0x10b0>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -27463,6 +27827,40 @@ BALIGN16
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
   .byte  0,0                                 // add           %al,(%rax)
+  .byte  128,63,1                            // cmpb          $0x1,(%rdi)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,1                                 // add           %al,(%rcx)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,1                                 // add           %al,(%rcx)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,1                                 // add           %al,(%rcx)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,4,0                               // add           %al,(%rax,%rax,1)
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  4,0                                 // add           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  4,0                                 // add           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  4,0                                 // add           $0x0,%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  2,0                                 // add           (%rax),%al
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  33,8                                // and           %ecx,(%rax)
+  .byte  130                                 // (bad)
+  .byte  60,33                               // cmp           $0x21,%al
+  .byte  8,130,60,33,8,130                   // or            %al,-0x7df7dec4(%rdx)
+  .byte  60,33                               // cmp           $0x21,%al
+  .byte  8,130,60,0,0,0                      // or            %al,0x3c(%rdx)
+  .byte  191,0,0,0,191                       // mov           $0xbf000000,%edi
+  .byte  0,0                                 // add           %al,(%rax)
+  .byte  0,191,0,0,0,191                     // add           %bh,-0x41000000(%rdi)
+  .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
   .byte  63                                  // (bad)
@@ -27553,17 +27951,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
-  .byte  63                                  // (bad)
+  .byte  191,0,0,128,191                     // mov           $0xbf800000,%edi
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,191,0,0,128,191,0               // cmpb          $0x0,-0x40800000(%rdi)
-  .byte  0,128,191,0,0,128                   // add           %al,-0x7fffff41(%rax)
-  .byte  191,0,0,224,64                      // mov           $0x40e00000,%edi
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        3fd8 <.literal16+0x188>
+  .byte  0,224                               // add           %ah,%al
+  .byte  64,0,0                              // add           %al,(%rax)
+  .byte  224,64                              // loopne        4118 <.literal16+0x1d8>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        3fdc <.literal16+0x18c>
+  .byte  224,64                              // loopne        411c <.literal16+0x1dc>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        3fe0 <.literal16+0x190>
+  .byte  224,64                              // loopne        4120 <.literal16+0x1e0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -27787,13 +28184,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        41a9 <.literal16+0x359>
+  .byte  224,7                               // loopne        42e9 <.literal16+0x3a9>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        41ad <.literal16+0x35d>
+  .byte  224,7                               // loopne        42ed <.literal16+0x3ad>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        41b1 <.literal16+0x361>
+  .byte  224,7                               // loopne        42f1 <.literal16+0x3b1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        41b5 <.literal16+0x365>
+  .byte  224,7                               // loopne        42f5 <.literal16+0x3b5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -27862,11 +28259,11 @@ BALIGN16
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            429b <.literal16+0x44b>
+  .byte  127,67                              // jg            43db <.literal16+0x49b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            429f <.literal16+0x44f>
+  .byte  127,67                              // jg            43df <.literal16+0x49f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            42a3 <.literal16+0x453>
+  .byte  127,67                              // jg            43e3 <.literal16+0x4a3>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,129,128,128,59           // addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -27881,16 +28278,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4294 <.literal16+0x444>
+  .byte  127,0                               // jg            43d4 <.literal16+0x494>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4298 <.literal16+0x448>
+  .byte  127,0                               // jg            43d8 <.literal16+0x498>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            429c <.literal16+0x44c>
+  .byte  127,0                               // jg            43dc <.literal16+0x49c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            42a0 <.literal16+0x450>
+  .byte  127,0                               // jg            43e0 <.literal16+0x4a0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -27899,7 +28296,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4325 <.literal16+0x4d5>
+  .byte  119,115                             // ja            4465 <.literal16+0x525>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -27910,7 +28307,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4289 <.literal16+0x439>
+  .byte  117,191                             // jne           43c9 <.literal16+0x489>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -27922,7 +28319,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a382ca <_sk_callback_sse2+0xffffffffe9a34551>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3840a <_sk_callback_sse2+0xffffffffe9a345aa>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -27976,16 +28373,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4364 <.literal16+0x514>
+  .byte  127,0                               // jg            44a4 <.literal16+0x564>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4368 <.literal16+0x518>
+  .byte  127,0                               // jg            44a8 <.literal16+0x568>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            436c <.literal16+0x51c>
+  .byte  127,0                               // jg            44ac <.literal16+0x56c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4370 <.literal16+0x520>
+  .byte  127,0                               // jg            44b0 <.literal16+0x570>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -27994,7 +28391,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            43f5 <.literal16+0x5a5>
+  .byte  119,115                             // ja            4535 <.literal16+0x5f5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -28005,7 +28402,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4359 <.literal16+0x509>
+  .byte  117,191                             // jne           4499 <.literal16+0x559>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -28017,7 +28414,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3839a <_sk_callback_sse2+0xffffffffe9a34621>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a384da <_sk_callback_sse2+0xffffffffe9a3467a>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -28071,16 +28468,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4434 <.literal16+0x5e4>
+  .byte  127,0                               // jg            4574 <.literal16+0x634>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4438 <.literal16+0x5e8>
+  .byte  127,0                               // jg            4578 <.literal16+0x638>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            443c <.literal16+0x5ec>
+  .byte  127,0                               // jg            457c <.literal16+0x63c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4440 <.literal16+0x5f0>
+  .byte  127,0                               // jg            4580 <.literal16+0x640>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -28089,7 +28486,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            44c5 <.literal16+0x675>
+  .byte  119,115                             // ja            4605 <.literal16+0x6c5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -28100,7 +28497,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4429 <.literal16+0x5d9>
+  .byte  117,191                             // jne           4569 <.literal16+0x629>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -28112,7 +28509,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3846a <_sk_callback_sse2+0xffffffffe9a346f1>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a385aa <_sk_callback_sse2+0xffffffffe9a3474a>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -28166,16 +28563,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4504 <.literal16+0x6b4>
+  .byte  127,0                               // jg            4644 <.literal16+0x704>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4508 <.literal16+0x6b8>
+  .byte  127,0                               // jg            4648 <.literal16+0x708>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            450c <.literal16+0x6bc>
+  .byte  127,0                               // jg            464c <.literal16+0x70c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4510 <.literal16+0x6c0>
+  .byte  127,0                               // jg            4650 <.literal16+0x710>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -28184,7 +28581,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4595 <.literal16+0x745>
+  .byte  119,115                             // ja            46d5 <.literal16+0x795>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -28195,7 +28592,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           44f9 <.literal16+0x6a9>
+  .byte  117,191                             // jne           4639 <.literal16+0x6f9>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -28207,7 +28604,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3853a <_sk_callback_sse2+0xffffffffe9a347c1>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3867a <_sk_callback_sse2+0xffffffffe9a3481a>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -28257,13 +28654,13 @@ BALIGN16
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
-  .byte  127,67                              // jg            4617 <.literal16+0x7c7>
+  .byte  127,67                              // jg            4757 <.literal16+0x817>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            461b <.literal16+0x7cb>
+  .byte  127,67                              // jg            475b <.literal16+0x81b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            461f <.literal16+0x7cf>
+  .byte  127,67                              // jg            475f <.literal16+0x81f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            4623 <.literal16+0x7d3>
+  .byte  127,67                              // jg            4763 <.literal16+0x823>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -28310,16 +28707,16 @@ BALIGN16
   .byte  128,3,62                            // addb          $0x3e,(%rbx)
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           46a3 <.literal16+0x853>
+  .byte  118,63                              // jbe           47e3 <.literal16+0x8a3>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           46a7 <.literal16+0x857>
+  .byte  118,63                              // jbe           47e7 <.literal16+0x8a7>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           46ab <.literal16+0x85b>
+  .byte  118,63                              // jbe           47eb <.literal16+0x8ab>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           46af <.literal16+0x85f>
+  .byte  118,63                              // jbe           47ef <.literal16+0x8af>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
@@ -28331,11 +28728,11 @@ BALIGN16
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            46eb <.literal16+0x89b>
+  .byte  127,67                              // jg            482b <.literal16+0x8eb>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            46ef <.literal16+0x89f>
+  .byte  127,67                              // jg            482f <.literal16+0x8ef>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            46f3 <.literal16+0x8a3>
+  .byte  127,67                              // jg            4833 <.literal16+0x8f3>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,0,0,128,63               // addb          $0x3f,-0x7fffffc5(%rax)
@@ -28375,13 +28772,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        4739 <.literal16+0x8e9>
+  .byte  224,7                               // loopne        4879 <.literal16+0x939>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        473d <.literal16+0x8ed>
+  .byte  224,7                               // loopne        487d <.literal16+0x93d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4741 <.literal16+0x8f1>
+  .byte  224,7                               // loopne        4881 <.literal16+0x941>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4745 <.literal16+0x8f5>
+  .byte  224,7                               // loopne        4885 <.literal16+0x945>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -28427,13 +28824,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        47a9 <.literal16+0x959>
+  .byte  224,7                               // loopne        48e9 <.literal16+0x9a9>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        47ad <.literal16+0x95d>
+  .byte  224,7                               // loopne        48ed <.literal16+0x9ad>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        47b1 <.literal16+0x961>
+  .byte  224,7                               // loopne        48f1 <.literal16+0x9b1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        47b5 <.literal16+0x965>
+  .byte  224,7                               // loopne        48f5 <.literal16+0x9b5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -28471,13 +28868,13 @@ BALIGN16
   .byte  65,0,0                              // add           %al,(%r8)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            4846 <.literal16+0x9f6>
+  .byte  124,66                              // jl            4986 <.literal16+0xa46>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            484a <.literal16+0x9fa>
+  .byte  124,66                              // jl            498a <.literal16+0xa4a>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            484e <.literal16+0x9fe>
+  .byte  124,66                              // jl            498e <.literal16+0xa4e>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            4852 <.literal16+0xa02>
+  .byte  124,66                              // jl            4992 <.literal16+0xa52>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,240                               // add           %dh,%al
@@ -28567,13 +28964,13 @@ BALIGN16
   .byte  136,136,61,137,136,136              // mov           %cl,-0x777776c3(%rax)
   .byte  61,137,136,136,61                   // cmp           $0x3d888889,%eax
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            4955 <.literal16+0xb05>
+  .byte  112,65                              // jo            4a95 <.literal16+0xb55>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            4959 <.literal16+0xb09>
+  .byte  112,65                              // jo            4a99 <.literal16+0xb59>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            495d <.literal16+0xb0d>
+  .byte  112,65                              // jo            4a9d <.literal16+0xb5d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            4961 <.literal16+0xb11>
+  .byte  112,65                              // jo            4aa1 <.literal16+0xb61>
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  255,0                               // incl          (%rax)
@@ -28595,11 +28992,11 @@ BALIGN16
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,0,0,127,67               // addb          $0x43,0x7f00003b(%rax)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            49ab <.literal16+0xb5b>
+  .byte  127,67                              // jg            4aeb <.literal16+0xbab>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            49af <.literal16+0xb5f>
+  .byte  127,67                              // jg            4aef <.literal16+0xbaf>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            49b3 <.literal16+0xb63>
+  .byte  127,67                              // jg            4af3 <.literal16+0xbb3>
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
@@ -28675,13 +29072,13 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  255                                 // (bad)
-  .byte  127,71                              // jg            4a9b <.literal16+0xc4b>
+  .byte  127,71                              // jg            4bdb <.literal16+0xc9b>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            4a9f <.literal16+0xc4f>
+  .byte  127,71                              // jg            4bdf <.literal16+0xc9f>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            4aa3 <.literal16+0xc53>
+  .byte  127,71                              // jg            4be3 <.literal16+0xca3>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            4aa7 <.literal16+0xc57>
+  .byte  127,71                              // jg            4be7 <.literal16+0xca7>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -28834,11 +29231,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         4c02 <.literal16+0xdb2>
+  .byte  62,114,28                           // jb,pt         4d42 <.literal16+0xe02>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4c06 <.literal16+0xdb6>
+  .byte  62,114,28                           // jb,pt         4d46 <.literal16+0xe06>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4c0a <.literal16+0xdba>
+  .byte  62,114,28                           // jb,pt         4d4a <.literal16+0xe0a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -28882,7 +29279,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63da95 <_sk_callback_sse2+0x3d639d1c>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dbd5 <_sk_callback_sse2+0x3d639d75>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -28908,7 +29305,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dad5 <_sk_callback_sse2+0x3d639d5c>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dc15 <_sk_callback_sse2+0x3d639db5>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -28917,13 +29314,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            4cce <.literal16+0xe7e>
+  .byte  114,28                              // jb            4e0e <.literal16+0xece>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4cd2 <.literal16+0xe82>
+  .byte  62,114,28                           // jb,pt         4e12 <.literal16+0xed2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4cd6 <.literal16+0xe86>
+  .byte  62,114,28                           // jb,pt         4e16 <.literal16+0xed6>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4cda <.literal16+0xe8a>
+  .byte  62,114,28                           // jb,pt         4e1a <.literal16+0xeda>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -28944,11 +29341,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         4d12 <.literal16+0xec2>
+  .byte  62,114,28                           // jb,pt         4e52 <.literal16+0xf12>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4d16 <.literal16+0xec6>
+  .byte  62,114,28                           // jb,pt         4e56 <.literal16+0xf16>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4d1a <.literal16+0xeca>
+  .byte  62,114,28                           // jb,pt         4e5a <.literal16+0xf1a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -28992,7 +29389,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dba5 <_sk_callback_sse2+0x3d639e2c>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dce5 <_sk_callback_sse2+0x3d639e85>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -29018,7 +29415,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dbe5 <_sk_callback_sse2+0x3d639e6c>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63dd25 <_sk_callback_sse2+0x3d639ec5>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -29027,13 +29424,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            4dde <.literal16+0xf8e>
+  .byte  114,28                              // jb            4f1e <.literal16+0xfde>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4de2 <_sk_callback_sse2+0x1069>
+  .byte  62,114,28                           // jb,pt         4f22 <_sk_callback_sse2+0x10c2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4de6 <_sk_callback_sse2+0x106d>
+  .byte  62,114,28                           // jb,pt         4f26 <_sk_callback_sse2+0x10c6>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         4dea <_sk_callback_sse2+0x1071>
+  .byte  62,114,28                           // jb,pt         4f2a <_sk_callback_sse2+0x10ca>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
index ec73eb065d95b16b05492d2ba32e78eb5685d435..378412e0791dd72ab5eb293a232c0300021b302e 100644 (file)
@@ -106,14 +106,14 @@ _sk_seed_shader_hsw LABEL PROC
   DB  197,249,110,199                     ; vmovd         %edi,%xmm0
   DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,222,60,0,0        ; vbroadcastss  0x3cde(%rip),%ymm1        # 3e38 <_sk_callback_hsw+0x11b>
+  DB  196,226,125,24,13,178,61,0,0        ; vbroadcastss  0x3db2(%rip),%ymm1        # 3f0c <_sk_callback_hsw+0x11b>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,21,194,60,0,0        ; vbroadcastss  0x3cc2(%rip),%ymm2        # 3e3c <_sk_callback_hsw+0x11f>
+  DB  196,226,125,24,21,150,61,0,0        ; vbroadcastss  0x3d96(%rip),%ymm2        # 3f10 <_sk_callback_hsw+0x11f>
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
   DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
@@ -121,6 +121,50 @@ _sk_seed_shader_hsw LABEL PROC
   DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_dither_hsw
+_sk_dither_hsw LABEL PROC
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  197,121,110,199                     ; vmovd         %edi,%xmm8
+  DB  196,66,125,88,192                   ; vpbroadcastd  %xmm8,%ymm8
+  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
+  DB  197,60,88,2                         ; vaddps        (%rdx),%ymm8,%ymm8
+  DB  196,65,126,91,192                   ; vcvttps2dq    %ymm8,%ymm8
+  DB  76,139,0                            ; mov           (%rax),%r8
+  DB  196,66,125,88,8                     ; vpbroadcastd  (%r8),%ymm9
+  DB  196,65,61,239,201                   ; vpxor         %ymm9,%ymm8,%ymm9
+  DB  196,98,125,88,21,85,61,0,0          ; vpbroadcastd  0x3d55(%rip),%ymm10        # 3f14 <_sk_callback_hsw+0x123>
+  DB  196,65,53,219,218                   ; vpand         %ymm10,%ymm9,%ymm11
+  DB  196,193,37,114,243,5                ; vpslld        $0x5,%ymm11,%ymm11
+  DB  196,65,61,219,210                   ; vpand         %ymm10,%ymm8,%ymm10
+  DB  196,193,45,114,242,4                ; vpslld        $0x4,%ymm10,%ymm10
+  DB  196,98,125,88,37,58,61,0,0          ; vpbroadcastd  0x3d3a(%rip),%ymm12        # 3f18 <_sk_callback_hsw+0x127>
+  DB  196,98,125,88,45,53,61,0,0          ; vpbroadcastd  0x3d35(%rip),%ymm13        # 3f1c <_sk_callback_hsw+0x12b>
+  DB  196,65,53,219,245                   ; vpand         %ymm13,%ymm9,%ymm14
+  DB  196,193,13,114,246,2                ; vpslld        $0x2,%ymm14,%ymm14
+  DB  196,65,61,219,237                   ; vpand         %ymm13,%ymm8,%ymm13
+  DB  196,65,21,254,237                   ; vpaddd        %ymm13,%ymm13,%ymm13
+  DB  196,65,53,219,204                   ; vpand         %ymm12,%ymm9,%ymm9
+  DB  196,193,53,114,209,1                ; vpsrld        $0x1,%ymm9,%ymm9
+  DB  196,65,61,219,196                   ; vpand         %ymm12,%ymm8,%ymm8
+  DB  196,193,61,114,208,2                ; vpsrld        $0x2,%ymm8,%ymm8
+  DB  196,65,21,235,210                   ; vpor          %ymm10,%ymm13,%ymm10
+  DB  196,65,45,235,192                   ; vpor          %ymm8,%ymm10,%ymm8
+  DB  196,65,37,235,214                   ; vpor          %ymm14,%ymm11,%ymm10
+  DB  196,65,61,235,194                   ; vpor          %ymm10,%ymm8,%ymm8
+  DB  196,65,61,235,193                   ; vpor          %ymm9,%ymm8,%ymm8
+  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
+  DB  196,98,125,24,13,231,60,0,0         ; vbroadcastss  0x3ce7(%rip),%ymm9        # 3f20 <_sk_callback_hsw+0x12f>
+  DB  196,98,125,24,21,226,60,0,0         ; vbroadcastss  0x3ce2(%rip),%ymm10        # 3f24 <_sk_callback_hsw+0x133>
+  DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
+  DB  196,98,125,24,64,8                  ; vbroadcastss  0x8(%rax),%ymm8
+  DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
+  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8
+  DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
+  DB  197,188,88,201                      ; vaddps        %ymm1,%ymm8,%ymm1
+  DB  197,188,88,210                      ; vaddps        %ymm2,%ymm8,%ymm2
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_constant_color_hsw
 _sk_constant_color_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -143,7 +187,7 @@ _sk_clear_hsw LABEL PROC
 PUBLIC _sk_srcatop_hsw
 _sk_srcatop_hsw LABEL PROC
   DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
-  DB  196,98,125,24,5,114,60,0,0          ; vbroadcastss  0x3c72(%rip),%ymm8        # 3e40 <_sk_callback_hsw+0x123>
+  DB  196,98,125,24,5,132,60,0,0          ; vbroadcastss  0x3c84(%rip),%ymm8        # 3f28 <_sk_callback_hsw+0x137>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,226,61,184,196                  ; vfmadd231ps   %ymm4,%ymm8,%ymm0
   DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
@@ -157,7 +201,7 @@ _sk_srcatop_hsw LABEL PROC
 
 PUBLIC _sk_dstatop_hsw
 _sk_dstatop_hsw LABEL PROC
-  DB  196,98,125,24,5,69,60,0,0           ; vbroadcastss  0x3c45(%rip),%ymm8        # 3e44 <_sk_callback_hsw+0x127>
+  DB  196,98,125,24,5,87,60,0,0           ; vbroadcastss  0x3c57(%rip),%ymm8        # 3f2c <_sk_callback_hsw+0x13b>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  196,226,101,184,196                 ; vfmadd231ps   %ymm4,%ymm3,%ymm0
@@ -190,7 +234,7 @@ _sk_dstin_hsw LABEL PROC
 
 PUBLIC _sk_srcout_hsw
 _sk_srcout_hsw LABEL PROC
-  DB  196,98,125,24,5,236,59,0,0          ; vbroadcastss  0x3bec(%rip),%ymm8        # 3e48 <_sk_callback_hsw+0x12b>
+  DB  196,98,125,24,5,254,59,0,0          ; vbroadcastss  0x3bfe(%rip),%ymm8        # 3f30 <_sk_callback_hsw+0x13f>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -201,7 +245,7 @@ _sk_srcout_hsw LABEL PROC
 
 PUBLIC _sk_dstout_hsw
 _sk_dstout_hsw LABEL PROC
-  DB  196,226,125,24,5,207,59,0,0         ; vbroadcastss  0x3bcf(%rip),%ymm0        # 3e4c <_sk_callback_hsw+0x12f>
+  DB  196,226,125,24,5,225,59,0,0         ; vbroadcastss  0x3be1(%rip),%ymm0        # 3f34 <_sk_callback_hsw+0x143>
   DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
   DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
   DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
@@ -212,7 +256,7 @@ _sk_dstout_hsw LABEL PROC
 
 PUBLIC _sk_srcover_hsw
 _sk_srcover_hsw LABEL PROC
-  DB  196,98,125,24,5,178,59,0,0          ; vbroadcastss  0x3bb2(%rip),%ymm8        # 3e50 <_sk_callback_hsw+0x133>
+  DB  196,98,125,24,5,196,59,0,0          ; vbroadcastss  0x3bc4(%rip),%ymm8        # 3f38 <_sk_callback_hsw+0x147>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,93,184,192                  ; vfmadd231ps   %ymm8,%ymm4,%ymm0
   DB  196,194,85,184,200                  ; vfmadd231ps   %ymm8,%ymm5,%ymm1
@@ -223,7 +267,7 @@ _sk_srcover_hsw LABEL PROC
 
 PUBLIC _sk_dstover_hsw
 _sk_dstover_hsw LABEL PROC
-  DB  196,98,125,24,5,145,59,0,0          ; vbroadcastss  0x3b91(%rip),%ymm8        # 3e54 <_sk_callback_hsw+0x137>
+  DB  196,98,125,24,5,163,59,0,0          ; vbroadcastss  0x3ba3(%rip),%ymm8        # 3f3c <_sk_callback_hsw+0x14b>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
   DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
@@ -243,7 +287,7 @@ _sk_modulate_hsw LABEL PROC
 
 PUBLIC _sk_multiply_hsw
 _sk_multiply_hsw LABEL PROC
-  DB  196,98,125,24,5,92,59,0,0           ; vbroadcastss  0x3b5c(%rip),%ymm8        # 3e58 <_sk_callback_hsw+0x13b>
+  DB  196,98,125,24,5,110,59,0,0          ; vbroadcastss  0x3b6e(%rip),%ymm8        # 3f40 <_sk_callback_hsw+0x14f>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -285,7 +329,7 @@ _sk_screen_hsw LABEL PROC
 
 PUBLIC _sk_xor__hsw
 _sk_xor__hsw LABEL PROC
-  DB  196,98,125,24,5,215,58,0,0          ; vbroadcastss  0x3ad7(%rip),%ymm8        # 3e5c <_sk_callback_hsw+0x13f>
+  DB  196,98,125,24,5,233,58,0,0          ; vbroadcastss  0x3ae9(%rip),%ymm8        # 3f44 <_sk_callback_hsw+0x153>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -317,7 +361,7 @@ _sk_darken_hsw LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,95,58,0,0           ; vbroadcastss  0x3a5f(%rip),%ymm8        # 3e60 <_sk_callback_hsw+0x143>
+  DB  196,98,125,24,5,113,58,0,0          ; vbroadcastss  0x3a71(%rip),%ymm8        # 3f48 <_sk_callback_hsw+0x157>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -340,7 +384,7 @@ _sk_lighten_hsw LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,14,58,0,0           ; vbroadcastss  0x3a0e(%rip),%ymm8        # 3e64 <_sk_callback_hsw+0x147>
+  DB  196,98,125,24,5,32,58,0,0           ; vbroadcastss  0x3a20(%rip),%ymm8        # 3f4c <_sk_callback_hsw+0x15b>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -366,7 +410,7 @@ _sk_difference_hsw LABEL PROC
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,177,57,0,0          ; vbroadcastss  0x39b1(%rip),%ymm8        # 3e68 <_sk_callback_hsw+0x14b>
+  DB  196,98,125,24,5,195,57,0,0          ; vbroadcastss  0x39c3(%rip),%ymm8        # 3f50 <_sk_callback_hsw+0x15f>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -386,7 +430,7 @@ _sk_exclusion_hsw LABEL PROC
   DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,111,57,0,0          ; vbroadcastss  0x396f(%rip),%ymm8        # 3e6c <_sk_callback_hsw+0x14f>
+  DB  196,98,125,24,5,129,57,0,0          ; vbroadcastss  0x3981(%rip),%ymm8        # 3f54 <_sk_callback_hsw+0x163>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -394,7 +438,7 @@ _sk_exclusion_hsw LABEL PROC
 
 PUBLIC _sk_colorburn_hsw
 _sk_colorburn_hsw LABEL PROC
-  DB  196,98,125,24,5,93,57,0,0           ; vbroadcastss  0x395d(%rip),%ymm8        # 3e70 <_sk_callback_hsw+0x153>
+  DB  196,98,125,24,5,111,57,0,0          ; vbroadcastss  0x396f(%rip),%ymm8        # 3f58 <_sk_callback_hsw+0x167>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
@@ -450,7 +494,7 @@ _sk_colorburn_hsw LABEL PROC
 PUBLIC _sk_colordodge_hsw
 _sk_colordodge_hsw LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,13,104,56,0,0         ; vbroadcastss  0x3868(%rip),%ymm9        # 3e74 <_sk_callback_hsw+0x157>
+  DB  196,98,125,24,13,122,56,0,0         ; vbroadcastss  0x387a(%rip),%ymm9        # 3f5c <_sk_callback_hsw+0x16b>
   DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
@@ -501,7 +545,7 @@ _sk_colordodge_hsw LABEL PROC
 
 PUBLIC _sk_hardlight_hsw
 _sk_hardlight_hsw LABEL PROC
-  DB  196,98,125,24,5,137,55,0,0          ; vbroadcastss  0x3789(%rip),%ymm8        # 3e78 <_sk_callback_hsw+0x15b>
+  DB  196,98,125,24,5,155,55,0,0          ; vbroadcastss  0x379b(%rip),%ymm8        # 3f60 <_sk_callback_hsw+0x16f>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -550,7 +594,7 @@ _sk_hardlight_hsw LABEL PROC
 
 PUBLIC _sk_overlay_hsw
 _sk_overlay_hsw LABEL PROC
-  DB  196,98,125,24,5,193,54,0,0          ; vbroadcastss  0x36c1(%rip),%ymm8        # 3e7c <_sk_callback_hsw+0x15f>
+  DB  196,98,125,24,5,211,54,0,0          ; vbroadcastss  0x36d3(%rip),%ymm8        # 3f64 <_sk_callback_hsw+0x173>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -610,10 +654,10 @@ _sk_softlight_hsw LABEL PROC
   DB  196,65,20,88,197                    ; vaddps        %ymm13,%ymm13,%ymm8
   DB  196,65,60,88,192                    ; vaddps        %ymm8,%ymm8,%ymm8
   DB  196,66,61,168,192                   ; vfmadd213ps   %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,29,200,53,0,0         ; vbroadcastss  0x35c8(%rip),%ymm11        # 3e84 <_sk_callback_hsw+0x167>
+  DB  196,98,125,24,29,218,53,0,0         ; vbroadcastss  0x35da(%rip),%ymm11        # 3f6c <_sk_callback_hsw+0x17b>
   DB  196,65,20,88,227                    ; vaddps        %ymm11,%ymm13,%ymm12
   DB  196,65,28,89,192                    ; vmulps        %ymm8,%ymm12,%ymm8
-  DB  196,98,125,24,37,185,53,0,0         ; vbroadcastss  0x35b9(%rip),%ymm12        # 3e88 <_sk_callback_hsw+0x16b>
+  DB  196,98,125,24,37,203,53,0,0         ; vbroadcastss  0x35cb(%rip),%ymm12        # 3f70 <_sk_callback_hsw+0x17f>
   DB  196,66,21,184,196                   ; vfmadd231ps   %ymm12,%ymm13,%ymm8
   DB  196,65,124,82,245                   ; vrsqrtps      %ymm13,%ymm14
   DB  196,65,124,83,246                   ; vrcpps        %ymm14,%ymm14
@@ -623,7 +667,7 @@ _sk_softlight_hsw LABEL PROC
   DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
   DB  196,67,13,74,240,240                ; vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   DB  197,116,88,249                      ; vaddps        %ymm1,%ymm1,%ymm15
-  DB  196,98,125,24,5,124,53,0,0          ; vbroadcastss  0x357c(%rip),%ymm8        # 3e80 <_sk_callback_hsw+0x163>
+  DB  196,98,125,24,5,142,53,0,0          ; vbroadcastss  0x358e(%rip),%ymm8        # 3f68 <_sk_callback_hsw+0x177>
   DB  196,65,60,92,237                    ; vsubps        %ymm13,%ymm8,%ymm13
   DB  197,132,92,195                      ; vsubps        %ymm3,%ymm15,%ymm0
   DB  196,98,125,168,235                  ; vfmadd213ps   %ymm3,%ymm0,%ymm13
@@ -713,7 +757,7 @@ _sk_clamp_0_hsw LABEL PROC
 
 PUBLIC _sk_clamp_1_hsw
 _sk_clamp_1_hsw LABEL PROC
-  DB  196,98,125,24,5,255,51,0,0          ; vbroadcastss  0x33ff(%rip),%ymm8        # 3e8c <_sk_callback_hsw+0x16f>
+  DB  196,98,125,24,5,17,52,0,0           ; vbroadcastss  0x3411(%rip),%ymm8        # 3f74 <_sk_callback_hsw+0x183>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
@@ -723,7 +767,7 @@ _sk_clamp_1_hsw LABEL PROC
 
 PUBLIC _sk_clamp_a_hsw
 _sk_clamp_a_hsw LABEL PROC
-  DB  196,98,125,24,5,226,51,0,0          ; vbroadcastss  0x33e2(%rip),%ymm8        # 3e90 <_sk_callback_hsw+0x173>
+  DB  196,98,125,24,5,244,51,0,0          ; vbroadcastss  0x33f4(%rip),%ymm8        # 3f78 <_sk_callback_hsw+0x187>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
   DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
@@ -795,7 +839,7 @@ PUBLIC _sk_unpremul_hsw
 _sk_unpremul_hsw LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
-  DB  196,98,125,24,21,42,51,0,0          ; vbroadcastss  0x332a(%rip),%ymm10        # 3e94 <_sk_callback_hsw+0x177>
+  DB  196,98,125,24,21,60,51,0,0          ; vbroadcastss  0x333c(%rip),%ymm10        # 3f7c <_sk_callback_hsw+0x18b>
   DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
   DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
@@ -806,16 +850,16 @@ _sk_unpremul_hsw LABEL PROC
 
 PUBLIC _sk_from_srgb_hsw
 _sk_from_srgb_hsw LABEL PROC
-  DB  196,98,125,24,5,11,51,0,0           ; vbroadcastss  0x330b(%rip),%ymm8        # 3e98 <_sk_callback_hsw+0x17b>
+  DB  196,98,125,24,5,29,51,0,0           ; vbroadcastss  0x331d(%rip),%ymm8        # 3f80 <_sk_callback_hsw+0x18f>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
-  DB  196,98,125,24,29,253,50,0,0         ; vbroadcastss  0x32fd(%rip),%ymm11        # 3e9c <_sk_callback_hsw+0x17f>
-  DB  196,98,125,24,37,248,50,0,0         ; vbroadcastss  0x32f8(%rip),%ymm12        # 3ea0 <_sk_callback_hsw+0x183>
+  DB  196,98,125,24,29,15,51,0,0          ; vbroadcastss  0x330f(%rip),%ymm11        # 3f84 <_sk_callback_hsw+0x193>
+  DB  196,98,125,24,37,10,51,0,0          ; vbroadcastss  0x330a(%rip),%ymm12        # 3f88 <_sk_callback_hsw+0x197>
   DB  196,65,124,40,236                   ; vmovaps       %ymm12,%ymm13
   DB  196,66,125,168,235                  ; vfmadd213ps   %ymm11,%ymm0,%ymm13
-  DB  196,98,125,24,53,233,50,0,0         ; vbroadcastss  0x32e9(%rip),%ymm14        # 3ea4 <_sk_callback_hsw+0x187>
+  DB  196,98,125,24,53,251,50,0,0         ; vbroadcastss  0x32fb(%rip),%ymm14        # 3f8c <_sk_callback_hsw+0x19b>
   DB  196,66,45,168,238                   ; vfmadd213ps   %ymm14,%ymm10,%ymm13
-  DB  196,98,125,24,21,223,50,0,0         ; vbroadcastss  0x32df(%rip),%ymm10        # 3ea8 <_sk_callback_hsw+0x18b>
+  DB  196,98,125,24,21,241,50,0,0         ; vbroadcastss  0x32f1(%rip),%ymm10        # 3f90 <_sk_callback_hsw+0x19f>
   DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
   DB  196,195,21,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
   DB  196,65,116,89,200                   ; vmulps        %ymm8,%ymm1,%ymm9
@@ -839,16 +883,16 @@ _sk_to_srgb_hsw LABEL PROC
   DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
   DB  196,65,124,83,200                   ; vrcpps        %ymm8,%ymm9
   DB  196,65,124,82,208                   ; vrsqrtps      %ymm8,%ymm10
-  DB  196,98,125,24,5,121,50,0,0          ; vbroadcastss  0x3279(%rip),%ymm8        # 3eac <_sk_callback_hsw+0x18f>
+  DB  196,98,125,24,5,139,50,0,0          ; vbroadcastss  0x328b(%rip),%ymm8        # 3f94 <_sk_callback_hsw+0x1a3>
   DB  196,65,124,89,216                   ; vmulps        %ymm8,%ymm0,%ymm11
-  DB  196,98,125,24,37,111,50,0,0         ; vbroadcastss  0x326f(%rip),%ymm12        # 3eb0 <_sk_callback_hsw+0x193>
-  DB  196,98,125,24,45,106,50,0,0         ; vbroadcastss  0x326a(%rip),%ymm13        # 3eb4 <_sk_callback_hsw+0x197>
+  DB  196,98,125,24,37,129,50,0,0         ; vbroadcastss  0x3281(%rip),%ymm12        # 3f98 <_sk_callback_hsw+0x1a7>
+  DB  196,98,125,24,45,124,50,0,0         ; vbroadcastss  0x327c(%rip),%ymm13        # 3f9c <_sk_callback_hsw+0x1ab>
   DB  196,66,21,168,204                   ; vfmadd213ps   %ymm12,%ymm13,%ymm9
-  DB  196,98,125,24,53,96,50,0,0          ; vbroadcastss  0x3260(%rip),%ymm14        # 3eb8 <_sk_callback_hsw+0x19b>
+  DB  196,98,125,24,53,114,50,0,0         ; vbroadcastss  0x3272(%rip),%ymm14        # 3fa0 <_sk_callback_hsw+0x1af>
   DB  196,66,13,184,202                   ; vfmadd231ps   %ymm10,%ymm14,%ymm9
-  DB  196,98,125,24,21,86,50,0,0          ; vbroadcastss  0x3256(%rip),%ymm10        # 3ebc <_sk_callback_hsw+0x19f>
+  DB  196,98,125,24,21,104,50,0,0         ; vbroadcastss  0x3268(%rip),%ymm10        # 3fa4 <_sk_callback_hsw+0x1b3>
   DB  196,65,44,93,201                    ; vminps        %ymm9,%ymm10,%ymm9
-  DB  196,98,125,24,61,76,50,0,0          ; vbroadcastss  0x324c(%rip),%ymm15        # 3ec0 <_sk_callback_hsw+0x1a3>
+  DB  196,98,125,24,61,94,50,0,0          ; vbroadcastss  0x325e(%rip),%ymm15        # 3fa8 <_sk_callback_hsw+0x1b7>
   DB  196,193,124,194,199,1               ; vcmpltps      %ymm15,%ymm0,%ymm0
   DB  196,195,53,74,195,0                 ; vblendvps     %ymm0,%ymm11,%ymm9,%ymm0
   DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
@@ -879,26 +923,26 @@ _sk_rgb_to_hsl_hsw LABEL PROC
   DB  197,124,93,201                      ; vminps        %ymm1,%ymm0,%ymm9
   DB  197,52,93,202                       ; vminps        %ymm2,%ymm9,%ymm9
   DB  196,65,60,92,209                    ; vsubps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,198,49,0,0         ; vbroadcastss  0x31c6(%rip),%ymm11        # 3ec4 <_sk_callback_hsw+0x1a7>
+  DB  196,98,125,24,29,216,49,0,0         ; vbroadcastss  0x31d8(%rip),%ymm11        # 3fac <_sk_callback_hsw+0x1bb>
   DB  196,65,36,94,218                    ; vdivps        %ymm10,%ymm11,%ymm11
   DB  197,116,92,226                      ; vsubps        %ymm2,%ymm1,%ymm12
   DB  197,116,194,234,1                   ; vcmpltps      %ymm2,%ymm1,%ymm13
-  DB  196,98,125,24,53,179,49,0,0         ; vbroadcastss  0x31b3(%rip),%ymm14        # 3ec8 <_sk_callback_hsw+0x1ab>
+  DB  196,98,125,24,53,197,49,0,0         ; vbroadcastss  0x31c5(%rip),%ymm14        # 3fb0 <_sk_callback_hsw+0x1bf>
   DB  196,65,4,87,255                     ; vxorps        %ymm15,%ymm15,%ymm15
   DB  196,67,5,74,238,208                 ; vblendvps     %ymm13,%ymm14,%ymm15,%ymm13
   DB  196,66,37,168,229                   ; vfmadd213ps   %ymm13,%ymm11,%ymm12
   DB  197,236,92,208                      ; vsubps        %ymm0,%ymm2,%ymm2
   DB  197,124,92,233                      ; vsubps        %ymm1,%ymm0,%ymm13
-  DB  196,98,125,24,53,154,49,0,0         ; vbroadcastss  0x319a(%rip),%ymm14        # 3ed0 <_sk_callback_hsw+0x1b3>
+  DB  196,98,125,24,53,172,49,0,0         ; vbroadcastss  0x31ac(%rip),%ymm14        # 3fb8 <_sk_callback_hsw+0x1c7>
   DB  196,66,37,168,238                   ; vfmadd213ps   %ymm14,%ymm11,%ymm13
-  DB  196,98,125,24,53,136,49,0,0         ; vbroadcastss  0x3188(%rip),%ymm14        # 3ecc <_sk_callback_hsw+0x1af>
+  DB  196,98,125,24,53,154,49,0,0         ; vbroadcastss  0x319a(%rip),%ymm14        # 3fb4 <_sk_callback_hsw+0x1c3>
   DB  196,194,37,168,214                  ; vfmadd213ps   %ymm14,%ymm11,%ymm2
   DB  197,188,194,201,0                   ; vcmpeqps      %ymm1,%ymm8,%ymm1
   DB  196,227,21,74,202,16                ; vblendvps     %ymm1,%ymm2,%ymm13,%ymm1
   DB  197,188,194,192,0                   ; vcmpeqps      %ymm0,%ymm8,%ymm0
   DB  196,195,117,74,196,0                ; vblendvps     %ymm0,%ymm12,%ymm1,%ymm0
   DB  196,193,60,88,201                   ; vaddps        %ymm9,%ymm8,%ymm1
-  DB  196,98,125,24,29,107,49,0,0         ; vbroadcastss  0x316b(%rip),%ymm11        # 3ed8 <_sk_callback_hsw+0x1bb>
+  DB  196,98,125,24,29,125,49,0,0         ; vbroadcastss  0x317d(%rip),%ymm11        # 3fc0 <_sk_callback_hsw+0x1cf>
   DB  196,193,116,89,211                  ; vmulps        %ymm11,%ymm1,%ymm2
   DB  197,36,194,218,1                    ; vcmpltps      %ymm2,%ymm11,%ymm11
   DB  196,65,12,92,224                    ; vsubps        %ymm8,%ymm14,%ymm12
@@ -908,7 +952,7 @@ _sk_rgb_to_hsl_hsw LABEL PROC
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  196,195,125,74,199,128              ; vblendvps     %ymm8,%ymm15,%ymm0,%ymm0
   DB  196,195,117,74,207,128              ; vblendvps     %ymm8,%ymm15,%ymm1,%ymm1
-  DB  196,98,125,24,5,46,49,0,0           ; vbroadcastss  0x312e(%rip),%ymm8        # 3ed4 <_sk_callback_hsw+0x1b7>
+  DB  196,98,125,24,5,64,49,0,0           ; vbroadcastss  0x3140(%rip),%ymm8        # 3fbc <_sk_callback_hsw+0x1cb>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -923,30 +967,30 @@ _sk_hsl_to_rgb_hsw LABEL PROC
   DB  197,252,17,28,36                    ; vmovups       %ymm3,(%rsp)
   DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
   DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
-  DB  196,98,125,24,5,245,48,0,0          ; vbroadcastss  0x30f5(%rip),%ymm8        # 3edc <_sk_callback_hsw+0x1bf>
+  DB  196,98,125,24,5,7,49,0,0            ; vbroadcastss  0x3107(%rip),%ymm8        # 3fc4 <_sk_callback_hsw+0x1d3>
   DB  197,60,194,202,2                    ; vcmpleps      %ymm2,%ymm8,%ymm9
   DB  197,84,89,210                       ; vmulps        %ymm2,%ymm5,%ymm10
   DB  196,65,84,92,218                    ; vsubps        %ymm10,%ymm5,%ymm11
   DB  196,67,45,74,203,144                ; vblendvps     %ymm9,%ymm11,%ymm10,%ymm9
   DB  197,52,88,210                       ; vaddps        %ymm2,%ymm9,%ymm10
-  DB  196,98,125,24,13,216,48,0,0         ; vbroadcastss  0x30d8(%rip),%ymm9        # 3ee0 <_sk_callback_hsw+0x1c3>
+  DB  196,98,125,24,13,234,48,0,0         ; vbroadcastss  0x30ea(%rip),%ymm9        # 3fc8 <_sk_callback_hsw+0x1d7>
   DB  196,66,109,170,202                  ; vfmsub213ps   %ymm10,%ymm2,%ymm9
-  DB  196,98,125,24,29,206,48,0,0         ; vbroadcastss  0x30ce(%rip),%ymm11        # 3ee4 <_sk_callback_hsw+0x1c7>
+  DB  196,98,125,24,29,224,48,0,0         ; vbroadcastss  0x30e0(%rip),%ymm11        # 3fcc <_sk_callback_hsw+0x1db>
   DB  196,65,92,88,219                    ; vaddps        %ymm11,%ymm4,%ymm11
   DB  196,67,125,8,227,1                  ; vroundps      $0x1,%ymm11,%ymm12
   DB  196,65,36,92,252                    ; vsubps        %ymm12,%ymm11,%ymm15
   DB  196,65,44,92,217                    ; vsubps        %ymm9,%ymm10,%ymm11
-  DB  196,98,125,24,45,184,48,0,0         ; vbroadcastss  0x30b8(%rip),%ymm13        # 3eec <_sk_callback_hsw+0x1cf>
+  DB  196,98,125,24,45,202,48,0,0         ; vbroadcastss  0x30ca(%rip),%ymm13        # 3fd4 <_sk_callback_hsw+0x1e3>
   DB  196,193,4,89,197                    ; vmulps        %ymm13,%ymm15,%ymm0
-  DB  196,98,125,24,53,174,48,0,0         ; vbroadcastss  0x30ae(%rip),%ymm14        # 3ef0 <_sk_callback_hsw+0x1d3>
+  DB  196,98,125,24,53,192,48,0,0         ; vbroadcastss  0x30c0(%rip),%ymm14        # 3fd8 <_sk_callback_hsw+0x1e7>
   DB  197,12,92,224                       ; vsubps        %ymm0,%ymm14,%ymm12
   DB  196,66,37,168,225                   ; vfmadd213ps   %ymm9,%ymm11,%ymm12
-  DB  196,226,125,24,29,148,48,0,0        ; vbroadcastss  0x3094(%rip),%ymm3        # 3ee8 <_sk_callback_hsw+0x1cb>
+  DB  196,226,125,24,29,166,48,0,0        ; vbroadcastss  0x30a6(%rip),%ymm3        # 3fd0 <_sk_callback_hsw+0x1df>
   DB  196,193,100,194,255,2               ; vcmpleps      %ymm15,%ymm3,%ymm7
   DB  196,195,29,74,249,112               ; vblendvps     %ymm7,%ymm9,%ymm12,%ymm7
   DB  196,65,60,194,231,2                 ; vcmpleps      %ymm15,%ymm8,%ymm12
   DB  196,227,45,74,255,192               ; vblendvps     %ymm12,%ymm7,%ymm10,%ymm7
-  DB  196,98,125,24,37,127,48,0,0         ; vbroadcastss  0x307f(%rip),%ymm12        # 3ef4 <_sk_callback_hsw+0x1d7>
+  DB  196,98,125,24,37,145,48,0,0         ; vbroadcastss  0x3091(%rip),%ymm12        # 3fdc <_sk_callback_hsw+0x1eb>
   DB  196,65,28,194,255,2                 ; vcmpleps      %ymm15,%ymm12,%ymm15
   DB  196,194,37,168,193                  ; vfmadd213ps   %ymm9,%ymm11,%ymm0
   DB  196,99,125,74,255,240               ; vblendvps     %ymm15,%ymm7,%ymm0,%ymm15
@@ -962,7 +1006,7 @@ _sk_hsl_to_rgb_hsw LABEL PROC
   DB  197,156,194,192,2                   ; vcmpleps      %ymm0,%ymm12,%ymm0
   DB  196,194,37,168,249                  ; vfmadd213ps   %ymm9,%ymm11,%ymm7
   DB  196,227,69,74,201,0                 ; vblendvps     %ymm0,%ymm1,%ymm7,%ymm1
-  DB  196,226,125,24,5,43,48,0,0          ; vbroadcastss  0x302b(%rip),%ymm0        # 3ef8 <_sk_callback_hsw+0x1db>
+  DB  196,226,125,24,5,61,48,0,0          ; vbroadcastss  0x303d(%rip),%ymm0        # 3fe0 <_sk_callback_hsw+0x1ef>
   DB  197,220,88,192                      ; vaddps        %ymm0,%ymm4,%ymm0
   DB  196,227,125,8,224,1                 ; vroundps      $0x1,%ymm0,%ymm4
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
@@ -1008,11 +1052,11 @@ _sk_scale_u8_hsw LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,51                              ; jne           fb3 <_sk_scale_u8_hsw+0x43>
+  DB  117,51                              ; jne           1089 <_sk_scale_u8_hsw+0x43>
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,101,47,0,0         ; vbroadcastss  0x2f65(%rip),%ymm9        # 3efc <_sk_callback_hsw+0x1df>
+  DB  196,98,125,24,13,119,47,0,0         ; vbroadcastss  0x2f77(%rip),%ymm9        # 3fe4 <_sk_callback_hsw+0x1f3>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -1030,9 +1074,9 @@ _sk_scale_u8_hsw LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           fbb <_sk_scale_u8_hsw+0x4b>
+  DB  117,234                             ; jne           1091 <_sk_scale_u8_hsw+0x4b>
   DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
-  DB  235,172                             ; jmp           f84 <_sk_scale_u8_hsw+0x14>
+  DB  235,172                             ; jmp           105a <_sk_scale_u8_hsw+0x14>
 
 PUBLIC _sk_lerp_1_float_hsw
 _sk_lerp_1_float_hsw LABEL PROC
@@ -1056,11 +1100,11 @@ _sk_lerp_u8_hsw LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,71                              ; jne           105e <_sk_lerp_u8_hsw+0x57>
+  DB  117,71                              ; jne           1134 <_sk_lerp_u8_hsw+0x57>
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,210,46,0,0         ; vbroadcastss  0x2ed2(%rip),%ymm9        # 3f00 <_sk_callback_hsw+0x1e3>
+  DB  196,98,125,24,13,228,46,0,0         ; vbroadcastss  0x2ee4(%rip),%ymm9        # 3fe8 <_sk_callback_hsw+0x1f7>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
@@ -1082,32 +1126,32 @@ _sk_lerp_u8_hsw LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           1066 <_sk_lerp_u8_hsw+0x5f>
+  DB  117,234                             ; jne           113c <_sk_lerp_u8_hsw+0x5f>
   DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
-  DB  235,152                             ; jmp           101b <_sk_lerp_u8_hsw+0x14>
+  DB  235,152                             ; jmp           10f1 <_sk_lerp_u8_hsw+0x14>
 
 PUBLIC _sk_lerp_565_hsw
 _sk_lerp_565_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,149,0,0,0                    ; jne           1126 <_sk_lerp_565_hsw+0xa3>
+  DB  15,133,149,0,0,0                    ; jne           11fc <_sk_lerp_565_hsw+0xa3>
   DB  196,193,122,111,28,122              ; vmovdqu       (%r10,%rdi,2),%xmm3
   DB  196,226,125,51,219                  ; vpmovzxwd     %xmm3,%ymm3
-  DB  196,98,125,88,5,95,46,0,0           ; vpbroadcastd  0x2e5f(%rip),%ymm8        # 3f04 <_sk_callback_hsw+0x1e7>
+  DB  196,98,125,88,5,113,46,0,0          ; vpbroadcastd  0x2e71(%rip),%ymm8        # 3fec <_sk_callback_hsw+0x1fb>
   DB  196,65,101,219,192                  ; vpand         %ymm8,%ymm3,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,80,46,0,0          ; vbroadcastss  0x2e50(%rip),%ymm9        # 3f08 <_sk_callback_hsw+0x1eb>
+  DB  196,98,125,24,13,98,46,0,0          ; vbroadcastss  0x2e62(%rip),%ymm9        # 3ff0 <_sk_callback_hsw+0x1ff>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,88,13,70,46,0,0          ; vpbroadcastd  0x2e46(%rip),%ymm9        # 3f0c <_sk_callback_hsw+0x1ef>
+  DB  196,98,125,88,13,88,46,0,0          ; vpbroadcastd  0x2e58(%rip),%ymm9        # 3ff4 <_sk_callback_hsw+0x203>
   DB  196,65,101,219,201                  ; vpand         %ymm9,%ymm3,%ymm9
   DB  196,65,124,91,201                   ; vcvtdq2ps     %ymm9,%ymm9
-  DB  196,98,125,24,21,55,46,0,0          ; vbroadcastss  0x2e37(%rip),%ymm10        # 3f10 <_sk_callback_hsw+0x1f3>
+  DB  196,98,125,24,21,73,46,0,0          ; vbroadcastss  0x2e49(%rip),%ymm10        # 3ff8 <_sk_callback_hsw+0x207>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,88,21,45,46,0,0          ; vpbroadcastd  0x2e2d(%rip),%ymm10        # 3f14 <_sk_callback_hsw+0x1f7>
+  DB  196,98,125,88,21,63,46,0,0          ; vpbroadcastd  0x2e3f(%rip),%ymm10        # 3ffc <_sk_callback_hsw+0x20b>
   DB  196,193,101,219,218                 ; vpand         %ymm10,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,21,31,46,0,0          ; vbroadcastss  0x2e1f(%rip),%ymm10        # 3f18 <_sk_callback_hsw+0x1fb>
+  DB  196,98,125,24,21,49,46,0,0          ; vbroadcastss  0x2e31(%rip),%ymm10        # 4000 <_sk_callback_hsw+0x20f>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
@@ -1116,16 +1160,16 @@ _sk_lerp_565_hsw LABEL PROC
   DB  197,236,92,214                      ; vsubps        %ymm6,%ymm2,%ymm2
   DB  196,226,101,168,214                 ; vfmadd213ps   %ymm6,%ymm3,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,248,45,0,0        ; vbroadcastss  0x2df8(%rip),%ymm3        # 3f1c <_sk_callback_hsw+0x1ff>
+  DB  196,226,125,24,29,10,46,0,0         ; vbroadcastss  0x2e0a(%rip),%ymm3        # 4004 <_sk_callback_hsw+0x213>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,89,255,255,255               ; ja            1097 <_sk_lerp_565_hsw+0x14>
+  DB  15,135,89,255,255,255               ; ja            116d <_sk_lerp_565_hsw+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 1194 <_sk_lerp_565_hsw+0x111>
+  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 1268 <_sk_lerp_565_hsw+0x10f>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1137,28 +1181,27 @@ _sk_lerp_565_hsw LABEL PROC
   DB  196,193,97,196,92,122,4,2           ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm3,%xmm3
   DB  196,193,97,196,92,122,2,1           ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm3,%xmm3
   DB  196,193,97,196,28,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm3,%xmm3
-  DB  233,5,255,255,255                   ; jmpq          1097 <_sk_lerp_565_hsw+0x14>
-  DB  102,144                             ; xchg          %ax,%ax
-  DB  242,255                             ; repnz         (bad)
+  DB  233,5,255,255,255                   ; jmpq          116d <_sk_lerp_565_hsw+0x14>
+  DB  244                                 ; hlt
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  234                                 ; (bad)
   DB  255                                 ; (bad)
+  DB  236                                 ; in            (%dx),%al
   DB  255                                 ; (bad)
-  DB  255,226                             ; jmpq          *%rdx
   DB  255                                 ; (bad)
+  DB  255,228                             ; jmpq          *%rsp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  218,255                             ; (bad)
   DB  255                                 ; (bad)
-  DB  255,210                             ; callq         *%rdx
+  DB  220,255                             ; fdivr         %st,%st(7)
   DB  255                                 ; (bad)
+  DB  255,212                             ; callq         *%rsp
   DB  255                                 ; (bad)
-  DB  255,202                             ; dec           %edx
   DB  255                                 ; (bad)
+  DB  255,204                             ; dec           %esp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  190                                 ; .byte         0xbe
+  DB  255,192                             ; inc           %eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
@@ -1170,23 +1213,23 @@ _sk_load_tables_hsw LABEL PROC
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,105                             ; jne           122e <_sk_load_tables_hsw+0x7e>
+  DB  117,105                             ; jne           1302 <_sk_load_tables_hsw+0x7e>
   DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
-  DB  197,229,219,13,14,48,0,0            ; vpand         0x300e(%rip),%ymm3,%ymm1        # 41e0 <_sk_callback_hsw+0x4c3>
+  DB  197,229,219,13,26,48,0,0            ; vpand         0x301a(%rip),%ymm3,%ymm1        # 42c0 <_sk_callback_hsw+0x4cf>
   DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
   DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
   DB  76,139,72,16                        ; mov           0x10(%rax),%r9
   DB  197,237,118,210                     ; vpcmpeqd      %ymm2,%ymm2,%ymm2
   DB  196,226,109,146,4,137               ; vgatherdps    %ymm2,(%rcx,%ymm1,4),%ymm0
-  DB  196,226,101,0,21,14,48,0,0          ; vpshufb       0x300e(%rip),%ymm3,%ymm2        # 4200 <_sk_callback_hsw+0x4e3>
+  DB  196,226,101,0,21,26,48,0,0          ; vpshufb       0x301a(%rip),%ymm3,%ymm2        # 42e0 <_sk_callback_hsw+0x4ef>
   DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
   DB  196,194,53,146,12,145               ; vgatherdps    %ymm9,(%r9,%ymm2,4),%ymm1
   DB  72,139,64,24                        ; mov           0x18(%rax),%rax
-  DB  196,98,101,0,13,22,48,0,0           ; vpshufb       0x3016(%rip),%ymm3,%ymm9        # 4220 <_sk_callback_hsw+0x503>
+  DB  196,98,101,0,13,34,48,0,0           ; vpshufb       0x3022(%rip),%ymm3,%ymm9        # 4300 <_sk_callback_hsw+0x50f>
   DB  196,162,61,146,20,136               ; vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,254,44,0,0          ; vbroadcastss  0x2cfe(%rip),%ymm8        # 3f20 <_sk_callback_hsw+0x203>
+  DB  196,98,125,24,5,18,45,0,0           ; vbroadcastss  0x2d12(%rip),%ymm8        # 4008 <_sk_callback_hsw+0x217>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,137,193                          ; mov           %r8,%rcx
@@ -1199,7 +1242,7 @@ _sk_load_tables_hsw LABEL PROC
   DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
   DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
   DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
-  DB  233,115,255,255,255                 ; jmpq          11ca <_sk_load_tables_hsw+0x1a>
+  DB  233,115,255,255,255                 ; jmpq          129e <_sk_load_tables_hsw+0x1a>
 
 PUBLIC _sk_load_tables_u16_be_hsw
 _sk_load_tables_u16_be_hsw LABEL PROC
@@ -1207,7 +1250,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,201,0,0,0                    ; jne           1336 <_sk_load_tables_u16_be_hsw+0xdf>
+  DB  15,133,201,0,0,0                    ; jne           140a <_sk_load_tables_u16_be_hsw+0xdf>
   DB  196,1,121,16,4,72                   ; vmovupd       (%r8,%r9,2),%xmm8
   DB  196,129,121,16,84,72,16             ; vmovupd       0x10(%r8,%r9,2),%xmm2
   DB  196,129,121,16,92,72,32             ; vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -1223,7 +1266,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
   DB  197,185,108,200                     ; vpunpcklqdq   %xmm0,%xmm8,%xmm1
   DB  197,185,109,208                     ; vpunpckhqdq   %xmm0,%xmm8,%xmm2
   DB  197,49,108,195                      ; vpunpcklqdq   %xmm3,%xmm9,%xmm8
-  DB  197,121,111,21,162,48,0,0           ; vmovdqa       0x30a2(%rip),%xmm10        # 4360 <_sk_callback_hsw+0x643>
+  DB  197,121,111,21,174,48,0,0           ; vmovdqa       0x30ae(%rip),%xmm10        # 4440 <_sk_callback_hsw+0x64f>
   DB  196,193,113,219,194                 ; vpand         %xmm10,%xmm1,%xmm0
   DB  196,226,125,51,200                  ; vpmovzxwd     %xmm0,%ymm1
   DB  196,65,37,118,219                   ; vpcmpeqd      %ymm11,%ymm11,%ymm11
@@ -1245,36 +1288,36 @@ _sk_load_tables_u16_be_hsw LABEL PROC
   DB  197,185,235,219                     ; vpor          %xmm3,%xmm8,%xmm3
   DB  196,226,125,51,219                  ; vpmovzxwd     %xmm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,247,43,0,0          ; vbroadcastss  0x2bf7(%rip),%ymm8        # 3f24 <_sk_callback_hsw+0x207>
+  DB  196,98,125,24,5,11,44,0,0           ; vbroadcastss  0x2c0b(%rip),%ymm8        # 400c <_sk_callback_hsw+0x21b>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
   DB  196,1,123,16,4,72                   ; vmovsd        (%r8,%r9,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            139c <_sk_load_tables_u16_be_hsw+0x145>
+  DB  116,85                              ; je            1470 <_sk_load_tables_u16_be_hsw+0x145>
   DB  196,1,57,22,68,72,8                 ; vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            139c <_sk_load_tables_u16_be_hsw+0x145>
+  DB  114,72                              ; jb            1470 <_sk_load_tables_u16_be_hsw+0x145>
   DB  196,129,123,16,84,72,16             ; vmovsd        0x10(%r8,%r9,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            13a9 <_sk_load_tables_u16_be_hsw+0x152>
+  DB  116,72                              ; je            147d <_sk_load_tables_u16_be_hsw+0x152>
   DB  196,129,105,22,84,72,24             ; vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            13a9 <_sk_load_tables_u16_be_hsw+0x152>
+  DB  114,59                              ; jb            147d <_sk_load_tables_u16_be_hsw+0x152>
   DB  196,129,123,16,92,72,32             ; vmovsd        0x20(%r8,%r9,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,9,255,255,255                ; je            1288 <_sk_load_tables_u16_be_hsw+0x31>
+  DB  15,132,9,255,255,255                ; je            135c <_sk_load_tables_u16_be_hsw+0x31>
   DB  196,129,97,22,92,72,40              ; vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,248,254,255,255              ; jb            1288 <_sk_load_tables_u16_be_hsw+0x31>
+  DB  15,130,248,254,255,255              ; jb            135c <_sk_load_tables_u16_be_hsw+0x31>
   DB  196,1,122,126,76,72,48              ; vmovq         0x30(%r8,%r9,2),%xmm9
-  DB  233,236,254,255,255                 ; jmpq          1288 <_sk_load_tables_u16_be_hsw+0x31>
+  DB  233,236,254,255,255                 ; jmpq          135c <_sk_load_tables_u16_be_hsw+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,223,254,255,255                 ; jmpq          1288 <_sk_load_tables_u16_be_hsw+0x31>
+  DB  233,223,254,255,255                 ; jmpq          135c <_sk_load_tables_u16_be_hsw+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,214,254,255,255                 ; jmpq          1288 <_sk_load_tables_u16_be_hsw+0x31>
+  DB  233,214,254,255,255                 ; jmpq          135c <_sk_load_tables_u16_be_hsw+0x31>
 
 PUBLIC _sk_load_tables_rgb_u16_be_hsw
 _sk_load_tables_rgb_u16_be_hsw LABEL PROC
@@ -1282,7 +1325,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,127                       ; lea           (%rdi,%rdi,2),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,193,0,0,0                    ; jne           1485 <_sk_load_tables_rgb_u16_be_hsw+0xd3>
+  DB  15,133,193,0,0,0                    ; jne           1559 <_sk_load_tables_rgb_u16_be_hsw+0xd3>
   DB  196,129,122,111,4,72                ; vmovdqu       (%r8,%r9,2),%xmm0
   DB  196,129,122,111,84,72,12            ; vmovdqu       0xc(%r8,%r9,2),%xmm2
   DB  196,129,122,111,76,72,24            ; vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -1303,7 +1346,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
   DB  197,185,108,218                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm3
   DB  197,185,109,210                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm2
   DB  197,121,108,193                     ; vpunpcklqdq   %xmm1,%xmm0,%xmm8
-  DB  197,121,111,13,66,47,0,0            ; vmovdqa       0x2f42(%rip),%xmm9        # 4370 <_sk_callback_hsw+0x653>
+  DB  197,121,111,13,78,47,0,0            ; vmovdqa       0x2f4e(%rip),%xmm9        # 4450 <_sk_callback_hsw+0x65f>
   DB  196,193,97,219,193                  ; vpand         %xmm9,%xmm3,%xmm0
   DB  196,226,125,51,200                  ; vpmovzxwd     %xmm0,%ymm1
   DB  197,229,118,219                     ; vpcmpeqd      %ymm3,%ymm3,%ymm3
@@ -1320,41 +1363,41 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
   DB  196,98,125,51,194                   ; vpmovzxwd     %xmm2,%ymm8
   DB  196,162,101,146,20,128              ; vgatherdps    %ymm3,(%rax,%ymm8,4),%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,165,42,0,0        ; vbroadcastss  0x2aa5(%rip),%ymm3        # 3f28 <_sk_callback_hsw+0x20b>
+  DB  196,226,125,24,29,185,42,0,0        ; vbroadcastss  0x2ab9(%rip),%ymm3        # 4010 <_sk_callback_hsw+0x21f>
   DB  255,224                             ; jmpq          *%rax
   DB  196,129,121,110,4,72                ; vmovd         (%r8,%r9,2),%xmm0
   DB  196,129,121,196,68,72,4,2           ; vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           149e <_sk_load_tables_rgb_u16_be_hsw+0xec>
-  DB  233,90,255,255,255                  ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  117,5                               ; jne           1572 <_sk_load_tables_rgb_u16_be_hsw+0xec>
+  DB  233,90,255,255,255                  ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
   DB  196,129,121,110,76,72,6             ; vmovd         0x6(%r8,%r9,2),%xmm1
   DB  196,1,113,196,68,72,10,2            ; vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            14cd <_sk_load_tables_rgb_u16_be_hsw+0x11b>
+  DB  114,26                              ; jb            15a1 <_sk_load_tables_rgb_u16_be_hsw+0x11b>
   DB  196,129,121,110,76,72,12            ; vmovd         0xc(%r8,%r9,2),%xmm1
   DB  196,129,113,196,84,72,16,2          ; vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           14d2 <_sk_load_tables_rgb_u16_be_hsw+0x120>
-  DB  233,43,255,255,255                  ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
-  DB  233,38,255,255,255                  ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  117,10                              ; jne           15a6 <_sk_load_tables_rgb_u16_be_hsw+0x120>
+  DB  233,43,255,255,255                  ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  233,38,255,255,255                  ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
   DB  196,129,121,110,76,72,18            ; vmovd         0x12(%r8,%r9,2),%xmm1
   DB  196,1,113,196,76,72,22,2            ; vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            1501 <_sk_load_tables_rgb_u16_be_hsw+0x14f>
+  DB  114,26                              ; jb            15d5 <_sk_load_tables_rgb_u16_be_hsw+0x14f>
   DB  196,129,121,110,76,72,24            ; vmovd         0x18(%r8,%r9,2),%xmm1
   DB  196,129,113,196,76,72,28,2          ; vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           1506 <_sk_load_tables_rgb_u16_be_hsw+0x154>
-  DB  233,247,254,255,255                 ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
-  DB  233,242,254,255,255                 ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  117,10                              ; jne           15da <_sk_load_tables_rgb_u16_be_hsw+0x154>
+  DB  233,247,254,255,255                 ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  233,242,254,255,255                 ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
   DB  196,129,121,110,92,72,30            ; vmovd         0x1e(%r8,%r9,2),%xmm3
   DB  196,1,97,196,92,72,34,2             ; vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            152f <_sk_load_tables_rgb_u16_be_hsw+0x17d>
+  DB  114,20                              ; jb            1603 <_sk_load_tables_rgb_u16_be_hsw+0x17d>
   DB  196,129,121,110,92,72,36            ; vmovd         0x24(%r8,%r9,2),%xmm3
   DB  196,129,97,196,92,72,40,2           ; vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  DB  233,201,254,255,255                 ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
-  DB  233,196,254,255,255                 ; jmpq          13f8 <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  233,201,254,255,255                 ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
+  DB  233,196,254,255,255                 ; jmpq          14cc <_sk_load_tables_rgb_u16_be_hsw+0x46>
 
 PUBLIC _sk_byte_tables_hsw
 _sk_byte_tables_hsw LABEL PROC
@@ -1365,7 +1408,7 @@ _sk_byte_tables_hsw LABEL PROC
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,227,41,0,0          ; vbroadcastss  0x29e3(%rip),%ymm8        # 3f2c <_sk_callback_hsw+0x20f>
+  DB  196,98,125,24,5,247,41,0,0          ; vbroadcastss  0x29f7(%rip),%ymm8        # 4014 <_sk_callback_hsw+0x223>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
@@ -1402,7 +1445,7 @@ _sk_byte_tables_hsw LABEL PROC
   DB  196,227,121,32,197,7                ; vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,52,41,0,0          ; vbroadcastss  0x2934(%rip),%ymm9        # 3f30 <_sk_callback_hsw+0x213>
+  DB  196,98,125,24,13,72,41,0,0          ; vbroadcastss  0x2948(%rip),%ymm9        # 4018 <_sk_callback_hsw+0x227>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -1561,7 +1604,7 @@ _sk_byte_tables_rgb_hsw LABEL PROC
   DB  196,227,121,32,197,7                ; vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,109,38,0,0         ; vbroadcastss  0x266d(%rip),%ymm9        # 3f34 <_sk_callback_hsw+0x217>
+  DB  196,98,125,24,13,129,38,0,0         ; vbroadcastss  0x2681(%rip),%ymm9        # 401c <_sk_callback_hsw+0x22b>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -1714,33 +1757,33 @@ _sk_parametric_r_hsw LABEL PROC
   DB  196,66,125,168,211                  ; vfmadd213ps   %ymm11,%ymm0,%ymm10
   DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,32,36,0,0          ; vbroadcastss  0x2420(%rip),%ymm12        # 3f38 <_sk_callback_hsw+0x21b>
-  DB  196,98,125,24,45,27,36,0,0          ; vbroadcastss  0x241b(%rip),%ymm13        # 3f3c <_sk_callback_hsw+0x21f>
+  DB  196,98,125,24,37,52,36,0,0          ; vbroadcastss  0x2434(%rip),%ymm12        # 4020 <_sk_callback_hsw+0x22f>
+  DB  196,98,125,24,45,47,36,0,0          ; vbroadcastss  0x242f(%rip),%ymm13        # 4024 <_sk_callback_hsw+0x233>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,17,36,0,0          ; vbroadcastss  0x2411(%rip),%ymm13        # 3f40 <_sk_callback_hsw+0x223>
+  DB  196,98,125,24,45,37,36,0,0          ; vbroadcastss  0x2425(%rip),%ymm13        # 4028 <_sk_callback_hsw+0x237>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,7,36,0,0           ; vbroadcastss  0x2407(%rip),%ymm13        # 3f44 <_sk_callback_hsw+0x227>
+  DB  196,98,125,24,45,27,36,0,0          ; vbroadcastss  0x241b(%rip),%ymm13        # 402c <_sk_callback_hsw+0x23b>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,253,35,0,0         ; vbroadcastss  0x23fd(%rip),%ymm11        # 3f48 <_sk_callback_hsw+0x22b>
+  DB  196,98,125,24,29,17,36,0,0          ; vbroadcastss  0x2411(%rip),%ymm11        # 4030 <_sk_callback_hsw+0x23f>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,243,35,0,0         ; vbroadcastss  0x23f3(%rip),%ymm12        # 3f4c <_sk_callback_hsw+0x22f>
+  DB  196,98,125,24,37,7,36,0,0           ; vbroadcastss  0x2407(%rip),%ymm12        # 4034 <_sk_callback_hsw+0x243>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,233,35,0,0         ; vbroadcastss  0x23e9(%rip),%ymm12        # 3f50 <_sk_callback_hsw+0x233>
+  DB  196,98,125,24,37,253,35,0,0         ; vbroadcastss  0x23fd(%rip),%ymm12        # 4038 <_sk_callback_hsw+0x247>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  196,99,125,8,208,1                  ; vroundps      $0x1,%ymm0,%ymm10
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,202,35,0,0         ; vbroadcastss  0x23ca(%rip),%ymm11        # 3f54 <_sk_callback_hsw+0x237>
+  DB  196,98,125,24,29,222,35,0,0         ; vbroadcastss  0x23de(%rip),%ymm11        # 403c <_sk_callback_hsw+0x24b>
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,192,35,0,0         ; vbroadcastss  0x23c0(%rip),%ymm11        # 3f58 <_sk_callback_hsw+0x23b>
+  DB  196,98,125,24,29,212,35,0,0         ; vbroadcastss  0x23d4(%rip),%ymm11        # 4040 <_sk_callback_hsw+0x24f>
   DB  196,98,45,172,216                   ; vfnmadd213ps  %ymm0,%ymm10,%ymm11
-  DB  196,226,125,24,5,182,35,0,0         ; vbroadcastss  0x23b6(%rip),%ymm0        # 3f5c <_sk_callback_hsw+0x23f>
+  DB  196,226,125,24,5,202,35,0,0         ; vbroadcastss  0x23ca(%rip),%ymm0        # 4044 <_sk_callback_hsw+0x253>
   DB  196,193,124,92,194                  ; vsubps        %ymm10,%ymm0,%ymm0
-  DB  196,98,125,24,21,172,35,0,0         ; vbroadcastss  0x23ac(%rip),%ymm10        # 3f60 <_sk_callback_hsw+0x243>
+  DB  196,98,125,24,21,192,35,0,0         ; vbroadcastss  0x23c0(%rip),%ymm10        # 4048 <_sk_callback_hsw+0x257>
   DB  197,172,94,192                      ; vdivps        %ymm0,%ymm10,%ymm0
   DB  197,164,88,192                      ; vaddps        %ymm0,%ymm11,%ymm0
-  DB  196,98,125,24,21,159,35,0,0         ; vbroadcastss  0x239f(%rip),%ymm10        # 3f64 <_sk_callback_hsw+0x247>
+  DB  196,98,125,24,21,179,35,0,0         ; vbroadcastss  0x23b3(%rip),%ymm10        # 404c <_sk_callback_hsw+0x25b>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -1748,7 +1791,7 @@ _sk_parametric_r_hsw LABEL PROC
   DB  196,195,125,74,193,128              ; vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,118,35,0,0          ; vbroadcastss  0x2376(%rip),%ymm8        # 3f68 <_sk_callback_hsw+0x24b>
+  DB  196,98,125,24,5,138,35,0,0          ; vbroadcastss  0x238a(%rip),%ymm8        # 4050 <_sk_callback_hsw+0x25f>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1766,33 +1809,33 @@ _sk_parametric_g_hsw LABEL PROC
   DB  196,66,117,168,211                  ; vfmadd213ps   %ymm11,%ymm1,%ymm10
   DB  196,226,125,24,8                    ; vbroadcastss  (%rax),%ymm1
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,46,35,0,0          ; vbroadcastss  0x232e(%rip),%ymm12        # 3f6c <_sk_callback_hsw+0x24f>
-  DB  196,98,125,24,45,41,35,0,0          ; vbroadcastss  0x2329(%rip),%ymm13        # 3f70 <_sk_callback_hsw+0x253>
+  DB  196,98,125,24,37,66,35,0,0          ; vbroadcastss  0x2342(%rip),%ymm12        # 4054 <_sk_callback_hsw+0x263>
+  DB  196,98,125,24,45,61,35,0,0          ; vbroadcastss  0x233d(%rip),%ymm13        # 4058 <_sk_callback_hsw+0x267>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,31,35,0,0          ; vbroadcastss  0x231f(%rip),%ymm13        # 3f74 <_sk_callback_hsw+0x257>
+  DB  196,98,125,24,45,51,35,0,0          ; vbroadcastss  0x2333(%rip),%ymm13        # 405c <_sk_callback_hsw+0x26b>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,21,35,0,0          ; vbroadcastss  0x2315(%rip),%ymm13        # 3f78 <_sk_callback_hsw+0x25b>
+  DB  196,98,125,24,45,41,35,0,0          ; vbroadcastss  0x2329(%rip),%ymm13        # 4060 <_sk_callback_hsw+0x26f>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,11,35,0,0          ; vbroadcastss  0x230b(%rip),%ymm11        # 3f7c <_sk_callback_hsw+0x25f>
+  DB  196,98,125,24,29,31,35,0,0          ; vbroadcastss  0x231f(%rip),%ymm11        # 4064 <_sk_callback_hsw+0x273>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,1,35,0,0           ; vbroadcastss  0x2301(%rip),%ymm12        # 3f80 <_sk_callback_hsw+0x263>
+  DB  196,98,125,24,37,21,35,0,0          ; vbroadcastss  0x2315(%rip),%ymm12        # 4068 <_sk_callback_hsw+0x277>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,247,34,0,0         ; vbroadcastss  0x22f7(%rip),%ymm12        # 3f84 <_sk_callback_hsw+0x267>
+  DB  196,98,125,24,37,11,35,0,0          ; vbroadcastss  0x230b(%rip),%ymm12        # 406c <_sk_callback_hsw+0x27b>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  196,99,125,8,209,1                  ; vroundps      $0x1,%ymm1,%ymm10
   DB  196,65,116,92,210                   ; vsubps        %ymm10,%ymm1,%ymm10
-  DB  196,98,125,24,29,216,34,0,0         ; vbroadcastss  0x22d8(%rip),%ymm11        # 3f88 <_sk_callback_hsw+0x26b>
+  DB  196,98,125,24,29,236,34,0,0         ; vbroadcastss  0x22ec(%rip),%ymm11        # 4070 <_sk_callback_hsw+0x27f>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,206,34,0,0         ; vbroadcastss  0x22ce(%rip),%ymm11        # 3f8c <_sk_callback_hsw+0x26f>
+  DB  196,98,125,24,29,226,34,0,0         ; vbroadcastss  0x22e2(%rip),%ymm11        # 4074 <_sk_callback_hsw+0x283>
   DB  196,98,45,172,217                   ; vfnmadd213ps  %ymm1,%ymm10,%ymm11
-  DB  196,226,125,24,13,196,34,0,0        ; vbroadcastss  0x22c4(%rip),%ymm1        # 3f90 <_sk_callback_hsw+0x273>
+  DB  196,226,125,24,13,216,34,0,0        ; vbroadcastss  0x22d8(%rip),%ymm1        # 4078 <_sk_callback_hsw+0x287>
   DB  196,193,116,92,202                  ; vsubps        %ymm10,%ymm1,%ymm1
-  DB  196,98,125,24,21,186,34,0,0         ; vbroadcastss  0x22ba(%rip),%ymm10        # 3f94 <_sk_callback_hsw+0x277>
+  DB  196,98,125,24,21,206,34,0,0         ; vbroadcastss  0x22ce(%rip),%ymm10        # 407c <_sk_callback_hsw+0x28b>
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  197,164,88,201                      ; vaddps        %ymm1,%ymm11,%ymm1
-  DB  196,98,125,24,21,173,34,0,0         ; vbroadcastss  0x22ad(%rip),%ymm10        # 3f98 <_sk_callback_hsw+0x27b>
+  DB  196,98,125,24,21,193,34,0,0         ; vbroadcastss  0x22c1(%rip),%ymm10        # 4080 <_sk_callback_hsw+0x28f>
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -1800,7 +1843,7 @@ _sk_parametric_g_hsw LABEL PROC
   DB  196,195,117,74,201,128              ; vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,5,132,34,0,0          ; vbroadcastss  0x2284(%rip),%ymm8        # 3f9c <_sk_callback_hsw+0x27f>
+  DB  196,98,125,24,5,152,34,0,0          ; vbroadcastss  0x2298(%rip),%ymm8        # 4084 <_sk_callback_hsw+0x293>
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1818,33 +1861,33 @@ _sk_parametric_b_hsw LABEL PROC
   DB  196,66,109,168,211                  ; vfmadd213ps   %ymm11,%ymm2,%ymm10
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,60,34,0,0          ; vbroadcastss  0x223c(%rip),%ymm12        # 3fa0 <_sk_callback_hsw+0x283>
-  DB  196,98,125,24,45,55,34,0,0          ; vbroadcastss  0x2237(%rip),%ymm13        # 3fa4 <_sk_callback_hsw+0x287>
+  DB  196,98,125,24,37,80,34,0,0          ; vbroadcastss  0x2250(%rip),%ymm12        # 4088 <_sk_callback_hsw+0x297>
+  DB  196,98,125,24,45,75,34,0,0          ; vbroadcastss  0x224b(%rip),%ymm13        # 408c <_sk_callback_hsw+0x29b>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,45,34,0,0          ; vbroadcastss  0x222d(%rip),%ymm13        # 3fa8 <_sk_callback_hsw+0x28b>
+  DB  196,98,125,24,45,65,34,0,0          ; vbroadcastss  0x2241(%rip),%ymm13        # 4090 <_sk_callback_hsw+0x29f>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,35,34,0,0          ; vbroadcastss  0x2223(%rip),%ymm13        # 3fac <_sk_callback_hsw+0x28f>
+  DB  196,98,125,24,45,55,34,0,0          ; vbroadcastss  0x2237(%rip),%ymm13        # 4094 <_sk_callback_hsw+0x2a3>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,25,34,0,0          ; vbroadcastss  0x2219(%rip),%ymm11        # 3fb0 <_sk_callback_hsw+0x293>
+  DB  196,98,125,24,29,45,34,0,0          ; vbroadcastss  0x222d(%rip),%ymm11        # 4098 <_sk_callback_hsw+0x2a7>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,15,34,0,0          ; vbroadcastss  0x220f(%rip),%ymm12        # 3fb4 <_sk_callback_hsw+0x297>
+  DB  196,98,125,24,37,35,34,0,0          ; vbroadcastss  0x2223(%rip),%ymm12        # 409c <_sk_callback_hsw+0x2ab>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,5,34,0,0           ; vbroadcastss  0x2205(%rip),%ymm12        # 3fb8 <_sk_callback_hsw+0x29b>
+  DB  196,98,125,24,37,25,34,0,0          ; vbroadcastss  0x2219(%rip),%ymm12        # 40a0 <_sk_callback_hsw+0x2af>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  196,99,125,8,210,1                  ; vroundps      $0x1,%ymm2,%ymm10
   DB  196,65,108,92,210                   ; vsubps        %ymm10,%ymm2,%ymm10
-  DB  196,98,125,24,29,230,33,0,0         ; vbroadcastss  0x21e6(%rip),%ymm11        # 3fbc <_sk_callback_hsw+0x29f>
+  DB  196,98,125,24,29,250,33,0,0         ; vbroadcastss  0x21fa(%rip),%ymm11        # 40a4 <_sk_callback_hsw+0x2b3>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,220,33,0,0         ; vbroadcastss  0x21dc(%rip),%ymm11        # 3fc0 <_sk_callback_hsw+0x2a3>
+  DB  196,98,125,24,29,240,33,0,0         ; vbroadcastss  0x21f0(%rip),%ymm11        # 40a8 <_sk_callback_hsw+0x2b7>
   DB  196,98,45,172,218                   ; vfnmadd213ps  %ymm2,%ymm10,%ymm11
-  DB  196,226,125,24,21,210,33,0,0        ; vbroadcastss  0x21d2(%rip),%ymm2        # 3fc4 <_sk_callback_hsw+0x2a7>
+  DB  196,226,125,24,21,230,33,0,0        ; vbroadcastss  0x21e6(%rip),%ymm2        # 40ac <_sk_callback_hsw+0x2bb>
   DB  196,193,108,92,210                  ; vsubps        %ymm10,%ymm2,%ymm2
-  DB  196,98,125,24,21,200,33,0,0         ; vbroadcastss  0x21c8(%rip),%ymm10        # 3fc8 <_sk_callback_hsw+0x2ab>
+  DB  196,98,125,24,21,220,33,0,0         ; vbroadcastss  0x21dc(%rip),%ymm10        # 40b0 <_sk_callback_hsw+0x2bf>
   DB  197,172,94,210                      ; vdivps        %ymm2,%ymm10,%ymm2
   DB  197,164,88,210                      ; vaddps        %ymm2,%ymm11,%ymm2
-  DB  196,98,125,24,21,187,33,0,0         ; vbroadcastss  0x21bb(%rip),%ymm10        # 3fcc <_sk_callback_hsw+0x2af>
+  DB  196,98,125,24,21,207,33,0,0         ; vbroadcastss  0x21cf(%rip),%ymm10        # 40b4 <_sk_callback_hsw+0x2c3>
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  197,253,91,210                      ; vcvtps2dq     %ymm2,%ymm2
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -1852,7 +1895,7 @@ _sk_parametric_b_hsw LABEL PROC
   DB  196,195,109,74,209,128              ; vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,146,33,0,0          ; vbroadcastss  0x2192(%rip),%ymm8        # 3fd0 <_sk_callback_hsw+0x2b3>
+  DB  196,98,125,24,5,166,33,0,0          ; vbroadcastss  0x21a6(%rip),%ymm8        # 40b8 <_sk_callback_hsw+0x2c7>
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1870,33 +1913,33 @@ _sk_parametric_a_hsw LABEL PROC
   DB  196,66,101,168,211                  ; vfmadd213ps   %ymm11,%ymm3,%ymm10
   DB  196,226,125,24,24                   ; vbroadcastss  (%rax),%ymm3
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,74,33,0,0          ; vbroadcastss  0x214a(%rip),%ymm12        # 3fd4 <_sk_callback_hsw+0x2b7>
-  DB  196,98,125,24,45,69,33,0,0          ; vbroadcastss  0x2145(%rip),%ymm13        # 3fd8 <_sk_callback_hsw+0x2bb>
+  DB  196,98,125,24,37,94,33,0,0          ; vbroadcastss  0x215e(%rip),%ymm12        # 40bc <_sk_callback_hsw+0x2cb>
+  DB  196,98,125,24,45,89,33,0,0          ; vbroadcastss  0x2159(%rip),%ymm13        # 40c0 <_sk_callback_hsw+0x2cf>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,59,33,0,0          ; vbroadcastss  0x213b(%rip),%ymm13        # 3fdc <_sk_callback_hsw+0x2bf>
+  DB  196,98,125,24,45,79,33,0,0          ; vbroadcastss  0x214f(%rip),%ymm13        # 40c4 <_sk_callback_hsw+0x2d3>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,49,33,0,0          ; vbroadcastss  0x2131(%rip),%ymm13        # 3fe0 <_sk_callback_hsw+0x2c3>
+  DB  196,98,125,24,45,69,33,0,0          ; vbroadcastss  0x2145(%rip),%ymm13        # 40c8 <_sk_callback_hsw+0x2d7>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,39,33,0,0          ; vbroadcastss  0x2127(%rip),%ymm11        # 3fe4 <_sk_callback_hsw+0x2c7>
+  DB  196,98,125,24,29,59,33,0,0          ; vbroadcastss  0x213b(%rip),%ymm11        # 40cc <_sk_callback_hsw+0x2db>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,29,33,0,0          ; vbroadcastss  0x211d(%rip),%ymm12        # 3fe8 <_sk_callback_hsw+0x2cb>
+  DB  196,98,125,24,37,49,33,0,0          ; vbroadcastss  0x2131(%rip),%ymm12        # 40d0 <_sk_callback_hsw+0x2df>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,19,33,0,0          ; vbroadcastss  0x2113(%rip),%ymm12        # 3fec <_sk_callback_hsw+0x2cf>
+  DB  196,98,125,24,37,39,33,0,0          ; vbroadcastss  0x2127(%rip),%ymm12        # 40d4 <_sk_callback_hsw+0x2e3>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  196,99,125,8,211,1                  ; vroundps      $0x1,%ymm3,%ymm10
   DB  196,65,100,92,210                   ; vsubps        %ymm10,%ymm3,%ymm10
-  DB  196,98,125,24,29,244,32,0,0         ; vbroadcastss  0x20f4(%rip),%ymm11        # 3ff0 <_sk_callback_hsw+0x2d3>
+  DB  196,98,125,24,29,8,33,0,0           ; vbroadcastss  0x2108(%rip),%ymm11        # 40d8 <_sk_callback_hsw+0x2e7>
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,234,32,0,0         ; vbroadcastss  0x20ea(%rip),%ymm11        # 3ff4 <_sk_callback_hsw+0x2d7>
+  DB  196,98,125,24,29,254,32,0,0         ; vbroadcastss  0x20fe(%rip),%ymm11        # 40dc <_sk_callback_hsw+0x2eb>
   DB  196,98,45,172,219                   ; vfnmadd213ps  %ymm3,%ymm10,%ymm11
-  DB  196,226,125,24,29,224,32,0,0        ; vbroadcastss  0x20e0(%rip),%ymm3        # 3ff8 <_sk_callback_hsw+0x2db>
+  DB  196,226,125,24,29,244,32,0,0        ; vbroadcastss  0x20f4(%rip),%ymm3        # 40e0 <_sk_callback_hsw+0x2ef>
   DB  196,193,100,92,218                  ; vsubps        %ymm10,%ymm3,%ymm3
-  DB  196,98,125,24,21,214,32,0,0         ; vbroadcastss  0x20d6(%rip),%ymm10        # 3ffc <_sk_callback_hsw+0x2df>
+  DB  196,98,125,24,21,234,32,0,0         ; vbroadcastss  0x20ea(%rip),%ymm10        # 40e4 <_sk_callback_hsw+0x2f3>
   DB  197,172,94,219                      ; vdivps        %ymm3,%ymm10,%ymm3
   DB  197,164,88,219                      ; vaddps        %ymm3,%ymm11,%ymm3
-  DB  196,98,125,24,21,201,32,0,0         ; vbroadcastss  0x20c9(%rip),%ymm10        # 4000 <_sk_callback_hsw+0x2e3>
+  DB  196,98,125,24,21,221,32,0,0         ; vbroadcastss  0x20dd(%rip),%ymm10        # 40e8 <_sk_callback_hsw+0x2f7>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,253,91,219                      ; vcvtps2dq     %ymm3,%ymm3
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -1904,33 +1947,33 @@ _sk_parametric_a_hsw LABEL PROC
   DB  196,195,101,74,217,128              ; vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
-  DB  196,98,125,24,5,160,32,0,0          ; vbroadcastss  0x20a0(%rip),%ymm8        # 4004 <_sk_callback_hsw+0x2e7>
+  DB  196,98,125,24,5,180,32,0,0          ; vbroadcastss  0x20b4(%rip),%ymm8        # 40ec <_sk_callback_hsw+0x2fb>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_lab_to_xyz_hsw
 _sk_lab_to_xyz_hsw LABEL PROC
-  DB  196,98,125,24,5,146,32,0,0          ; vbroadcastss  0x2092(%rip),%ymm8        # 4008 <_sk_callback_hsw+0x2eb>
-  DB  196,98,125,24,13,141,32,0,0         ; vbroadcastss  0x208d(%rip),%ymm9        # 400c <_sk_callback_hsw+0x2ef>
-  DB  196,98,125,24,21,136,32,0,0         ; vbroadcastss  0x2088(%rip),%ymm10        # 4010 <_sk_callback_hsw+0x2f3>
+  DB  196,98,125,24,5,166,32,0,0          ; vbroadcastss  0x20a6(%rip),%ymm8        # 40f0 <_sk_callback_hsw+0x2ff>
+  DB  196,98,125,24,13,161,32,0,0         ; vbroadcastss  0x20a1(%rip),%ymm9        # 40f4 <_sk_callback_hsw+0x303>
+  DB  196,98,125,24,21,156,32,0,0         ; vbroadcastss  0x209c(%rip),%ymm10        # 40f8 <_sk_callback_hsw+0x307>
   DB  196,194,53,168,202                  ; vfmadd213ps   %ymm10,%ymm9,%ymm1
   DB  196,194,53,168,210                  ; vfmadd213ps   %ymm10,%ymm9,%ymm2
-  DB  196,98,125,24,13,121,32,0,0         ; vbroadcastss  0x2079(%rip),%ymm9        # 4014 <_sk_callback_hsw+0x2f7>
+  DB  196,98,125,24,13,141,32,0,0         ; vbroadcastss  0x208d(%rip),%ymm9        # 40fc <_sk_callback_hsw+0x30b>
   DB  196,66,125,184,200                  ; vfmadd231ps   %ymm8,%ymm0,%ymm9
-  DB  196,226,125,24,5,111,32,0,0         ; vbroadcastss  0x206f(%rip),%ymm0        # 4018 <_sk_callback_hsw+0x2fb>
+  DB  196,226,125,24,5,131,32,0,0         ; vbroadcastss  0x2083(%rip),%ymm0        # 4100 <_sk_callback_hsw+0x30f>
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
-  DB  196,98,125,24,5,102,32,0,0          ; vbroadcastss  0x2066(%rip),%ymm8        # 401c <_sk_callback_hsw+0x2ff>
+  DB  196,98,125,24,5,122,32,0,0          ; vbroadcastss  0x207a(%rip),%ymm8        # 4104 <_sk_callback_hsw+0x313>
   DB  196,98,117,168,192                  ; vfmadd213ps   %ymm0,%ymm1,%ymm8
-  DB  196,98,125,24,13,92,32,0,0          ; vbroadcastss  0x205c(%rip),%ymm9        # 4020 <_sk_callback_hsw+0x303>
+  DB  196,98,125,24,13,112,32,0,0         ; vbroadcastss  0x2070(%rip),%ymm9        # 4108 <_sk_callback_hsw+0x317>
   DB  196,98,109,172,200                  ; vfnmadd213ps  %ymm0,%ymm2,%ymm9
   DB  196,193,60,89,200                   ; vmulps        %ymm8,%ymm8,%ymm1
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
-  DB  196,226,125,24,21,73,32,0,0         ; vbroadcastss  0x2049(%rip),%ymm2        # 4024 <_sk_callback_hsw+0x307>
+  DB  196,226,125,24,21,93,32,0,0         ; vbroadcastss  0x205d(%rip),%ymm2        # 410c <_sk_callback_hsw+0x31b>
   DB  197,108,194,209,1                   ; vcmpltps      %ymm1,%ymm2,%ymm10
-  DB  196,98,125,24,29,63,32,0,0          ; vbroadcastss  0x203f(%rip),%ymm11        # 4028 <_sk_callback_hsw+0x30b>
+  DB  196,98,125,24,29,83,32,0,0          ; vbroadcastss  0x2053(%rip),%ymm11        # 4110 <_sk_callback_hsw+0x31f>
   DB  196,65,60,88,195                    ; vaddps        %ymm11,%ymm8,%ymm8
-  DB  196,98,125,24,37,53,32,0,0          ; vbroadcastss  0x2035(%rip),%ymm12        # 402c <_sk_callback_hsw+0x30f>
+  DB  196,98,125,24,37,73,32,0,0          ; vbroadcastss  0x2049(%rip),%ymm12        # 4114 <_sk_callback_hsw+0x323>
   DB  196,65,60,89,196                    ; vmulps        %ymm12,%ymm8,%ymm8
   DB  196,99,61,74,193,160                ; vblendvps     %ymm10,%ymm1,%ymm8,%ymm8
   DB  197,252,89,200                      ; vmulps        %ymm0,%ymm0,%ymm1
@@ -1945,9 +1988,9 @@ _sk_lab_to_xyz_hsw LABEL PROC
   DB  196,65,52,88,203                    ; vaddps        %ymm11,%ymm9,%ymm9
   DB  196,65,52,89,204                    ; vmulps        %ymm12,%ymm9,%ymm9
   DB  196,227,53,74,208,32                ; vblendvps     %ymm2,%ymm0,%ymm9,%ymm2
-  DB  196,226,125,24,5,234,31,0,0         ; vbroadcastss  0x1fea(%rip),%ymm0        # 4030 <_sk_callback_hsw+0x313>
+  DB  196,226,125,24,5,254,31,0,0         ; vbroadcastss  0x1ffe(%rip),%ymm0        # 4118 <_sk_callback_hsw+0x327>
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,225,31,0,0          ; vbroadcastss  0x1fe1(%rip),%ymm8        # 4034 <_sk_callback_hsw+0x317>
+  DB  196,98,125,24,5,245,31,0,0          ; vbroadcastss  0x1ff5(%rip),%ymm8        # 411c <_sk_callback_hsw+0x32b>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1959,11 +2002,11 @@ _sk_load_a8_hsw LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,45                              ; jne           2099 <_sk_load_a8_hsw+0x3d>
+  DB  117,45                              ; jne           216d <_sk_load_a8_hsw+0x3d>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,182,31,0,0        ; vbroadcastss  0x1fb6(%rip),%ymm1        # 4038 <_sk_callback_hsw+0x31b>
+  DB  196,226,125,24,13,202,31,0,0        ; vbroadcastss  0x1fca(%rip),%ymm1        # 4120 <_sk_callback_hsw+0x32f>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -1980,9 +2023,9 @@ _sk_load_a8_hsw LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           20a1 <_sk_load_a8_hsw+0x45>
+  DB  117,234                             ; jne           2175 <_sk_load_a8_hsw+0x45>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,178                             ; jmp           2070 <_sk_load_a8_hsw+0x14>
+  DB  235,178                             ; jmp           2144 <_sk_load_a8_hsw+0x14>
 
 PUBLIC _sk_gather_a8_hsw
 _sk_gather_a8_hsw LABEL PROC
@@ -2026,7 +2069,7 @@ _sk_gather_a8_hsw LABEL PROC
   DB  196,227,121,32,192,7                ; vpinsrb       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,193,30,0,0        ; vbroadcastss  0x1ec1(%rip),%ymm1        # 403c <_sk_callback_hsw+0x31f>
+  DB  196,226,125,24,13,213,30,0,0        ; vbroadcastss  0x1ed5(%rip),%ymm1        # 4124 <_sk_callback_hsw+0x333>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -2042,14 +2085,14 @@ PUBLIC _sk_store_a8_hsw
 _sk_store_a8_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,156,30,0,0          ; vbroadcastss  0x1e9c(%rip),%ymm8        # 4040 <_sk_callback_hsw+0x323>
+  DB  196,98,125,24,5,176,30,0,0          ; vbroadcastss  0x1eb0(%rip),%ymm8        # 4128 <_sk_callback_hsw+0x337>
   DB  196,65,100,89,192                   ; vmulps        %ymm8,%ymm3,%ymm8
   DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           21cd <_sk_store_a8_hsw+0x37>
+  DB  117,10                              ; jne           22a1 <_sk_store_a8_hsw+0x37>
   DB  196,65,123,17,4,58                  ; vmovsd        %xmm8,(%r10,%rdi,1)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2057,10 +2100,10 @@ _sk_store_a8_hsw LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            21c9 <_sk_store_a8_hsw+0x33>
+  DB  119,236                             ; ja            229d <_sk_store_a8_hsw+0x33>
   DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,67,0,0,0                  ; lea           0x43(%rip),%r9        # 2230 <_sk_store_a8_hsw+0x9a>
+  DB  76,141,13,67,0,0,0                  ; lea           0x43(%rip),%r9        # 2304 <_sk_store_a8_hsw+0x9a>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2071,7 +2114,7 @@ _sk_store_a8_hsw LABEL PROC
   DB  196,67,121,20,68,58,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   DB  196,67,121,20,68,58,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   DB  196,67,121,20,4,58,0                ; vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  DB  235,154                             ; jmp           21c9 <_sk_store_a8_hsw+0x33>
+  DB  235,154                             ; jmp           229d <_sk_store_a8_hsw+0x33>
   DB  144                                 ; nop
   DB  246,255                             ; idiv          %bh
   DB  255                                 ; (bad)
@@ -2103,14 +2146,14 @@ _sk_load_g8_hsw LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,50                              ; jne           228e <_sk_load_g8_hsw+0x42>
+  DB  117,50                              ; jne           2362 <_sk_load_g8_hsw+0x42>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,210,29,0,0        ; vbroadcastss  0x1dd2(%rip),%ymm1        # 4044 <_sk_callback_hsw+0x327>
+  DB  196,226,125,24,13,230,29,0,0        ; vbroadcastss  0x1de6(%rip),%ymm1        # 412c <_sk_callback_hsw+0x33b>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,199,29,0,0        ; vbroadcastss  0x1dc7(%rip),%ymm3        # 4048 <_sk_callback_hsw+0x32b>
+  DB  196,226,125,24,29,219,29,0,0        ; vbroadcastss  0x1ddb(%rip),%ymm3        # 4130 <_sk_callback_hsw+0x33f>
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
@@ -2124,9 +2167,9 @@ _sk_load_g8_hsw LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           2296 <_sk_load_g8_hsw+0x4a>
+  DB  117,234                             ; jne           236a <_sk_load_g8_hsw+0x4a>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,173                             ; jmp           2260 <_sk_load_g8_hsw+0x14>
+  DB  235,173                             ; jmp           2334 <_sk_load_g8_hsw+0x14>
 
 PUBLIC _sk_gather_g8_hsw
 _sk_gather_g8_hsw LABEL PROC
@@ -2170,10 +2213,10 @@ _sk_gather_g8_hsw LABEL PROC
   DB  196,227,121,32,192,7                ; vpinsrb       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,220,28,0,0        ; vbroadcastss  0x1cdc(%rip),%ymm1        # 404c <_sk_callback_hsw+0x32f>
+  DB  196,226,125,24,13,240,28,0,0        ; vbroadcastss  0x1cf0(%rip),%ymm1        # 4134 <_sk_callback_hsw+0x343>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,209,28,0,0        ; vbroadcastss  0x1cd1(%rip),%ymm3        # 4050 <_sk_callback_hsw+0x333>
+  DB  196,226,125,24,29,229,28,0,0        ; vbroadcastss  0x1ce5(%rip),%ymm3        # 4138 <_sk_callback_hsw+0x347>
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  91                                  ; pop           %rbx
@@ -2187,9 +2230,9 @@ _sk_gather_i8_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  73,137,192                          ; mov           %rax,%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  116,5                               ; je            239f <_sk_gather_i8_hsw+0xf>
+  DB  116,5                               ; je            2473 <_sk_gather_i8_hsw+0xf>
   DB  76,137,192                          ; mov           %r8,%rax
-  DB  235,2                               ; jmp           23a1 <_sk_gather_i8_hsw+0x11>
+  DB  235,2                               ; jmp           2475 <_sk_gather_i8_hsw+0x11>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,87                               ; push          %r15
   DB  65,86                               ; push          %r14
@@ -2227,14 +2270,14 @@ _sk_gather_i8_hsw LABEL PROC
   DB  73,139,64,8                         ; mov           0x8(%r8),%rax
   DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
   DB  196,226,117,144,28,128              ; vpgatherdd    %ymm1,(%rax,%ymm0,4),%ymm3
-  DB  197,229,219,5,241,29,0,0            ; vpand         0x1df1(%rip),%ymm3,%ymm0        # 4240 <_sk_callback_hsw+0x523>
+  DB  197,229,219,5,253,29,0,0            ; vpand         0x1dfd(%rip),%ymm3,%ymm0        # 4320 <_sk_callback_hsw+0x52f>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,248,27,0,0          ; vbroadcastss  0x1bf8(%rip),%ymm8        # 4054 <_sk_callback_hsw+0x337>
+  DB  196,98,125,24,5,12,28,0,0           ; vbroadcastss  0x1c0c(%rip),%ymm8        # 413c <_sk_callback_hsw+0x34b>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,226,101,0,13,246,29,0,0         ; vpshufb       0x1df6(%rip),%ymm3,%ymm1        # 4260 <_sk_callback_hsw+0x543>
+  DB  196,226,101,0,13,2,30,0,0           ; vpshufb       0x1e02(%rip),%ymm3,%ymm1        # 4340 <_sk_callback_hsw+0x54f>
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,226,101,0,21,4,30,0,0           ; vpshufb       0x1e04(%rip),%ymm3,%ymm2        # 4280 <_sk_callback_hsw+0x563>
+  DB  196,226,101,0,21,16,30,0,0          ; vpshufb       0x1e10(%rip),%ymm3,%ymm2        # 4360 <_sk_callback_hsw+0x56f>
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
@@ -2253,35 +2296,35 @@ _sk_load_565_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,114                             ; jne           251c <_sk_load_565_hsw+0x7c>
+  DB  117,114                             ; jne           25f0 <_sk_load_565_hsw+0x7c>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
-  DB  196,226,125,88,5,154,27,0,0         ; vpbroadcastd  0x1b9a(%rip),%ymm0        # 4058 <_sk_callback_hsw+0x33b>
+  DB  196,226,125,88,5,174,27,0,0         ; vpbroadcastd  0x1bae(%rip),%ymm0        # 4140 <_sk_callback_hsw+0x34f>
   DB  197,237,219,192                     ; vpand         %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,141,27,0,0        ; vbroadcastss  0x1b8d(%rip),%ymm1        # 405c <_sk_callback_hsw+0x33f>
+  DB  196,226,125,24,13,161,27,0,0        ; vbroadcastss  0x1ba1(%rip),%ymm1        # 4144 <_sk_callback_hsw+0x353>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,132,27,0,0        ; vpbroadcastd  0x1b84(%rip),%ymm1        # 4060 <_sk_callback_hsw+0x343>
+  DB  196,226,125,88,13,152,27,0,0        ; vpbroadcastd  0x1b98(%rip),%ymm1        # 4148 <_sk_callback_hsw+0x357>
   DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,119,27,0,0        ; vbroadcastss  0x1b77(%rip),%ymm3        # 4064 <_sk_callback_hsw+0x347>
+  DB  196,226,125,24,29,139,27,0,0        ; vbroadcastss  0x1b8b(%rip),%ymm3        # 414c <_sk_callback_hsw+0x35b>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,88,29,110,27,0,0        ; vpbroadcastd  0x1b6e(%rip),%ymm3        # 4068 <_sk_callback_hsw+0x34b>
+  DB  196,226,125,88,29,130,27,0,0        ; vpbroadcastd  0x1b82(%rip),%ymm3        # 4150 <_sk_callback_hsw+0x35f>
   DB  197,237,219,211                     ; vpand         %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,97,27,0,0         ; vbroadcastss  0x1b61(%rip),%ymm3        # 406c <_sk_callback_hsw+0x34f>
+  DB  196,226,125,24,29,117,27,0,0        ; vbroadcastss  0x1b75(%rip),%ymm3        # 4154 <_sk_callback_hsw+0x363>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,86,27,0,0         ; vbroadcastss  0x1b56(%rip),%ymm3        # 4070 <_sk_callback_hsw+0x353>
+  DB  196,226,125,24,29,106,27,0,0        ; vbroadcastss  0x1b6a(%rip),%ymm3        # 4158 <_sk_callback_hsw+0x367>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,128                             ; ja            24b0 <_sk_load_565_hsw+0x10>
+  DB  119,128                             ; ja            2584 <_sk_load_565_hsw+0x10>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 2584 <_sk_load_565_hsw+0xe4>
+  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 2658 <_sk_load_565_hsw+0xe4>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2293,7 +2336,7 @@ _sk_load_565_hsw LABEL PROC
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,44,255,255,255                  ; jmpq          24b0 <_sk_load_565_hsw+0x10>
+  DB  233,44,255,255,255                  ; jmpq          2584 <_sk_load_565_hsw+0x10>
   DB  244                                 ; hlt
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -2361,23 +2404,23 @@ _sk_gather_565_hsw LABEL PROC
   DB  65,15,183,4,88                      ; movzwl        (%r8,%rbx,2),%eax
   DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
-  DB  196,226,125,88,5,25,26,0,0          ; vpbroadcastd  0x1a19(%rip),%ymm0        # 4074 <_sk_callback_hsw+0x357>
+  DB  196,226,125,88,5,45,26,0,0          ; vpbroadcastd  0x1a2d(%rip),%ymm0        # 415c <_sk_callback_hsw+0x36b>
   DB  197,237,219,192                     ; vpand         %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,12,26,0,0         ; vbroadcastss  0x1a0c(%rip),%ymm1        # 4078 <_sk_callback_hsw+0x35b>
+  DB  196,226,125,24,13,32,26,0,0         ; vbroadcastss  0x1a20(%rip),%ymm1        # 4160 <_sk_callback_hsw+0x36f>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,3,26,0,0          ; vpbroadcastd  0x1a03(%rip),%ymm1        # 407c <_sk_callback_hsw+0x35f>
+  DB  196,226,125,88,13,23,26,0,0         ; vpbroadcastd  0x1a17(%rip),%ymm1        # 4164 <_sk_callback_hsw+0x373>
   DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,246,25,0,0        ; vbroadcastss  0x19f6(%rip),%ymm3        # 4080 <_sk_callback_hsw+0x363>
+  DB  196,226,125,24,29,10,26,0,0         ; vbroadcastss  0x1a0a(%rip),%ymm3        # 4168 <_sk_callback_hsw+0x377>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,88,29,237,25,0,0        ; vpbroadcastd  0x19ed(%rip),%ymm3        # 4084 <_sk_callback_hsw+0x367>
+  DB  196,226,125,88,29,1,26,0,0          ; vpbroadcastd  0x1a01(%rip),%ymm3        # 416c <_sk_callback_hsw+0x37b>
   DB  197,237,219,211                     ; vpand         %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,224,25,0,0        ; vbroadcastss  0x19e0(%rip),%ymm3        # 4088 <_sk_callback_hsw+0x36b>
+  DB  196,226,125,24,29,244,25,0,0        ; vbroadcastss  0x19f4(%rip),%ymm3        # 4170 <_sk_callback_hsw+0x37f>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,213,25,0,0        ; vbroadcastss  0x19d5(%rip),%ymm3        # 408c <_sk_callback_hsw+0x36f>
+  DB  196,226,125,24,29,233,25,0,0        ; vbroadcastss  0x19e9(%rip),%ymm3        # 4174 <_sk_callback_hsw+0x383>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,94                               ; pop           %r14
@@ -2388,11 +2431,11 @@ PUBLIC _sk_store_565_hsw
 _sk_store_565_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,194,25,0,0          ; vbroadcastss  0x19c2(%rip),%ymm8        # 4090 <_sk_callback_hsw+0x373>
+  DB  196,98,125,24,5,214,25,0,0          ; vbroadcastss  0x19d6(%rip),%ymm8        # 4178 <_sk_callback_hsw+0x387>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,53,114,241,11               ; vpslld        $0xb,%ymm9,%ymm9
-  DB  196,98,125,24,21,173,25,0,0         ; vbroadcastss  0x19ad(%rip),%ymm10        # 4094 <_sk_callback_hsw+0x377>
+  DB  196,98,125,24,21,193,25,0,0         ; vbroadcastss  0x19c1(%rip),%ymm10        # 417c <_sk_callback_hsw+0x38b>
   DB  196,65,116,89,210                   ; vmulps        %ymm10,%ymm1,%ymm10
   DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
   DB  196,193,45,114,242,5                ; vpslld        $0x5,%ymm10,%ymm10
@@ -2403,7 +2446,7 @@ _sk_store_565_hsw LABEL PROC
   DB  196,67,125,57,193,1                 ; vextracti128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           2725 <_sk_store_565_hsw+0x65>
+  DB  117,10                              ; jne           27f9 <_sk_store_565_hsw+0x65>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2411,9 +2454,9 @@ _sk_store_565_hsw LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            2721 <_sk_store_565_hsw+0x61>
+  DB  119,236                             ; ja            27f5 <_sk_store_565_hsw+0x61>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 2784 <_sk_store_565_hsw+0xc4>
+  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 2858 <_sk_store_565_hsw+0xc4>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2424,7 +2467,7 @@ _sk_store_565_hsw LABEL PROC
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           2721 <_sk_store_565_hsw+0x61>
+  DB  235,159                             ; jmp           27f5 <_sk_store_565_hsw+0x61>
   DB  102,144                             ; xchg          %ax,%ax
   DB  245                                 ; cmc
   DB  255                                 ; (bad)
@@ -2455,28 +2498,28 @@ _sk_load_4444_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,138,0,0,0                    ; jne           2838 <_sk_load_4444_hsw+0x98>
+  DB  15,133,138,0,0,0                    ; jne           290c <_sk_load_4444_hsw+0x98>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  196,226,125,51,216                  ; vpmovzxwd     %xmm0,%ymm3
-  DB  196,226,125,88,5,214,24,0,0         ; vpbroadcastd  0x18d6(%rip),%ymm0        # 4098 <_sk_callback_hsw+0x37b>
+  DB  196,226,125,88,5,234,24,0,0         ; vpbroadcastd  0x18ea(%rip),%ymm0        # 4180 <_sk_callback_hsw+0x38f>
   DB  197,229,219,192                     ; vpand         %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,201,24,0,0        ; vbroadcastss  0x18c9(%rip),%ymm1        # 409c <_sk_callback_hsw+0x37f>
+  DB  196,226,125,24,13,221,24,0,0        ; vbroadcastss  0x18dd(%rip),%ymm1        # 4184 <_sk_callback_hsw+0x393>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,192,24,0,0        ; vpbroadcastd  0x18c0(%rip),%ymm1        # 40a0 <_sk_callback_hsw+0x383>
+  DB  196,226,125,88,13,212,24,0,0        ; vpbroadcastd  0x18d4(%rip),%ymm1        # 4188 <_sk_callback_hsw+0x397>
   DB  197,229,219,201                     ; vpand         %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,179,24,0,0        ; vbroadcastss  0x18b3(%rip),%ymm2        # 40a4 <_sk_callback_hsw+0x387>
+  DB  196,226,125,24,21,199,24,0,0        ; vbroadcastss  0x18c7(%rip),%ymm2        # 418c <_sk_callback_hsw+0x39b>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,88,21,170,24,0,0        ; vpbroadcastd  0x18aa(%rip),%ymm2        # 40a8 <_sk_callback_hsw+0x38b>
+  DB  196,226,125,88,21,190,24,0,0        ; vpbroadcastd  0x18be(%rip),%ymm2        # 4190 <_sk_callback_hsw+0x39f>
   DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,157,24,0,0          ; vbroadcastss  0x189d(%rip),%ymm8        # 40ac <_sk_callback_hsw+0x38f>
+  DB  196,98,125,24,5,177,24,0,0          ; vbroadcastss  0x18b1(%rip),%ymm8        # 4194 <_sk_callback_hsw+0x3a3>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,88,5,147,24,0,0          ; vpbroadcastd  0x1893(%rip),%ymm8        # 40b0 <_sk_callback_hsw+0x393>
+  DB  196,98,125,88,5,167,24,0,0          ; vpbroadcastd  0x18a7(%rip),%ymm8        # 4198 <_sk_callback_hsw+0x3a7>
   DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,133,24,0,0          ; vbroadcastss  0x1885(%rip),%ymm8        # 40b4 <_sk_callback_hsw+0x397>
+  DB  196,98,125,24,5,153,24,0,0          ; vbroadcastss  0x1899(%rip),%ymm8        # 419c <_sk_callback_hsw+0x3ab>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2485,9 +2528,9 @@ _sk_load_4444_hsw LABEL PROC
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,100,255,255,255              ; ja            27b4 <_sk_load_4444_hsw+0x14>
+  DB  15,135,100,255,255,255              ; ja            2888 <_sk_load_4444_hsw+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 28a4 <_sk_load_4444_hsw+0x104>
+  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 2978 <_sk_load_4444_hsw+0x104>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2499,7 +2542,7 @@ _sk_load_4444_hsw LABEL PROC
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,16,255,255,255                  ; jmpq          27b4 <_sk_load_4444_hsw+0x14>
+  DB  233,16,255,255,255                  ; jmpq          2888 <_sk_load_4444_hsw+0x14>
   DB  244                                 ; hlt
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -2567,25 +2610,25 @@ _sk_gather_4444_hsw LABEL PROC
   DB  65,15,183,4,88                      ; movzwl        (%r8,%rbx,2),%eax
   DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,51,216                  ; vpmovzxwd     %xmm0,%ymm3
-  DB  196,226,125,88,5,61,23,0,0          ; vpbroadcastd  0x173d(%rip),%ymm0        # 40b8 <_sk_callback_hsw+0x39b>
+  DB  196,226,125,88,5,81,23,0,0          ; vpbroadcastd  0x1751(%rip),%ymm0        # 41a0 <_sk_callback_hsw+0x3af>
   DB  197,229,219,192                     ; vpand         %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,48,23,0,0         ; vbroadcastss  0x1730(%rip),%ymm1        # 40bc <_sk_callback_hsw+0x39f>
+  DB  196,226,125,24,13,68,23,0,0         ; vbroadcastss  0x1744(%rip),%ymm1        # 41a4 <_sk_callback_hsw+0x3b3>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,39,23,0,0         ; vpbroadcastd  0x1727(%rip),%ymm1        # 40c0 <_sk_callback_hsw+0x3a3>
+  DB  196,226,125,88,13,59,23,0,0         ; vpbroadcastd  0x173b(%rip),%ymm1        # 41a8 <_sk_callback_hsw+0x3b7>
   DB  197,229,219,201                     ; vpand         %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,26,23,0,0         ; vbroadcastss  0x171a(%rip),%ymm2        # 40c4 <_sk_callback_hsw+0x3a7>
+  DB  196,226,125,24,21,46,23,0,0         ; vbroadcastss  0x172e(%rip),%ymm2        # 41ac <_sk_callback_hsw+0x3bb>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,88,21,17,23,0,0         ; vpbroadcastd  0x1711(%rip),%ymm2        # 40c8 <_sk_callback_hsw+0x3ab>
+  DB  196,226,125,88,21,37,23,0,0         ; vpbroadcastd  0x1725(%rip),%ymm2        # 41b0 <_sk_callback_hsw+0x3bf>
   DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,4,23,0,0            ; vbroadcastss  0x1704(%rip),%ymm8        # 40cc <_sk_callback_hsw+0x3af>
+  DB  196,98,125,24,5,24,23,0,0           ; vbroadcastss  0x1718(%rip),%ymm8        # 41b4 <_sk_callback_hsw+0x3c3>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,88,5,250,22,0,0          ; vpbroadcastd  0x16fa(%rip),%ymm8        # 40d0 <_sk_callback_hsw+0x3b3>
+  DB  196,98,125,88,5,14,23,0,0           ; vpbroadcastd  0x170e(%rip),%ymm8        # 41b8 <_sk_callback_hsw+0x3c7>
   DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,236,22,0,0          ; vbroadcastss  0x16ec(%rip),%ymm8        # 40d4 <_sk_callback_hsw+0x3b7>
+  DB  196,98,125,24,5,0,23,0,0            ; vbroadcastss  0x1700(%rip),%ymm8        # 41bc <_sk_callback_hsw+0x3cb>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -2598,7 +2641,7 @@ PUBLIC _sk_store_4444_hsw
 _sk_store_4444_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,210,22,0,0          ; vbroadcastss  0x16d2(%rip),%ymm8        # 40d8 <_sk_callback_hsw+0x3bb>
+  DB  196,98,125,24,5,230,22,0,0          ; vbroadcastss  0x16e6(%rip),%ymm8        # 41c0 <_sk_callback_hsw+0x3cf>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,53,114,241,12               ; vpslld        $0xc,%ymm9,%ymm9
@@ -2616,7 +2659,7 @@ _sk_store_4444_hsw LABEL PROC
   DB  196,67,125,57,193,1                 ; vextracti128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           2a69 <_sk_store_4444_hsw+0x71>
+  DB  117,10                              ; jne           2b3d <_sk_store_4444_hsw+0x71>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2624,9 +2667,9 @@ _sk_store_4444_hsw LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            2a65 <_sk_store_4444_hsw+0x6d>
+  DB  119,236                             ; ja            2b39 <_sk_store_4444_hsw+0x6d>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 2ac8 <_sk_store_4444_hsw+0xd0>
+  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 2b9c <_sk_store_4444_hsw+0xd0>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2637,7 +2680,7 @@ _sk_store_4444_hsw LABEL PROC
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           2a65 <_sk_store_4444_hsw+0x6d>
+  DB  235,159                             ; jmp           2b39 <_sk_store_4444_hsw+0x6d>
   DB  102,144                             ; xchg          %ax,%ax
   DB  245                                 ; cmc
   DB  255                                 ; (bad)
@@ -2670,16 +2713,16 @@ _sk_load_8888_hsw LABEL PROC
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,88                              ; jne           2b51 <_sk_load_8888_hsw+0x6d>
+  DB  117,88                              ; jne           2c25 <_sk_load_8888_hsw+0x6d>
   DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
-  DB  197,229,219,5,154,23,0,0            ; vpand         0x179a(%rip),%ymm3,%ymm0        # 42a0 <_sk_callback_hsw+0x583>
+  DB  197,229,219,5,166,23,0,0            ; vpand         0x17a6(%rip),%ymm3,%ymm0        # 4380 <_sk_callback_hsw+0x58f>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,201,21,0,0          ; vbroadcastss  0x15c9(%rip),%ymm8        # 40dc <_sk_callback_hsw+0x3bf>
+  DB  196,98,125,24,5,221,21,0,0          ; vbroadcastss  0x15dd(%rip),%ymm8        # 41c4 <_sk_callback_hsw+0x3d3>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,226,101,0,13,159,23,0,0         ; vpshufb       0x179f(%rip),%ymm3,%ymm1        # 42c0 <_sk_callback_hsw+0x5a3>
+  DB  196,226,101,0,13,171,23,0,0         ; vpshufb       0x17ab(%rip),%ymm3,%ymm1        # 43a0 <_sk_callback_hsw+0x5af>
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,226,101,0,21,173,23,0,0         ; vpshufb       0x17ad(%rip),%ymm3,%ymm2        # 42e0 <_sk_callback_hsw+0x5c3>
+  DB  196,226,101,0,21,185,23,0,0         ; vpshufb       0x17b9(%rip),%ymm3,%ymm2        # 43c0 <_sk_callback_hsw+0x5cf>
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
@@ -2696,7 +2739,7 @@ _sk_load_8888_hsw LABEL PROC
   DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
   DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
   DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
-  DB  235,135                             ; jmp           2afe <_sk_load_8888_hsw+0x1a>
+  DB  235,135                             ; jmp           2bd2 <_sk_load_8888_hsw+0x1a>
 
 PUBLIC _sk_gather_8888_hsw
 _sk_gather_8888_hsw LABEL PROC
@@ -2709,14 +2752,14 @@ _sk_gather_8888_hsw LABEL PROC
   DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
   DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
   DB  196,194,117,144,28,128              ; vpgatherdd    %ymm1,(%r8,%ymm0,4),%ymm3
-  DB  197,229,219,5,91,23,0,0             ; vpand         0x175b(%rip),%ymm3,%ymm0        # 4300 <_sk_callback_hsw+0x5e3>
+  DB  197,229,219,5,103,23,0,0            ; vpand         0x1767(%rip),%ymm3,%ymm0        # 43e0 <_sk_callback_hsw+0x5ef>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,46,21,0,0           ; vbroadcastss  0x152e(%rip),%ymm8        # 40e0 <_sk_callback_hsw+0x3c3>
+  DB  196,98,125,24,5,66,21,0,0           ; vbroadcastss  0x1542(%rip),%ymm8        # 41c8 <_sk_callback_hsw+0x3d7>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,226,101,0,13,96,23,0,0          ; vpshufb       0x1760(%rip),%ymm3,%ymm1        # 4320 <_sk_callback_hsw+0x603>
+  DB  196,226,101,0,13,108,23,0,0         ; vpshufb       0x176c(%rip),%ymm3,%ymm1        # 4400 <_sk_callback_hsw+0x60f>
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,226,101,0,21,110,23,0,0         ; vpshufb       0x176e(%rip),%ymm3,%ymm2        # 4340 <_sk_callback_hsw+0x623>
+  DB  196,226,101,0,21,122,23,0,0         ; vpshufb       0x177a(%rip),%ymm3,%ymm2        # 4420 <_sk_callback_hsw+0x62f>
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
@@ -2731,7 +2774,7 @@ _sk_store_8888_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  76,3,8                              ; add           (%rax),%r9
-  DB  196,98,125,24,5,222,20,0,0          ; vbroadcastss  0x14de(%rip),%ymm8        # 40e4 <_sk_callback_hsw+0x3c7>
+  DB  196,98,125,24,5,242,20,0,0          ; vbroadcastss  0x14f2(%rip),%ymm8        # 41cc <_sk_callback_hsw+0x3db>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,65,116,89,208                   ; vmulps        %ymm8,%ymm1,%ymm10
@@ -2747,7 +2790,7 @@ _sk_store_8888_hsw LABEL PROC
   DB  196,65,45,235,192                   ; vpor          %ymm8,%ymm10,%ymm8
   DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,12                              ; jne           2c60 <_sk_store_8888_hsw+0x73>
+  DB  117,12                              ; jne           2d34 <_sk_store_8888_hsw+0x73>
   DB  196,65,126,127,1                    ; vmovdqu       %ymm8,(%r9)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,137,193                          ; mov           %r8,%rcx
@@ -2760,14 +2803,14 @@ _sk_store_8888_hsw LABEL PROC
   DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
   DB  196,66,125,33,201                   ; vpmovsxbd     %xmm9,%ymm9
   DB  196,66,53,142,1                     ; vpmaskmovd    %ymm8,%ymm9,(%r9)
-  DB  235,211                             ; jmp           2c59 <_sk_store_8888_hsw+0x6c>
+  DB  235,211                             ; jmp           2d2d <_sk_store_8888_hsw+0x6c>
 
 PUBLIC _sk_load_f16_hsw
 _sk_load_f16_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,97                              ; jne           2cf1 <_sk_load_f16_hsw+0x6b>
+  DB  117,97                              ; jne           2dc5 <_sk_load_f16_hsw+0x6b>
   DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
   DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
   DB  197,249,16,92,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm3
@@ -2793,29 +2836,29 @@ _sk_load_f16_hsw LABEL PROC
   DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,79                              ; je            2d50 <_sk_load_f16_hsw+0xca>
+  DB  116,79                              ; je            2e24 <_sk_load_f16_hsw+0xca>
   DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,67                              ; jb            2d50 <_sk_load_f16_hsw+0xca>
+  DB  114,67                              ; jb            2e24 <_sk_load_f16_hsw+0xca>
   DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,68                              ; je            2d5d <_sk_load_f16_hsw+0xd7>
+  DB  116,68                              ; je            2e31 <_sk_load_f16_hsw+0xd7>
   DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,56                              ; jb            2d5d <_sk_load_f16_hsw+0xd7>
+  DB  114,56                              ; jb            2e31 <_sk_load_f16_hsw+0xd7>
   DB  197,251,16,92,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,114,255,255,255              ; je            2ca7 <_sk_load_f16_hsw+0x21>
+  DB  15,132,114,255,255,255              ; je            2d7b <_sk_load_f16_hsw+0x21>
   DB  197,225,22,92,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,98,255,255,255               ; jb            2ca7 <_sk_load_f16_hsw+0x21>
+  DB  15,130,98,255,255,255               ; jb            2d7b <_sk_load_f16_hsw+0x21>
   DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
-  DB  233,87,255,255,255                  ; jmpq          2ca7 <_sk_load_f16_hsw+0x21>
+  DB  233,87,255,255,255                  ; jmpq          2d7b <_sk_load_f16_hsw+0x21>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,74,255,255,255                  ; jmpq          2ca7 <_sk_load_f16_hsw+0x21>
+  DB  233,74,255,255,255                  ; jmpq          2d7b <_sk_load_f16_hsw+0x21>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,65,255,255,255                  ; jmpq          2ca7 <_sk_load_f16_hsw+0x21>
+  DB  233,65,255,255,255                  ; jmpq          2d7b <_sk_load_f16_hsw+0x21>
 
 PUBLIC _sk_gather_f16_hsw
 _sk_gather_f16_hsw LABEL PROC
@@ -2869,7 +2912,7 @@ _sk_store_f16_hsw LABEL PROC
   DB  196,65,57,98,205                    ; vpunpckldq    %xmm13,%xmm8,%xmm9
   DB  196,65,57,106,197                   ; vpunpckhdq    %xmm13,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,27                              ; jne           2e55 <_sk_store_f16_hsw+0x65>
+  DB  117,27                              ; jne           2f29 <_sk_store_f16_hsw+0x65>
   DB  197,120,17,28,248                   ; vmovups       %xmm11,(%rax,%rdi,8)
   DB  197,120,17,84,248,16                ; vmovups       %xmm10,0x10(%rax,%rdi,8)
   DB  197,120,17,76,248,32                ; vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -2878,22 +2921,22 @@ _sk_store_f16_hsw LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  197,121,214,28,248                  ; vmovq         %xmm11,(%rax,%rdi,8)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,241                             ; je            2e51 <_sk_store_f16_hsw+0x61>
+  DB  116,241                             ; je            2f25 <_sk_store_f16_hsw+0x61>
   DB  197,121,23,92,248,8                 ; vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,229                             ; jb            2e51 <_sk_store_f16_hsw+0x61>
+  DB  114,229                             ; jb            2f25 <_sk_store_f16_hsw+0x61>
   DB  197,121,214,84,248,16               ; vmovq         %xmm10,0x10(%rax,%rdi,8)
-  DB  116,221                             ; je            2e51 <_sk_store_f16_hsw+0x61>
+  DB  116,221                             ; je            2f25 <_sk_store_f16_hsw+0x61>
   DB  197,121,23,84,248,24                ; vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,209                             ; jb            2e51 <_sk_store_f16_hsw+0x61>
+  DB  114,209                             ; jb            2f25 <_sk_store_f16_hsw+0x61>
   DB  197,121,214,76,248,32               ; vmovq         %xmm9,0x20(%rax,%rdi,8)
-  DB  116,201                             ; je            2e51 <_sk_store_f16_hsw+0x61>
+  DB  116,201                             ; je            2f25 <_sk_store_f16_hsw+0x61>
   DB  197,121,23,76,248,40                ; vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,189                             ; jb            2e51 <_sk_store_f16_hsw+0x61>
+  DB  114,189                             ; jb            2f25 <_sk_store_f16_hsw+0x61>
   DB  197,121,214,68,248,48               ; vmovq         %xmm8,0x30(%rax,%rdi,8)
-  DB  235,181                             ; jmp           2e51 <_sk_store_f16_hsw+0x61>
+  DB  235,181                             ; jmp           2f25 <_sk_store_f16_hsw+0x61>
 
 PUBLIC _sk_load_u16_be_hsw
 _sk_load_u16_be_hsw LABEL PROC
@@ -2901,7 +2944,7 @@ _sk_load_u16_be_hsw LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,204,0,0,0                    ; jne           2f7e <_sk_load_u16_be_hsw+0xe2>
+  DB  15,133,204,0,0,0                    ; jne           3052 <_sk_load_u16_be_hsw+0xe2>
   DB  196,65,121,16,4,64                  ; vmovupd       (%r8,%rax,2),%xmm8
   DB  196,193,121,16,84,64,16             ; vmovupd       0x10(%r8,%rax,2),%xmm2
   DB  196,193,121,16,92,64,32             ; vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -2920,7 +2963,7 @@ _sk_load_u16_be_hsw LABEL PROC
   DB  197,241,235,192                     ; vpor          %xmm0,%xmm1,%xmm0
   DB  196,226,125,51,192                  ; vpmovzxwd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,21,213,17,0,0         ; vbroadcastss  0x11d5(%rip),%ymm10        # 40e8 <_sk_callback_hsw+0x3cb>
+  DB  196,98,125,24,21,233,17,0,0         ; vbroadcastss  0x11e9(%rip),%ymm10        # 41d0 <_sk_callback_hsw+0x3df>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -2948,29 +2991,29 @@ _sk_load_u16_be_hsw LABEL PROC
   DB  196,65,123,16,4,64                  ; vmovsd        (%r8,%rax,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            2fe4 <_sk_load_u16_be_hsw+0x148>
+  DB  116,85                              ; je            30b8 <_sk_load_u16_be_hsw+0x148>
   DB  196,65,57,22,68,64,8                ; vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            2fe4 <_sk_load_u16_be_hsw+0x148>
+  DB  114,72                              ; jb            30b8 <_sk_load_u16_be_hsw+0x148>
   DB  196,193,123,16,84,64,16             ; vmovsd        0x10(%r8,%rax,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            2ff1 <_sk_load_u16_be_hsw+0x155>
+  DB  116,72                              ; je            30c5 <_sk_load_u16_be_hsw+0x155>
   DB  196,193,105,22,84,64,24             ; vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            2ff1 <_sk_load_u16_be_hsw+0x155>
+  DB  114,59                              ; jb            30c5 <_sk_load_u16_be_hsw+0x155>
   DB  196,193,123,16,92,64,32             ; vmovsd        0x20(%r8,%rax,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,6,255,255,255                ; je            2ecd <_sk_load_u16_be_hsw+0x31>
+  DB  15,132,6,255,255,255                ; je            2fa1 <_sk_load_u16_be_hsw+0x31>
   DB  196,193,97,22,92,64,40              ; vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,245,254,255,255              ; jb            2ecd <_sk_load_u16_be_hsw+0x31>
+  DB  15,130,245,254,255,255              ; jb            2fa1 <_sk_load_u16_be_hsw+0x31>
   DB  196,65,122,126,76,64,48             ; vmovq         0x30(%r8,%rax,2),%xmm9
-  DB  233,233,254,255,255                 ; jmpq          2ecd <_sk_load_u16_be_hsw+0x31>
+  DB  233,233,254,255,255                 ; jmpq          2fa1 <_sk_load_u16_be_hsw+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,220,254,255,255                 ; jmpq          2ecd <_sk_load_u16_be_hsw+0x31>
+  DB  233,220,254,255,255                 ; jmpq          2fa1 <_sk_load_u16_be_hsw+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,211,254,255,255                 ; jmpq          2ecd <_sk_load_u16_be_hsw+0x31>
+  DB  233,211,254,255,255                 ; jmpq          2fa1 <_sk_load_u16_be_hsw+0x31>
 
 PUBLIC _sk_load_rgb_u16_be_hsw
 _sk_load_rgb_u16_be_hsw LABEL PROC
@@ -2978,7 +3021,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,127                        ; lea           (%rdi,%rdi,2),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,204,0,0,0                    ; jne           30d8 <_sk_load_rgb_u16_be_hsw+0xde>
+  DB  15,133,204,0,0,0                    ; jne           31ac <_sk_load_rgb_u16_be_hsw+0xde>
   DB  196,193,122,111,4,64                ; vmovdqu       (%r8,%rax,2),%xmm0
   DB  196,193,122,111,84,64,12            ; vmovdqu       0xc(%r8,%rax,2),%xmm2
   DB  196,193,122,111,76,64,24            ; vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -3002,7 +3045,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
   DB  197,241,235,192                     ; vpor          %xmm0,%xmm1,%xmm0
   DB  196,226,125,51,192                  ; vpmovzxwd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,21,102,16,0,0         ; vbroadcastss  0x1066(%rip),%ymm10        # 40ec <_sk_callback_hsw+0x3cf>
+  DB  196,98,125,24,21,122,16,0,0         ; vbroadcastss  0x107a(%rip),%ymm10        # 41d4 <_sk_callback_hsw+0x3e3>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -3019,48 +3062,48 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,26,16,0,0         ; vbroadcastss  0x101a(%rip),%ymm3        # 40f0 <_sk_callback_hsw+0x3d3>
+  DB  196,226,125,24,29,46,16,0,0         ; vbroadcastss  0x102e(%rip),%ymm3        # 41d8 <_sk_callback_hsw+0x3e7>
   DB  255,224                             ; jmpq          *%rax
   DB  196,193,121,110,4,64                ; vmovd         (%r8,%rax,2),%xmm0
   DB  196,193,121,196,68,64,4,2           ; vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           30f1 <_sk_load_rgb_u16_be_hsw+0xf7>
-  DB  233,79,255,255,255                  ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  117,5                               ; jne           31c5 <_sk_load_rgb_u16_be_hsw+0xf7>
+  DB  233,79,255,255,255                  ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
   DB  196,193,121,110,76,64,6             ; vmovd         0x6(%r8,%rax,2),%xmm1
   DB  196,65,113,196,68,64,10,2           ; vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            3120 <_sk_load_rgb_u16_be_hsw+0x126>
+  DB  114,26                              ; jb            31f4 <_sk_load_rgb_u16_be_hsw+0x126>
   DB  196,193,121,110,76,64,12            ; vmovd         0xc(%r8,%rax,2),%xmm1
   DB  196,193,113,196,84,64,16,2          ; vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           3125 <_sk_load_rgb_u16_be_hsw+0x12b>
-  DB  233,32,255,255,255                  ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
-  DB  233,27,255,255,255                  ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  117,10                              ; jne           31f9 <_sk_load_rgb_u16_be_hsw+0x12b>
+  DB  233,32,255,255,255                  ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  233,27,255,255,255                  ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
   DB  196,193,121,110,76,64,18            ; vmovd         0x12(%r8,%rax,2),%xmm1
   DB  196,65,113,196,76,64,22,2           ; vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            3154 <_sk_load_rgb_u16_be_hsw+0x15a>
+  DB  114,26                              ; jb            3228 <_sk_load_rgb_u16_be_hsw+0x15a>
   DB  196,193,121,110,76,64,24            ; vmovd         0x18(%r8,%rax,2),%xmm1
   DB  196,193,113,196,76,64,28,2          ; vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           3159 <_sk_load_rgb_u16_be_hsw+0x15f>
-  DB  233,236,254,255,255                 ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
-  DB  233,231,254,255,255                 ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  117,10                              ; jne           322d <_sk_load_rgb_u16_be_hsw+0x15f>
+  DB  233,236,254,255,255                 ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  233,231,254,255,255                 ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
   DB  196,193,121,110,92,64,30            ; vmovd         0x1e(%r8,%rax,2),%xmm3
   DB  196,65,97,196,92,64,34,2            ; vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            3182 <_sk_load_rgb_u16_be_hsw+0x188>
+  DB  114,20                              ; jb            3256 <_sk_load_rgb_u16_be_hsw+0x188>
   DB  196,193,121,110,92,64,36            ; vmovd         0x24(%r8,%rax,2),%xmm3
   DB  196,193,97,196,92,64,40,2           ; vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  DB  233,190,254,255,255                 ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
-  DB  233,185,254,255,255                 ; jmpq          3040 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  233,190,254,255,255                 ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
+  DB  233,185,254,255,255                 ; jmpq          3114 <_sk_load_rgb_u16_be_hsw+0x46>
 
 PUBLIC _sk_store_u16_be_hsw
 _sk_store_u16_be_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
-  DB  196,98,125,24,5,87,15,0,0           ; vbroadcastss  0xf57(%rip),%ymm8        # 40f4 <_sk_callback_hsw+0x3d7>
+  DB  196,98,125,24,5,107,15,0,0          ; vbroadcastss  0xf6b(%rip),%ymm8        # 41dc <_sk_callback_hsw+0x3eb>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
@@ -3098,7 +3141,7 @@ _sk_store_u16_be_hsw LABEL PROC
   DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
   DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,31                              ; jne           3281 <_sk_store_u16_be_hsw+0xfa>
+  DB  117,31                              ; jne           3355 <_sk_store_u16_be_hsw+0xfa>
   DB  196,65,120,17,28,64                 ; vmovups       %xmm11,(%r8,%rax,2)
   DB  196,65,120,17,84,64,16              ; vmovups       %xmm10,0x10(%r8,%rax,2)
   DB  196,65,120,17,76,64,32              ; vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -3107,31 +3150,31 @@ _sk_store_u16_be_hsw LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,214,28,64                ; vmovq         %xmm11,(%r8,%rax,2)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            327d <_sk_store_u16_be_hsw+0xf6>
+  DB  116,240                             ; je            3351 <_sk_store_u16_be_hsw+0xf6>
   DB  196,65,121,23,92,64,8               ; vmovhpd       %xmm11,0x8(%r8,%rax,2)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            327d <_sk_store_u16_be_hsw+0xf6>
+  DB  114,227                             ; jb            3351 <_sk_store_u16_be_hsw+0xf6>
   DB  196,65,121,214,84,64,16             ; vmovq         %xmm10,0x10(%r8,%rax,2)
-  DB  116,218                             ; je            327d <_sk_store_u16_be_hsw+0xf6>
+  DB  116,218                             ; je            3351 <_sk_store_u16_be_hsw+0xf6>
   DB  196,65,121,23,84,64,24              ; vmovhpd       %xmm10,0x18(%r8,%rax,2)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            327d <_sk_store_u16_be_hsw+0xf6>
+  DB  114,205                             ; jb            3351 <_sk_store_u16_be_hsw+0xf6>
   DB  196,65,121,214,76,64,32             ; vmovq         %xmm9,0x20(%r8,%rax,2)
-  DB  116,196                             ; je            327d <_sk_store_u16_be_hsw+0xf6>
+  DB  116,196                             ; je            3351 <_sk_store_u16_be_hsw+0xf6>
   DB  196,65,121,23,76,64,40              ; vmovhpd       %xmm9,0x28(%r8,%rax,2)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,183                             ; jb            327d <_sk_store_u16_be_hsw+0xf6>
+  DB  114,183                             ; jb            3351 <_sk_store_u16_be_hsw+0xf6>
   DB  196,65,121,214,68,64,48             ; vmovq         %xmm8,0x30(%r8,%rax,2)
-  DB  235,174                             ; jmp           327d <_sk_store_u16_be_hsw+0xf6>
+  DB  235,174                             ; jmp           3351 <_sk_store_u16_be_hsw+0xf6>
 
 PUBLIC _sk_load_f32_hsw
 _sk_load_f32_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  119,110                             ; ja            3345 <_sk_load_f32_hsw+0x76>
+  DB  119,110                             ; ja            3419 <_sk_load_f32_hsw+0x76>
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,141,21,135,0,0,0                 ; lea           0x87(%rip),%r10        # 3370 <_sk_load_f32_hsw+0xa1>
+  DB  76,141,21,135,0,0,0                 ; lea           0x87(%rip),%r10        # 3444 <_sk_load_f32_hsw+0xa1>
   DB  73,99,4,138                         ; movslq        (%r10,%rcx,4),%rax
   DB  76,1,208                            ; add           %r10,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -3190,7 +3233,7 @@ _sk_store_f32_hsw LABEL PROC
   DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
   DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,55                              ; jne           33fd <_sk_store_f32_hsw+0x6d>
+  DB  117,55                              ; jne           34d1 <_sk_store_f32_hsw+0x6d>
   DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -3203,22 +3246,22 @@ _sk_store_f32_hsw LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            33f9 <_sk_store_f32_hsw+0x69>
+  DB  116,240                             ; je            34cd <_sk_store_f32_hsw+0x69>
   DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            33f9 <_sk_store_f32_hsw+0x69>
+  DB  114,227                             ; jb            34cd <_sk_store_f32_hsw+0x69>
   DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
-  DB  116,218                             ; je            33f9 <_sk_store_f32_hsw+0x69>
+  DB  116,218                             ; je            34cd <_sk_store_f32_hsw+0x69>
   DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            33f9 <_sk_store_f32_hsw+0x69>
+  DB  114,205                             ; jb            34cd <_sk_store_f32_hsw+0x69>
   DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  DB  116,195                             ; je            33f9 <_sk_store_f32_hsw+0x69>
+  DB  116,195                             ; je            34cd <_sk_store_f32_hsw+0x69>
   DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,181                             ; jb            33f9 <_sk_store_f32_hsw+0x69>
+  DB  114,181                             ; jb            34cd <_sk_store_f32_hsw+0x69>
   DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  DB  235,171                             ; jmp           33f9 <_sk_store_f32_hsw+0x69>
+  DB  235,171                             ; jmp           34cd <_sk_store_f32_hsw+0x69>
 
 PUBLIC _sk_clamp_x_hsw
 _sk_clamp_x_hsw LABEL PROC
@@ -3314,11 +3357,11 @@ _sk_mirror_y_hsw LABEL PROC
 
 PUBLIC _sk_luminance_to_alpha_hsw
 _sk_luminance_to_alpha_hsw LABEL PROC
-  DB  196,226,125,24,29,113,11,0,0        ; vbroadcastss  0xb71(%rip),%ymm3        # 40f8 <_sk_callback_hsw+0x3db>
-  DB  196,98,125,24,5,108,11,0,0          ; vbroadcastss  0xb6c(%rip),%ymm8        # 40fc <_sk_callback_hsw+0x3df>
+  DB  196,226,125,24,29,133,11,0,0        ; vbroadcastss  0xb85(%rip),%ymm3        # 41e0 <_sk_callback_hsw+0x3ef>
+  DB  196,98,125,24,5,128,11,0,0          ; vbroadcastss  0xb80(%rip),%ymm8        # 41e4 <_sk_callback_hsw+0x3f3>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  196,226,125,184,203                 ; vfmadd231ps   %ymm3,%ymm0,%ymm1
-  DB  196,226,125,24,29,93,11,0,0         ; vbroadcastss  0xb5d(%rip),%ymm3        # 4100 <_sk_callback_hsw+0x3e3>
+  DB  196,226,125,24,29,113,11,0,0        ; vbroadcastss  0xb71(%rip),%ymm3        # 41e8 <_sk_callback_hsw+0x3f7>
   DB  196,226,109,168,217                 ; vfmadd213ps   %ymm1,%ymm2,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -3453,7 +3496,7 @@ _sk_linear_gradient_hsw LABEL PROC
   DB  196,98,125,24,72,28                 ; vbroadcastss  0x1c(%rax),%ymm9
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  15,132,143,0,0,0                    ; je            387b <_sk_linear_gradient_hsw+0xb5>
+  DB  15,132,143,0,0,0                    ; je            394f <_sk_linear_gradient_hsw+0xb5>
   DB  72,139,64,8                         ; mov           0x8(%rax),%rax
   DB  72,131,192,32                       ; add           $0x20,%rax
   DB  196,65,28,87,228                    ; vxorps        %ymm12,%ymm12,%ymm12
@@ -3480,8 +3523,8 @@ _sk_linear_gradient_hsw LABEL PROC
   DB  196,67,13,74,201,208                ; vblendvps     %ymm13,%ymm9,%ymm14,%ymm9
   DB  72,131,192,36                       ; add           $0x24,%rax
   DB  73,255,200                          ; dec           %r8
-  DB  117,140                             ; jne           3805 <_sk_linear_gradient_hsw+0x3f>
-  DB  235,17                              ; jmp           388c <_sk_linear_gradient_hsw+0xc6>
+  DB  117,140                             ; jne           38d9 <_sk_linear_gradient_hsw+0x3f>
+  DB  235,17                              ; jmp           3960 <_sk_linear_gradient_hsw+0xc6>
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  197,236,87,210                      ; vxorps        %ymm2,%ymm2,%ymm2
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
@@ -3524,24 +3567,24 @@ _sk_xy_to_polar_unit_hsw LABEL PROC
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,221,7,0,0          ; vbroadcastss  0x7dd(%rip),%ymm13        # 4104 <_sk_callback_hsw+0x3e7>
-  DB  196,98,125,24,53,216,7,0,0          ; vbroadcastss  0x7d8(%rip),%ymm14        # 4108 <_sk_callback_hsw+0x3eb>
+  DB  196,98,125,24,45,241,7,0,0          ; vbroadcastss  0x7f1(%rip),%ymm13        # 41ec <_sk_callback_hsw+0x3fb>
+  DB  196,98,125,24,53,236,7,0,0          ; vbroadcastss  0x7ec(%rip),%ymm14        # 41f0 <_sk_callback_hsw+0x3ff>
   DB  196,66,29,184,245                   ; vfmadd231ps   %ymm13,%ymm12,%ymm14
-  DB  196,98,125,24,45,206,7,0,0          ; vbroadcastss  0x7ce(%rip),%ymm13        # 410c <_sk_callback_hsw+0x3ef>
+  DB  196,98,125,24,45,226,7,0,0          ; vbroadcastss  0x7e2(%rip),%ymm13        # 41f4 <_sk_callback_hsw+0x403>
   DB  196,66,29,184,238                   ; vfmadd231ps   %ymm14,%ymm12,%ymm13
-  DB  196,98,125,24,53,196,7,0,0          ; vbroadcastss  0x7c4(%rip),%ymm14        # 4110 <_sk_callback_hsw+0x3f3>
+  DB  196,98,125,24,53,216,7,0,0          ; vbroadcastss  0x7d8(%rip),%ymm14        # 41f8 <_sk_callback_hsw+0x407>
   DB  196,66,29,184,245                   ; vfmadd231ps   %ymm13,%ymm12,%ymm14
   DB  196,65,36,89,222                    ; vmulps        %ymm14,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,175,7,0,0          ; vbroadcastss  0x7af(%rip),%ymm10        # 4114 <_sk_callback_hsw+0x3f7>
+  DB  196,98,125,24,21,195,7,0,0          ; vbroadcastss  0x7c3(%rip),%ymm10        # 41fc <_sk_callback_hsw+0x40b>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,153,7,0,0          ; vbroadcastss  0x799(%rip),%ymm10        # 4118 <_sk_callback_hsw+0x3fb>
+  DB  196,98,125,24,21,173,7,0,0          ; vbroadcastss  0x7ad(%rip),%ymm10        # 4200 <_sk_callback_hsw+0x40f>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,131,7,0,0          ; vbroadcastss  0x783(%rip),%ymm10        # 411c <_sk_callback_hsw+0x3ff>
+  DB  196,98,125,24,21,151,7,0,0          ; vbroadcastss  0x797(%rip),%ymm10        # 4204 <_sk_callback_hsw+0x413>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -3552,7 +3595,7 @@ _sk_xy_to_polar_unit_hsw LABEL PROC
 PUBLIC _sk_save_xy_hsw
 _sk_save_xy_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,98,7,0,0            ; vbroadcastss  0x762(%rip),%ymm8        # 4120 <_sk_callback_hsw+0x403>
+  DB  196,98,125,24,5,118,7,0,0           ; vbroadcastss  0x776(%rip),%ymm8        # 4208 <_sk_callback_hsw+0x417>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -3582,9 +3625,9 @@ _sk_accumulate_hsw LABEL PROC
 PUBLIC _sk_bilinear_nx_hsw
 _sk_bilinear_nx_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,246,6,0,0          ; vbroadcastss  0x6f6(%rip),%ymm0        # 4124 <_sk_callback_hsw+0x407>
+  DB  196,226,125,24,5,10,7,0,0           ; vbroadcastss  0x70a(%rip),%ymm0        # 420c <_sk_callback_hsw+0x41b>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,237,6,0,0           ; vbroadcastss  0x6ed(%rip),%ymm8        # 4128 <_sk_callback_hsw+0x40b>
+  DB  196,98,125,24,5,1,7,0,0             ; vbroadcastss  0x701(%rip),%ymm8        # 4210 <_sk_callback_hsw+0x41f>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3593,7 +3636,7 @@ _sk_bilinear_nx_hsw LABEL PROC
 PUBLIC _sk_bilinear_px_hsw
 _sk_bilinear_px_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,213,6,0,0          ; vbroadcastss  0x6d5(%rip),%ymm0        # 412c <_sk_callback_hsw+0x40f>
+  DB  196,226,125,24,5,233,6,0,0          ; vbroadcastss  0x6e9(%rip),%ymm0        # 4214 <_sk_callback_hsw+0x423>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -3603,9 +3646,9 @@ _sk_bilinear_px_hsw LABEL PROC
 PUBLIC _sk_bilinear_ny_hsw
 _sk_bilinear_ny_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,185,6,0,0         ; vbroadcastss  0x6b9(%rip),%ymm1        # 4130 <_sk_callback_hsw+0x413>
+  DB  196,226,125,24,13,205,6,0,0         ; vbroadcastss  0x6cd(%rip),%ymm1        # 4218 <_sk_callback_hsw+0x427>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,175,6,0,0           ; vbroadcastss  0x6af(%rip),%ymm8        # 4134 <_sk_callback_hsw+0x417>
+  DB  196,98,125,24,5,195,6,0,0           ; vbroadcastss  0x6c3(%rip),%ymm8        # 421c <_sk_callback_hsw+0x42b>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3614,7 +3657,7 @@ _sk_bilinear_ny_hsw LABEL PROC
 PUBLIC _sk_bilinear_py_hsw
 _sk_bilinear_py_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,151,6,0,0         ; vbroadcastss  0x697(%rip),%ymm1        # 4138 <_sk_callback_hsw+0x41b>
+  DB  196,226,125,24,13,171,6,0,0         ; vbroadcastss  0x6ab(%rip),%ymm1        # 4220 <_sk_callback_hsw+0x42f>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -3624,13 +3667,13 @@ _sk_bilinear_py_hsw LABEL PROC
 PUBLIC _sk_bicubic_n3x_hsw
 _sk_bicubic_n3x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,122,6,0,0          ; vbroadcastss  0x67a(%rip),%ymm0        # 413c <_sk_callback_hsw+0x41f>
+  DB  196,226,125,24,5,142,6,0,0          ; vbroadcastss  0x68e(%rip),%ymm0        # 4224 <_sk_callback_hsw+0x433>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,113,6,0,0           ; vbroadcastss  0x671(%rip),%ymm8        # 4140 <_sk_callback_hsw+0x423>
+  DB  196,98,125,24,5,133,6,0,0           ; vbroadcastss  0x685(%rip),%ymm8        # 4228 <_sk_callback_hsw+0x437>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,98,6,0,0           ; vbroadcastss  0x662(%rip),%ymm10        # 4144 <_sk_callback_hsw+0x427>
-  DB  196,98,125,24,29,93,6,0,0           ; vbroadcastss  0x65d(%rip),%ymm11        # 4148 <_sk_callback_hsw+0x42b>
+  DB  196,98,125,24,21,118,6,0,0          ; vbroadcastss  0x676(%rip),%ymm10        # 422c <_sk_callback_hsw+0x43b>
+  DB  196,98,125,24,29,113,6,0,0          ; vbroadcastss  0x671(%rip),%ymm11        # 4230 <_sk_callback_hsw+0x43f>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,36,89,193                    ; vmulps        %ymm9,%ymm11,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -3640,16 +3683,16 @@ _sk_bicubic_n3x_hsw LABEL PROC
 PUBLIC _sk_bicubic_n1x_hsw
 _sk_bicubic_n1x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,64,6,0,0           ; vbroadcastss  0x640(%rip),%ymm0        # 414c <_sk_callback_hsw+0x42f>
+  DB  196,226,125,24,5,84,6,0,0           ; vbroadcastss  0x654(%rip),%ymm0        # 4234 <_sk_callback_hsw+0x443>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,55,6,0,0            ; vbroadcastss  0x637(%rip),%ymm8        # 4150 <_sk_callback_hsw+0x433>
+  DB  196,98,125,24,5,75,6,0,0            ; vbroadcastss  0x64b(%rip),%ymm8        # 4238 <_sk_callback_hsw+0x447>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,45,6,0,0           ; vbroadcastss  0x62d(%rip),%ymm9        # 4154 <_sk_callback_hsw+0x437>
-  DB  196,98,125,24,21,40,6,0,0           ; vbroadcastss  0x628(%rip),%ymm10        # 4158 <_sk_callback_hsw+0x43b>
+  DB  196,98,125,24,13,65,6,0,0           ; vbroadcastss  0x641(%rip),%ymm9        # 423c <_sk_callback_hsw+0x44b>
+  DB  196,98,125,24,21,60,6,0,0           ; vbroadcastss  0x63c(%rip),%ymm10        # 4240 <_sk_callback_hsw+0x44f>
   DB  196,66,61,168,209                   ; vfmadd213ps   %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,13,30,6,0,0           ; vbroadcastss  0x61e(%rip),%ymm9        # 415c <_sk_callback_hsw+0x43f>
+  DB  196,98,125,24,13,50,6,0,0           ; vbroadcastss  0x632(%rip),%ymm9        # 4244 <_sk_callback_hsw+0x453>
   DB  196,66,61,184,202                   ; vfmadd231ps   %ymm10,%ymm8,%ymm9
-  DB  196,98,125,24,21,20,6,0,0           ; vbroadcastss  0x614(%rip),%ymm10        # 4160 <_sk_callback_hsw+0x443>
+  DB  196,98,125,24,21,40,6,0,0           ; vbroadcastss  0x628(%rip),%ymm10        # 4248 <_sk_callback_hsw+0x457>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  197,124,17,144,128,0,0,0            ; vmovups       %ymm10,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3658,14 +3701,14 @@ _sk_bicubic_n1x_hsw LABEL PROC
 PUBLIC _sk_bicubic_p1x_hsw
 _sk_bicubic_p1x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,252,5,0,0           ; vbroadcastss  0x5fc(%rip),%ymm8        # 4164 <_sk_callback_hsw+0x447>
+  DB  196,98,125,24,5,16,6,0,0            ; vbroadcastss  0x610(%rip),%ymm8        # 424c <_sk_callback_hsw+0x45b>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,72,64                    ; vmovups       0x40(%rax),%ymm9
-  DB  196,98,125,24,21,238,5,0,0          ; vbroadcastss  0x5ee(%rip),%ymm10        # 4168 <_sk_callback_hsw+0x44b>
-  DB  196,98,125,24,29,233,5,0,0          ; vbroadcastss  0x5e9(%rip),%ymm11        # 416c <_sk_callback_hsw+0x44f>
+  DB  196,98,125,24,21,2,6,0,0            ; vbroadcastss  0x602(%rip),%ymm10        # 4250 <_sk_callback_hsw+0x45f>
+  DB  196,98,125,24,29,253,5,0,0          ; vbroadcastss  0x5fd(%rip),%ymm11        # 4254 <_sk_callback_hsw+0x463>
   DB  196,66,53,168,218                   ; vfmadd213ps   %ymm10,%ymm9,%ymm11
   DB  196,66,53,168,216                   ; vfmadd213ps   %ymm8,%ymm9,%ymm11
-  DB  196,98,125,24,5,218,5,0,0           ; vbroadcastss  0x5da(%rip),%ymm8        # 4170 <_sk_callback_hsw+0x453>
+  DB  196,98,125,24,5,238,5,0,0           ; vbroadcastss  0x5ee(%rip),%ymm8        # 4258 <_sk_callback_hsw+0x467>
   DB  196,66,53,184,195                   ; vfmadd231ps   %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3674,12 +3717,12 @@ _sk_bicubic_p1x_hsw LABEL PROC
 PUBLIC _sk_bicubic_p3x_hsw
 _sk_bicubic_p3x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,194,5,0,0          ; vbroadcastss  0x5c2(%rip),%ymm0        # 4174 <_sk_callback_hsw+0x457>
+  DB  196,226,125,24,5,214,5,0,0          ; vbroadcastss  0x5d6(%rip),%ymm0        # 425c <_sk_callback_hsw+0x46b>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,175,5,0,0          ; vbroadcastss  0x5af(%rip),%ymm10        # 4178 <_sk_callback_hsw+0x45b>
-  DB  196,98,125,24,29,170,5,0,0          ; vbroadcastss  0x5aa(%rip),%ymm11        # 417c <_sk_callback_hsw+0x45f>
+  DB  196,98,125,24,21,195,5,0,0          ; vbroadcastss  0x5c3(%rip),%ymm10        # 4260 <_sk_callback_hsw+0x46f>
+  DB  196,98,125,24,29,190,5,0,0          ; vbroadcastss  0x5be(%rip),%ymm11        # 4264 <_sk_callback_hsw+0x473>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,52,89,195                    ; vmulps        %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -3689,13 +3732,13 @@ _sk_bicubic_p3x_hsw LABEL PROC
 PUBLIC _sk_bicubic_n3y_hsw
 _sk_bicubic_n3y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,141,5,0,0         ; vbroadcastss  0x58d(%rip),%ymm1        # 4180 <_sk_callback_hsw+0x463>
+  DB  196,226,125,24,13,161,5,0,0         ; vbroadcastss  0x5a1(%rip),%ymm1        # 4268 <_sk_callback_hsw+0x477>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,131,5,0,0           ; vbroadcastss  0x583(%rip),%ymm8        # 4184 <_sk_callback_hsw+0x467>
+  DB  196,98,125,24,5,151,5,0,0           ; vbroadcastss  0x597(%rip),%ymm8        # 426c <_sk_callback_hsw+0x47b>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,116,5,0,0          ; vbroadcastss  0x574(%rip),%ymm10        # 4188 <_sk_callback_hsw+0x46b>
-  DB  196,98,125,24,29,111,5,0,0          ; vbroadcastss  0x56f(%rip),%ymm11        # 418c <_sk_callback_hsw+0x46f>
+  DB  196,98,125,24,21,136,5,0,0          ; vbroadcastss  0x588(%rip),%ymm10        # 4270 <_sk_callback_hsw+0x47f>
+  DB  196,98,125,24,29,131,5,0,0          ; vbroadcastss  0x583(%rip),%ymm11        # 4274 <_sk_callback_hsw+0x483>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,36,89,193                    ; vmulps        %ymm9,%ymm11,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -3705,16 +3748,16 @@ _sk_bicubic_n3y_hsw LABEL PROC
 PUBLIC _sk_bicubic_n1y_hsw
 _sk_bicubic_n1y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,82,5,0,0          ; vbroadcastss  0x552(%rip),%ymm1        # 4190 <_sk_callback_hsw+0x473>
+  DB  196,226,125,24,13,102,5,0,0         ; vbroadcastss  0x566(%rip),%ymm1        # 4278 <_sk_callback_hsw+0x487>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,72,5,0,0            ; vbroadcastss  0x548(%rip),%ymm8        # 4194 <_sk_callback_hsw+0x477>
+  DB  196,98,125,24,5,92,5,0,0            ; vbroadcastss  0x55c(%rip),%ymm8        # 427c <_sk_callback_hsw+0x48b>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,62,5,0,0           ; vbroadcastss  0x53e(%rip),%ymm9        # 4198 <_sk_callback_hsw+0x47b>
-  DB  196,98,125,24,21,57,5,0,0           ; vbroadcastss  0x539(%rip),%ymm10        # 419c <_sk_callback_hsw+0x47f>
+  DB  196,98,125,24,13,82,5,0,0           ; vbroadcastss  0x552(%rip),%ymm9        # 4280 <_sk_callback_hsw+0x48f>
+  DB  196,98,125,24,21,77,5,0,0           ; vbroadcastss  0x54d(%rip),%ymm10        # 4284 <_sk_callback_hsw+0x493>
   DB  196,66,61,168,209                   ; vfmadd213ps   %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,13,47,5,0,0           ; vbroadcastss  0x52f(%rip),%ymm9        # 41a0 <_sk_callback_hsw+0x483>
+  DB  196,98,125,24,13,67,5,0,0           ; vbroadcastss  0x543(%rip),%ymm9        # 4288 <_sk_callback_hsw+0x497>
   DB  196,66,61,184,202                   ; vfmadd231ps   %ymm10,%ymm8,%ymm9
-  DB  196,98,125,24,21,37,5,0,0           ; vbroadcastss  0x525(%rip),%ymm10        # 41a4 <_sk_callback_hsw+0x487>
+  DB  196,98,125,24,21,57,5,0,0           ; vbroadcastss  0x539(%rip),%ymm10        # 428c <_sk_callback_hsw+0x49b>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  197,124,17,144,160,0,0,0            ; vmovups       %ymm10,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3723,14 +3766,14 @@ _sk_bicubic_n1y_hsw LABEL PROC
 PUBLIC _sk_bicubic_p1y_hsw
 _sk_bicubic_p1y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,13,5,0,0            ; vbroadcastss  0x50d(%rip),%ymm8        # 41a8 <_sk_callback_hsw+0x48b>
+  DB  196,98,125,24,5,33,5,0,0            ; vbroadcastss  0x521(%rip),%ymm8        # 4290 <_sk_callback_hsw+0x49f>
   DB  197,188,88,72,32                    ; vaddps        0x20(%rax),%ymm8,%ymm1
   DB  197,124,16,72,96                    ; vmovups       0x60(%rax),%ymm9
-  DB  196,98,125,24,21,254,4,0,0          ; vbroadcastss  0x4fe(%rip),%ymm10        # 41ac <_sk_callback_hsw+0x48f>
-  DB  196,98,125,24,29,249,4,0,0          ; vbroadcastss  0x4f9(%rip),%ymm11        # 41b0 <_sk_callback_hsw+0x493>
+  DB  196,98,125,24,21,18,5,0,0           ; vbroadcastss  0x512(%rip),%ymm10        # 4294 <_sk_callback_hsw+0x4a3>
+  DB  196,98,125,24,29,13,5,0,0           ; vbroadcastss  0x50d(%rip),%ymm11        # 4298 <_sk_callback_hsw+0x4a7>
   DB  196,66,53,168,218                   ; vfmadd213ps   %ymm10,%ymm9,%ymm11
   DB  196,66,53,168,216                   ; vfmadd213ps   %ymm8,%ymm9,%ymm11
-  DB  196,98,125,24,5,234,4,0,0           ; vbroadcastss  0x4ea(%rip),%ymm8        # 41b4 <_sk_callback_hsw+0x497>
+  DB  196,98,125,24,5,254,4,0,0           ; vbroadcastss  0x4fe(%rip),%ymm8        # 429c <_sk_callback_hsw+0x4ab>
   DB  196,66,53,184,195                   ; vfmadd231ps   %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3739,12 +3782,12 @@ _sk_bicubic_p1y_hsw LABEL PROC
 PUBLIC _sk_bicubic_p3y_hsw
 _sk_bicubic_p3y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,210,4,0,0         ; vbroadcastss  0x4d2(%rip),%ymm1        # 41b8 <_sk_callback_hsw+0x49b>
+  DB  196,226,125,24,13,230,4,0,0         ; vbroadcastss  0x4e6(%rip),%ymm1        # 42a0 <_sk_callback_hsw+0x4af>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,190,4,0,0          ; vbroadcastss  0x4be(%rip),%ymm10        # 41bc <_sk_callback_hsw+0x49f>
-  DB  196,98,125,24,29,185,4,0,0          ; vbroadcastss  0x4b9(%rip),%ymm11        # 41c0 <_sk_callback_hsw+0x4a3>
+  DB  196,98,125,24,21,210,4,0,0          ; vbroadcastss  0x4d2(%rip),%ymm10        # 42a4 <_sk_callback_hsw+0x4b3>
+  DB  196,98,125,24,29,205,4,0,0          ; vbroadcastss  0x4cd(%rip),%ymm11        # 42a8 <_sk_callback_hsw+0x4b7>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,52,89,195                    ; vmulps        %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -3821,9 +3864,17 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  128,63,0                            ; cmpb          $0x0,(%rdi)
-  DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
-  DB  63                                  ; (bad)
+  DB  128,63,1                            ; cmpb          $0x1,(%rdi)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,4,0                               ; add           %al,(%rax,%rax,1)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  33,8                                ; and           %ecx,(%rax)
+  DB  130                                 ; (bad)
+  DB  60,0                                ; cmp           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  191,0,0,128,63                      ; mov           $0x3f800000,%edi
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -3845,11 +3896,13 @@ ALIGN 4
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  128,191,0,0,224,64,0                ; cmpb          $0x0,0x40e00000(%rdi)
+  DB  128,63,0                            ; cmpb          $0x0,(%rdi)
+  DB  0,128,191,0,0,224                   ; add           %al,-0x1fffff41(%rax)
+  DB  64,0,0                              ; add           %al,(%rax)
+  DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  128,63,145                          ; cmpb          $0x91,(%rdi)
+  DB  145                                 ; xchg          %eax,%ecx
   DB  131,158,61,92,143,50,63             ; sbbl          $0x3f,0x328f5c3d(%rsi)
   DB  154                                 ; (bad)
   DB  153                                 ; cltd
@@ -3893,7 +3946,7 @@ ALIGN 4
   DB  190,129,128,128,59                  ; mov           $0x3b808081,%esi
   DB  129,128,128,59,0,248,0,0,8,33       ; addl          $0x21080000,-0x7ffc480(%rax)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        3f15 <.literal4+0xdd>
+  DB  224,7                               ; loopne        3ffd <.literal4+0xf1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -3907,10 +3960,10 @@ ALIGN 4
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
   DB  0,52,255                            ; add           %dh,(%rdi,%rdi,8)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3f40 <.literal4+0x108>
+  DB  127,0                               ; jg            4028 <.literal4+0x11c>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            3fb9 <.literal4+0x181>
+  DB  119,115                             ; ja            40a1 <.literal4+0x195>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -3924,10 +3977,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3f74 <.literal4+0x13c>
+  DB  127,0                               ; jg            405c <.literal4+0x150>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            3fed <.literal4+0x1b5>
+  DB  119,115                             ; ja            40d5 <.literal4+0x1c9>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -3941,10 +3994,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3fa8 <.literal4+0x170>
+  DB  127,0                               ; jg            4090 <.literal4+0x184>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4021 <.literal4+0x1e9>
+  DB  119,115                             ; ja            4109 <.literal4+0x1fd>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -3958,10 +4011,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3fdc <.literal4+0x1a4>
+  DB  127,0                               ; jg            40c4 <.literal4+0x1b8>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4055 <.literal4+0x21d>
+  DB  119,115                             ; ja            413d <.literal4+0x231>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -3974,7 +4027,7 @@ ALIGN 4
   DB  0,75,0                              ; add           %cl,0x0(%rbx)
   DB  0,128,63,0,0,200                    ; add           %al,-0x37ffffc1(%rax)
   DB  66,0,0                              ; rex.X         add %al,(%rax)
-  DB  127,67                              ; jg            4053 <.literal4+0x21b>
+  DB  127,67                              ; jg            413b <.literal4+0x22f>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -3986,10 +4039,10 @@ ALIGN 4
   DB  190,80,128,3,62                     ; mov           $0x3e038050,%esi
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           4073 <.literal4+0x23b>
+  DB  118,63                              ; jbe           415b <.literal4+0x24f>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            4087 <.literal4+0x24f>
+  DB  127,67                              ; jg            416f <.literal4+0x263>
   DB  129,128,128,59,0,0,128,63,129,128   ; addl          $0x80813f80,0x3b80(%rax)
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,128,63,129,128,128                ; add           %al,-0x7f7f7ec1(%rax)
@@ -3998,7 +4051,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4069 <.literal4+0x231>
+  DB  224,7                               ; loopne        4151 <.literal4+0x245>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -4010,7 +4063,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4085 <.literal4+0x24d>
+  DB  224,7                               ; loopne        416d <.literal4+0x261>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -4021,7 +4074,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            40da <.literal4+0x2a2>
+  DB  124,66                              ; jl            41c2 <.literal4+0x2b6>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,55,0,15                 ; mov           %ecx,0xf003788(%rax)
@@ -4039,9 +4092,9 @@ ALIGN 4
   DB  137,136,136,59,15,0                 ; mov           %ecx,0xf3b88(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,61,0,0                  ; mov           %ecx,0x3d88(%rax)
-  DB  112,65                              ; jo            411d <.literal4+0x2e5>
+  DB  112,65                              ; jo            4205 <.literal4+0x2f9>
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            412b <.literal4+0x2f3>
+  DB  127,67                              ; jg            4213 <.literal4+0x307>
   DB  128,0,128                           ; addb          $0x80,(%rax)
   DB  55                                  ; (bad)
   DB  128,0,128                           ; addb          $0x80,(%rax)
@@ -4049,7 +4102,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  255                                 ; (bad)
-  DB  127,71                              ; jg            413f <.literal4+0x307>
+  DB  127,71                              ; jg            4227 <.literal4+0x31b>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,89                               ; ds            pop %rcx
@@ -4146,16 +4199,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004208 <_sk_callback_hsw+0xa0004eb>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a0042e8 <_sk_callback_hsw+0xa0004f7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004210 <_sk_callback_hsw+0x120004f3>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 120042f0 <_sk_callback_hsw+0x120004ff>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004218 <_sk_callback_hsw+0x1a0004fb>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a0042f8 <_sk_callback_hsw+0x1a000507>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004220 <_sk_callback_hsw+0x3000503>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004300 <_sk_callback_hsw+0x300050f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4198,16 +4251,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004268 <_sk_callback_hsw+0xa00054b>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004348 <_sk_callback_hsw+0xa000557>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004270 <_sk_callback_hsw+0x12000553>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004350 <_sk_callback_hsw+0x1200055f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004278 <_sk_callback_hsw+0x1a00055b>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004358 <_sk_callback_hsw+0x1a000567>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004280 <_sk_callback_hsw+0x3000563>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004360 <_sk_callback_hsw+0x300056f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4250,16 +4303,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a0042c8 <_sk_callback_hsw+0xa0005ab>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a0043a8 <_sk_callback_hsw+0xa0005b7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 120042d0 <_sk_callback_hsw+0x120005b3>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 120043b0 <_sk_callback_hsw+0x120005bf>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a0042d8 <_sk_callback_hsw+0x1a0005bb>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a0043b8 <_sk_callback_hsw+0x1a0005c7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 30042e0 <_sk_callback_hsw+0x30005c3>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 30043c0 <_sk_callback_hsw+0x30005cf>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4302,16 +4355,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004328 <_sk_callback_hsw+0xa00060b>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004408 <_sk_callback_hsw+0xa000617>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004330 <_sk_callback_hsw+0x12000613>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004410 <_sk_callback_hsw+0x1200061f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004338 <_sk_callback_hsw+0x1a00061b>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004418 <_sk_callback_hsw+0x1a000627>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004340 <_sk_callback_hsw+0x3000623>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004420 <_sk_callback_hsw+0x300062f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4453,14 +4506,14 @@ _sk_seed_shader_avx LABEL PROC
   DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,164,83,0,0        ; vbroadcastss  0x53a4(%rip),%ymm1        # 5504 <_sk_callback_avx+0x119>
+  DB  196,226,125,24,13,244,84,0,0        ; vbroadcastss  0x54f4(%rip),%ymm1        # 5654 <_sk_callback_avx+0x119>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,21,136,83,0,0        ; vbroadcastss  0x5388(%rip),%ymm2        # 5508 <_sk_callback_avx+0x11d>
+  DB  196,226,125,24,21,216,84,0,0        ; vbroadcastss  0x54d8(%rip),%ymm2        # 5658 <_sk_callback_avx+0x11d>
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
   DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
@@ -4468,6 +4521,70 @@ _sk_seed_shader_avx LABEL PROC
   DB  197,196,87,255                      ; vxorps        %ymm7,%ymm7,%ymm7
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_dither_avx                     ; AVX pipeline stage: builds a per-lane offset from bits of two integer lane values (presumably pixel x and y for ordered dithering -- confirm against SkJumper_stages.cpp) and adds it to ymm0, ymm1, ymm2
+_sk_dither_avx LABEL PROC
+  DB  72,173                              ; lods          %ds:(%rsi),%rax  ; fetch next 8-byte word of the stream at %rsi into %rax; used below as a base pointer (presumably this stage's context)
+  DB  197,121,110,199                     ; vmovd         %edi,%xmm8  ; start of X: take the 32-bit value in %edi
+  DB  196,65,121,112,192,0                ; vpshufd       $0x0,%xmm8,%xmm8  ; replicate it across the four low lanes
+  DB  196,67,61,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm8,%ymm8  ; copy into the upper half: all 8 lanes now hold %edi
+  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8  ; int32 -> float
+  DB  197,60,88,2                         ; vaddps        (%rdx),%ymm8,%ymm8  ; add the 8 floats stored at (%rdx) (presumably per-lane offsets -- TODO confirm)
+  DB  196,65,126,91,192                   ; vcvttps2dq    %ymm8,%ymm8  ; truncate back to int32: ymm8 = X
+  DB  76,139,0                            ; mov           (%rax),%r8  ; first field at (%rax) is a pointer
+  DB  196,66,125,24,8                     ; vbroadcastss  (%r8),%ymm9  ; ymm9 = 32-bit value it points to, in all lanes = Y
+  DB  196,65,60,87,209                    ; vxorps        %ymm9,%ymm8,%ymm10  ; ymm10 = X ^ Y
+  DB  196,98,125,24,29,144,84,0,0         ; vbroadcastss  0x5490(%rip),%ymm11        # 565c <_sk_callback_avx+0x121>  ; mask A from the constant pool (value not visible here; presumably 1)
+  DB  196,65,44,84,203                    ; vandps        %ymm11,%ymm10,%ymm9  ; ymm9 = (X^Y) & A
+  DB  196,193,25,114,241,5                ; vpslld        $0x5,%xmm9,%xmm12  ; shift left by 5, done per 128-bit half (AVX1 has no 256-bit integer shift): low half
+  DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9  ; pull out the high half
+  DB  196,193,49,114,241,5                ; vpslld        $0x5,%xmm9,%xmm9  ; shift the high half
+  DB  196,67,29,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm12,%ymm9  ; rejoin: ymm9 = ((X^Y) & A) << 5
+  DB  196,65,60,84,219                    ; vandps        %ymm11,%ymm8,%ymm11  ; ymm11 = X & A
+  DB  196,193,25,114,243,4                ; vpslld        $0x4,%xmm11,%xmm12
+  DB  196,67,125,25,219,1                 ; vextractf128  $0x1,%ymm11,%xmm11
+  DB  196,193,33,114,243,4                ; vpslld        $0x4,%xmm11,%xmm11
+  DB  196,67,29,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm12,%ymm11  ; ymm11 = (X & A) << 4
+  DB  196,98,125,24,37,81,84,0,0          ; vbroadcastss  0x5451(%rip),%ymm12        # 5660 <_sk_callback_avx+0x125>  ; mask B (value not visible here; presumably 4)
+  DB  196,98,125,24,45,76,84,0,0          ; vbroadcastss  0x544c(%rip),%ymm13        # 5664 <_sk_callback_avx+0x129>  ; mask C (value not visible here; presumably 2)
+  DB  196,65,44,84,245                    ; vandps        %ymm13,%ymm10,%ymm14  ; ymm14 = (X^Y) & C
+  DB  196,193,1,114,246,2                 ; vpslld        $0x2,%xmm14,%xmm15
+  DB  196,67,125,25,246,1                 ; vextractf128  $0x1,%ymm14,%xmm14
+  DB  196,193,9,114,246,2                 ; vpslld        $0x2,%xmm14,%xmm14
+  DB  196,67,5,24,246,1                   ; vinsertf128   $0x1,%xmm14,%ymm15,%ymm14  ; ymm14 = ((X^Y) & C) << 2
+  DB  196,65,60,84,237                    ; vandps        %ymm13,%ymm8,%ymm13  ; ymm13 = X & C
+  DB  196,65,17,254,253                   ; vpaddd        %xmm13,%xmm13,%xmm15  ; v + v, i.e. a left shift by 1 (low half)
+  DB  196,67,125,25,237,1                 ; vextractf128  $0x1,%ymm13,%xmm13
+  DB  196,65,17,254,237                   ; vpaddd        %xmm13,%xmm13,%xmm13
+  DB  196,67,5,24,237,1                   ; vinsertf128   $0x1,%xmm13,%ymm15,%ymm13  ; ymm13 = (X & C) << 1
+  DB  196,65,44,84,212                    ; vandps        %ymm12,%ymm10,%ymm10  ; ymm10 = (X^Y) & B
+  DB  196,193,1,114,210,1                 ; vpsrld        $0x1,%xmm10,%xmm15
+  DB  196,67,125,25,210,1                 ; vextractf128  $0x1,%ymm10,%xmm10
+  DB  196,193,41,114,210,1                ; vpsrld        $0x1,%xmm10,%xmm10
+  DB  196,67,5,24,210,1                   ; vinsertf128   $0x1,%xmm10,%ymm15,%ymm10  ; ymm10 = ((X^Y) & B) >> 1
+  DB  196,65,60,84,196                    ; vandps        %ymm12,%ymm8,%ymm8  ; ymm8 = X & B
+  DB  196,193,25,114,208,2                ; vpsrld        $0x2,%xmm8,%xmm12
+  DB  196,67,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm8
+  DB  196,193,57,114,208,2                ; vpsrld        $0x2,%xmm8,%xmm8
+  DB  196,67,29,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm12,%ymm8  ; ymm8 = (X & B) >> 2
+  DB  196,65,20,86,219                    ; vorps         %ymm11,%ymm13,%ymm11  ; OR the six bit fields together into ymm8 (this and the next four instructions)
+  DB  196,65,36,86,192                    ; vorps         %ymm8,%ymm11,%ymm8
+  DB  196,65,52,86,206                    ; vorps         %ymm14,%ymm9,%ymm9
+  DB  196,65,60,86,193                    ; vorps         %ymm9,%ymm8,%ymm8
+  DB  196,65,60,86,194                    ; vorps         %ymm10,%ymm8,%ymm8  ; ymm8 = combined integer index M
+  DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8  ; M as float
+  DB  196,98,125,24,13,183,83,0,0         ; vbroadcastss  0x53b7(%rip),%ymm9        # 5668 <_sk_callback_avx+0x12d>  ; scale constant (value not visible in this chunk)
+  DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8  ; M * scale
+  DB  196,98,125,24,13,173,83,0,0         ; vbroadcastss  0x53ad(%rip),%ymm9        # 566c <_sk_callback_avx+0x131>  ; bias constant (value not visible in this chunk)
+  DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8  ; ymm8 = M * scale + bias
+  DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9  ; float stored at offset 8 from %rax (presumably the dither rate -- TODO confirm field name)
+  DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8  ; scale the offset by that float
+  DB  197,60,89,195                       ; vmulps        %ymm3,%ymm8,%ymm8  ; and by ymm3 (presumably the alpha channel -- confirm)
+  DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0  ; add the same offset to ymm0 ...
+  DB  197,188,88,201                      ; vaddps        %ymm1,%ymm8,%ymm1  ; ... ymm1 ...
+  DB  197,188,88,210                      ; vaddps        %ymm2,%ymm8,%ymm2  ; ... and ymm2; ymm3 itself is left unchanged
+  DB  72,173                              ; lods          %ds:(%rsi),%rax  ; fetch the next 8-byte word from the stream at %rsi
+  DB  255,224                             ; jmpq          *%rax  ; and jump to it (tail-jump to the following stage)
+
 PUBLIC _sk_constant_color_avx
 _sk_constant_color_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4490,7 +4607,7 @@ _sk_clear_avx LABEL PROC
 PUBLIC _sk_srcatop_avx
 _sk_srcatop_avx LABEL PROC
   DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
-  DB  196,98,125,24,5,56,83,0,0           ; vbroadcastss  0x5338(%rip),%ymm8        # 550c <_sk_callback_avx+0x121>
+  DB  196,98,125,24,5,79,83,0,0           ; vbroadcastss  0x534f(%rip),%ymm8        # 5670 <_sk_callback_avx+0x135>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -4509,7 +4626,7 @@ _sk_srcatop_avx LABEL PROC
 PUBLIC _sk_dstatop_avx
 _sk_dstatop_avx LABEL PROC
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
-  DB  196,98,125,24,13,250,82,0,0         ; vbroadcastss  0x52fa(%rip),%ymm9        # 5510 <_sk_callback_avx+0x125>
+  DB  196,98,125,24,13,17,83,0,0          ; vbroadcastss  0x5311(%rip),%ymm9        # 5674 <_sk_callback_avx+0x139>
   DB  197,52,92,207                       ; vsubps        %ymm7,%ymm9,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
@@ -4545,7 +4662,7 @@ _sk_dstin_avx LABEL PROC
 
 PUBLIC _sk_srcout_avx
 _sk_srcout_avx LABEL PROC
-  DB  196,98,125,24,5,153,82,0,0          ; vbroadcastss  0x5299(%rip),%ymm8        # 5514 <_sk_callback_avx+0x129>
+  DB  196,98,125,24,5,176,82,0,0          ; vbroadcastss  0x52b0(%rip),%ymm8        # 5678 <_sk_callback_avx+0x13d>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -4556,7 +4673,7 @@ _sk_srcout_avx LABEL PROC
 
 PUBLIC _sk_dstout_avx
 _sk_dstout_avx LABEL PROC
-  DB  196,226,125,24,5,124,82,0,0         ; vbroadcastss  0x527c(%rip),%ymm0        # 5518 <_sk_callback_avx+0x12d>
+  DB  196,226,125,24,5,147,82,0,0         ; vbroadcastss  0x5293(%rip),%ymm0        # 567c <_sk_callback_avx+0x141>
   DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
   DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
   DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
@@ -4567,7 +4684,7 @@ _sk_dstout_avx LABEL PROC
 
 PUBLIC _sk_srcover_avx
 _sk_srcover_avx LABEL PROC
-  DB  196,98,125,24,5,95,82,0,0           ; vbroadcastss  0x525f(%rip),%ymm8        # 551c <_sk_callback_avx+0x131>
+  DB  196,98,125,24,5,118,82,0,0          ; vbroadcastss  0x5276(%rip),%ymm8        # 5680 <_sk_callback_avx+0x145>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -4582,7 +4699,7 @@ _sk_srcover_avx LABEL PROC
 
 PUBLIC _sk_dstover_avx
 _sk_dstover_avx LABEL PROC
-  DB  196,98,125,24,5,50,82,0,0           ; vbroadcastss  0x5232(%rip),%ymm8        # 5520 <_sk_callback_avx+0x135>
+  DB  196,98,125,24,5,73,82,0,0           ; vbroadcastss  0x5249(%rip),%ymm8        # 5684 <_sk_callback_avx+0x149>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
@@ -4606,7 +4723,7 @@ _sk_modulate_avx LABEL PROC
 
 PUBLIC _sk_multiply_avx
 _sk_multiply_avx LABEL PROC
-  DB  196,98,125,24,5,241,81,0,0          ; vbroadcastss  0x51f1(%rip),%ymm8        # 5524 <_sk_callback_avx+0x139>
+  DB  196,98,125,24,5,8,82,0,0            ; vbroadcastss  0x5208(%rip),%ymm8        # 5688 <_sk_callback_avx+0x14d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -4660,7 +4777,7 @@ _sk_screen_avx LABEL PROC
 
 PUBLIC _sk_xor__avx
 _sk_xor__avx LABEL PROC
-  DB  196,98,125,24,5,64,81,0,0           ; vbroadcastss  0x5140(%rip),%ymm8        # 5528 <_sk_callback_avx+0x13d>
+  DB  196,98,125,24,5,87,81,0,0           ; vbroadcastss  0x5157(%rip),%ymm8        # 568c <_sk_callback_avx+0x151>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -4695,7 +4812,7 @@ _sk_darken_avx LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,192,80,0,0          ; vbroadcastss  0x50c0(%rip),%ymm8        # 552c <_sk_callback_avx+0x141>
+  DB  196,98,125,24,5,215,80,0,0          ; vbroadcastss  0x50d7(%rip),%ymm8        # 5690 <_sk_callback_avx+0x155>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -4719,7 +4836,7 @@ _sk_lighten_avx LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,108,80,0,0          ; vbroadcastss  0x506c(%rip),%ymm8        # 5530 <_sk_callback_avx+0x145>
+  DB  196,98,125,24,5,131,80,0,0          ; vbroadcastss  0x5083(%rip),%ymm8        # 5694 <_sk_callback_avx+0x159>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -4746,7 +4863,7 @@ _sk_difference_avx LABEL PROC
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,12,80,0,0           ; vbroadcastss  0x500c(%rip),%ymm8        # 5534 <_sk_callback_avx+0x149>
+  DB  196,98,125,24,5,35,80,0,0           ; vbroadcastss  0x5023(%rip),%ymm8        # 5698 <_sk_callback_avx+0x15d>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -4767,7 +4884,7 @@ _sk_exclusion_avx LABEL PROC
   DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,199,79,0,0          ; vbroadcastss  0x4fc7(%rip),%ymm8        # 5538 <_sk_callback_avx+0x14d>
+  DB  196,98,125,24,5,222,79,0,0          ; vbroadcastss  0x4fde(%rip),%ymm8        # 569c <_sk_callback_avx+0x161>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -4776,7 +4893,7 @@ _sk_exclusion_avx LABEL PROC
 
 PUBLIC _sk_colorburn_avx
 _sk_colorburn_avx LABEL PROC
-  DB  196,98,125,24,5,178,79,0,0          ; vbroadcastss  0x4fb2(%rip),%ymm8        # 553c <_sk_callback_avx+0x151>
+  DB  196,98,125,24,5,201,79,0,0          ; vbroadcastss  0x4fc9(%rip),%ymm8        # 56a0 <_sk_callback_avx+0x165>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
@@ -4836,7 +4953,7 @@ _sk_colorburn_avx LABEL PROC
 PUBLIC _sk_colordodge_avx
 _sk_colordodge_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,13,174,78,0,0         ; vbroadcastss  0x4eae(%rip),%ymm9        # 5540 <_sk_callback_avx+0x155>
+  DB  196,98,125,24,13,197,78,0,0         ; vbroadcastss  0x4ec5(%rip),%ymm9        # 56a4 <_sk_callback_avx+0x169>
   DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
@@ -4891,7 +5008,7 @@ _sk_colordodge_avx LABEL PROC
 
 PUBLIC _sk_hardlight_avx
 _sk_hardlight_avx LABEL PROC
-  DB  196,98,125,24,5,192,77,0,0          ; vbroadcastss  0x4dc0(%rip),%ymm8        # 5544 <_sk_callback_avx+0x159>
+  DB  196,98,125,24,5,215,77,0,0          ; vbroadcastss  0x4dd7(%rip),%ymm8        # 56a8 <_sk_callback_avx+0x16d>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -4944,7 +5061,7 @@ _sk_hardlight_avx LABEL PROC
 
 PUBLIC _sk_overlay_avx
 _sk_overlay_avx LABEL PROC
-  DB  196,98,125,24,5,233,76,0,0          ; vbroadcastss  0x4ce9(%rip),%ymm8        # 5548 <_sk_callback_avx+0x15d>
+  DB  196,98,125,24,5,0,77,0,0            ; vbroadcastss  0x4d00(%rip),%ymm8        # 56ac <_sk_callback_avx+0x171>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5009,10 +5126,10 @@ _sk_softlight_avx LABEL PROC
   DB  196,65,60,88,192                    ; vaddps        %ymm8,%ymm8,%ymm8
   DB  196,65,60,89,216                    ; vmulps        %ymm8,%ymm8,%ymm11
   DB  196,65,60,88,195                    ; vaddps        %ymm11,%ymm8,%ymm8
-  DB  196,98,125,24,29,220,75,0,0         ; vbroadcastss  0x4bdc(%rip),%ymm11        # 5550 <_sk_callback_avx+0x165>
+  DB  196,98,125,24,29,243,75,0,0         ; vbroadcastss  0x4bf3(%rip),%ymm11        # 56b4 <_sk_callback_avx+0x179>
   DB  196,65,28,88,235                    ; vaddps        %ymm11,%ymm12,%ymm13
   DB  196,65,20,89,192                    ; vmulps        %ymm8,%ymm13,%ymm8
-  DB  196,98,125,24,45,205,75,0,0         ; vbroadcastss  0x4bcd(%rip),%ymm13        # 5554 <_sk_callback_avx+0x169>
+  DB  196,98,125,24,45,228,75,0,0         ; vbroadcastss  0x4be4(%rip),%ymm13        # 56b8 <_sk_callback_avx+0x17d>
   DB  196,65,28,89,245                    ; vmulps        %ymm13,%ymm12,%ymm14
   DB  196,65,12,88,192                    ; vaddps        %ymm8,%ymm14,%ymm8
   DB  196,65,124,82,244                   ; vrsqrtps      %ymm12,%ymm14
@@ -5023,7 +5140,7 @@ _sk_softlight_avx LABEL PROC
   DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
   DB  196,67,13,74,240,240                ; vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   DB  197,116,88,249                      ; vaddps        %ymm1,%ymm1,%ymm15
-  DB  196,98,125,24,5,139,75,0,0          ; vbroadcastss  0x4b8b(%rip),%ymm8        # 554c <_sk_callback_avx+0x161>
+  DB  196,98,125,24,5,162,75,0,0          ; vbroadcastss  0x4ba2(%rip),%ymm8        # 56b0 <_sk_callback_avx+0x175>
   DB  196,65,60,92,228                    ; vsubps        %ymm12,%ymm8,%ymm12
   DB  197,132,92,195                      ; vsubps        %ymm3,%ymm15,%ymm0
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
@@ -5127,7 +5244,7 @@ _sk_clamp_0_avx LABEL PROC
 
 PUBLIC _sk_clamp_1_avx
 _sk_clamp_1_avx LABEL PROC
-  DB  196,98,125,24,5,217,73,0,0          ; vbroadcastss  0x49d9(%rip),%ymm8        # 5558 <_sk_callback_avx+0x16d>
+  DB  196,98,125,24,5,240,73,0,0          ; vbroadcastss  0x49f0(%rip),%ymm8        # 56bc <_sk_callback_avx+0x181>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
@@ -5137,7 +5254,7 @@ _sk_clamp_1_avx LABEL PROC
 
 PUBLIC _sk_clamp_a_avx
 _sk_clamp_a_avx LABEL PROC
-  DB  196,98,125,24,5,188,73,0,0          ; vbroadcastss  0x49bc(%rip),%ymm8        # 555c <_sk_callback_avx+0x171>
+  DB  196,98,125,24,5,211,73,0,0          ; vbroadcastss  0x49d3(%rip),%ymm8        # 56c0 <_sk_callback_avx+0x185>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
   DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
@@ -5209,7 +5326,7 @@ PUBLIC _sk_unpremul_avx
 _sk_unpremul_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
-  DB  196,98,125,24,21,4,73,0,0           ; vbroadcastss  0x4904(%rip),%ymm10        # 5560 <_sk_callback_avx+0x175>
+  DB  196,98,125,24,21,27,73,0,0          ; vbroadcastss  0x491b(%rip),%ymm10        # 56c4 <_sk_callback_avx+0x189>
   DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
   DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
@@ -5220,17 +5337,17 @@ _sk_unpremul_avx LABEL PROC
 
 PUBLIC _sk_from_srgb_avx
 _sk_from_srgb_avx LABEL PROC
-  DB  196,98,125,24,5,229,72,0,0          ; vbroadcastss  0x48e5(%rip),%ymm8        # 5564 <_sk_callback_avx+0x179>
+  DB  196,98,125,24,5,252,72,0,0          ; vbroadcastss  0x48fc(%rip),%ymm8        # 56c8 <_sk_callback_avx+0x18d>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
-  DB  196,98,125,24,29,215,72,0,0         ; vbroadcastss  0x48d7(%rip),%ymm11        # 5568 <_sk_callback_avx+0x17d>
+  DB  196,98,125,24,29,238,72,0,0         ; vbroadcastss  0x48ee(%rip),%ymm11        # 56cc <_sk_callback_avx+0x191>
   DB  196,65,124,89,227                   ; vmulps        %ymm11,%ymm0,%ymm12
-  DB  196,98,125,24,45,205,72,0,0         ; vbroadcastss  0x48cd(%rip),%ymm13        # 556c <_sk_callback_avx+0x181>
+  DB  196,98,125,24,45,228,72,0,0         ; vbroadcastss  0x48e4(%rip),%ymm13        # 56d0 <_sk_callback_avx+0x195>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,44,89,212                    ; vmulps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,190,72,0,0         ; vbroadcastss  0x48be(%rip),%ymm12        # 5570 <_sk_callback_avx+0x185>
+  DB  196,98,125,24,37,213,72,0,0         ; vbroadcastss  0x48d5(%rip),%ymm12        # 56d4 <_sk_callback_avx+0x199>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,53,180,72,0,0         ; vbroadcastss  0x48b4(%rip),%ymm14        # 5574 <_sk_callback_avx+0x189>
+  DB  196,98,125,24,53,203,72,0,0         ; vbroadcastss  0x48cb(%rip),%ymm14        # 56d8 <_sk_callback_avx+0x19d>
   DB  196,193,124,194,198,1               ; vcmpltps      %ymm14,%ymm0,%ymm0
   DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   DB  196,65,116,89,200                   ; vmulps        %ymm8,%ymm1,%ymm9
@@ -5257,18 +5374,18 @@ _sk_to_srgb_avx LABEL PROC
   DB  197,124,82,192                      ; vrsqrtps      %ymm0,%ymm8
   DB  196,65,124,83,200                   ; vrcpps        %ymm8,%ymm9
   DB  196,65,124,82,208                   ; vrsqrtps      %ymm8,%ymm10
-  DB  196,98,125,24,5,63,72,0,0           ; vbroadcastss  0x483f(%rip),%ymm8        # 5578 <_sk_callback_avx+0x18d>
+  DB  196,98,125,24,5,86,72,0,0           ; vbroadcastss  0x4856(%rip),%ymm8        # 56dc <_sk_callback_avx+0x1a1>
   DB  196,65,124,89,216                   ; vmulps        %ymm8,%ymm0,%ymm11
-  DB  196,98,125,24,37,53,72,0,0          ; vbroadcastss  0x4835(%rip),%ymm12        # 557c <_sk_callback_avx+0x191>
+  DB  196,98,125,24,37,76,72,0,0          ; vbroadcastss  0x484c(%rip),%ymm12        # 56e0 <_sk_callback_avx+0x1a5>
   DB  196,65,52,89,204                    ; vmulps        %ymm12,%ymm9,%ymm9
-  DB  196,98,125,24,45,43,72,0,0          ; vbroadcastss  0x482b(%rip),%ymm13        # 5580 <_sk_callback_avx+0x195>
+  DB  196,98,125,24,45,66,72,0,0          ; vbroadcastss  0x4842(%rip),%ymm13        # 56e4 <_sk_callback_avx+0x1a9>
   DB  196,65,52,88,205                    ; vaddps        %ymm13,%ymm9,%ymm9
-  DB  196,98,125,24,53,33,72,0,0          ; vbroadcastss  0x4821(%rip),%ymm14        # 5584 <_sk_callback_avx+0x199>
+  DB  196,98,125,24,53,56,72,0,0          ; vbroadcastss  0x4838(%rip),%ymm14        # 56e8 <_sk_callback_avx+0x1ad>
   DB  196,65,44,89,214                    ; vmulps        %ymm14,%ymm10,%ymm10
   DB  196,65,44,88,201                    ; vaddps        %ymm9,%ymm10,%ymm9
-  DB  196,98,125,24,21,18,72,0,0          ; vbroadcastss  0x4812(%rip),%ymm10        # 5588 <_sk_callback_avx+0x19d>
+  DB  196,98,125,24,21,41,72,0,0          ; vbroadcastss  0x4829(%rip),%ymm10        # 56ec <_sk_callback_avx+0x1b1>
   DB  196,65,44,93,201                    ; vminps        %ymm9,%ymm10,%ymm9
-  DB  196,98,125,24,61,8,72,0,0           ; vbroadcastss  0x4808(%rip),%ymm15        # 558c <_sk_callback_avx+0x1a1>
+  DB  196,98,125,24,61,31,72,0,0          ; vbroadcastss  0x481f(%rip),%ymm15        # 56f0 <_sk_callback_avx+0x1b5>
   DB  196,193,124,194,199,1               ; vcmpltps      %ymm15,%ymm0,%ymm0
   DB  196,195,53,74,195,0                 ; vblendvps     %ymm0,%ymm11,%ymm9,%ymm0
   DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
@@ -5303,7 +5420,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  197,124,93,201                      ; vminps        %ymm1,%ymm0,%ymm9
   DB  197,52,93,202                       ; vminps        %ymm2,%ymm9,%ymm9
   DB  196,65,60,92,209                    ; vsubps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,110,71,0,0         ; vbroadcastss  0x476e(%rip),%ymm11        # 5590 <_sk_callback_avx+0x1a5>
+  DB  196,98,125,24,29,133,71,0,0         ; vbroadcastss  0x4785(%rip),%ymm11        # 56f4 <_sk_callback_avx+0x1b9>
   DB  196,65,36,94,218                    ; vdivps        %ymm10,%ymm11,%ymm11
   DB  197,116,92,226                      ; vsubps        %ymm2,%ymm1,%ymm12
   DB  196,65,28,89,227                    ; vmulps        %ymm11,%ymm12,%ymm12
@@ -5313,19 +5430,19 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  197,252,92,201                      ; vsubps        %ymm1,%ymm0,%ymm1
   DB  196,193,116,89,203                  ; vmulps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,71,71,0,0          ; vbroadcastss  0x4747(%rip),%ymm11        # 559c <_sk_callback_avx+0x1b1>
+  DB  196,98,125,24,29,94,71,0,0          ; vbroadcastss  0x475e(%rip),%ymm11        # 5700 <_sk_callback_avx+0x1c5>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,53,71,0,0          ; vbroadcastss  0x4735(%rip),%ymm11        # 5598 <_sk_callback_avx+0x1ad>
+  DB  196,98,125,24,29,76,71,0,0          ; vbroadcastss  0x474c(%rip),%ymm11        # 56fc <_sk_callback_avx+0x1c1>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,227,117,74,202,224              ; vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,29,71,0,0         ; vbroadcastss  0x471d(%rip),%ymm2        # 5594 <_sk_callback_avx+0x1a9>
+  DB  196,226,125,24,21,52,71,0,0         ; vbroadcastss  0x4734(%rip),%ymm2        # 56f8 <_sk_callback_avx+0x1bd>
   DB  196,65,12,87,246                    ; vxorps        %ymm14,%ymm14,%ymm14
   DB  196,227,13,74,210,208               ; vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   DB  197,188,194,192,0                   ; vcmpeqps      %ymm0,%ymm8,%ymm0
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
   DB  196,227,117,74,194,0                ; vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   DB  196,193,60,88,201                   ; vaddps        %ymm9,%ymm8,%ymm1
-  DB  196,98,125,24,37,4,71,0,0           ; vbroadcastss  0x4704(%rip),%ymm12        # 55a4 <_sk_callback_avx+0x1b9>
+  DB  196,98,125,24,37,27,71,0,0          ; vbroadcastss  0x471b(%rip),%ymm12        # 5708 <_sk_callback_avx+0x1cd>
   DB  196,193,116,89,212                  ; vmulps        %ymm12,%ymm1,%ymm2
   DB  197,28,194,226,1                    ; vcmpltps      %ymm2,%ymm12,%ymm12
   DB  196,65,36,92,216                    ; vsubps        %ymm8,%ymm11,%ymm11
@@ -5335,7 +5452,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  196,195,125,74,198,128              ; vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   DB  196,195,117,74,206,128              ; vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  DB  196,98,125,24,5,199,70,0,0          ; vbroadcastss  0x46c7(%rip),%ymm8        # 55a0 <_sk_callback_avx+0x1b5>
+  DB  196,98,125,24,5,222,70,0,0          ; vbroadcastss  0x46de(%rip),%ymm8        # 5704 <_sk_callback_avx+0x1c9>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -5350,7 +5467,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,252,17,28,36                    ; vmovups       %ymm3,(%rsp)
   DB  197,252,40,225                      ; vmovaps       %ymm1,%ymm4
   DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
-  DB  196,98,125,24,5,142,70,0,0          ; vbroadcastss  0x468e(%rip),%ymm8        # 55a8 <_sk_callback_avx+0x1bd>
+  DB  196,98,125,24,5,165,70,0,0          ; vbroadcastss  0x46a5(%rip),%ymm8        # 570c <_sk_callback_avx+0x1d1>
   DB  197,60,194,202,2                    ; vcmpleps      %ymm2,%ymm8,%ymm9
   DB  197,92,89,210                       ; vmulps        %ymm2,%ymm4,%ymm10
   DB  196,65,92,92,218                    ; vsubps        %ymm10,%ymm4,%ymm11
@@ -5358,23 +5475,23 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,52,88,210                       ; vaddps        %ymm2,%ymm9,%ymm10
   DB  197,108,88,202                      ; vaddps        %ymm2,%ymm2,%ymm9
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,29,104,70,0,0         ; vbroadcastss  0x4668(%rip),%ymm11        # 55ac <_sk_callback_avx+0x1c1>
+  DB  196,98,125,24,29,127,70,0,0         ; vbroadcastss  0x467f(%rip),%ymm11        # 5710 <_sk_callback_avx+0x1d5>
   DB  196,65,100,88,219                   ; vaddps        %ymm11,%ymm3,%ymm11
   DB  196,67,125,8,227,1                  ; vroundps      $0x1,%ymm11,%ymm12
   DB  196,65,36,92,252                    ; vsubps        %ymm12,%ymm11,%ymm15
   DB  196,65,44,92,217                    ; vsubps        %ymm9,%ymm10,%ymm11
-  DB  196,98,125,24,37,82,70,0,0          ; vbroadcastss  0x4652(%rip),%ymm12        # 55b4 <_sk_callback_avx+0x1c9>
+  DB  196,98,125,24,37,105,70,0,0         ; vbroadcastss  0x4669(%rip),%ymm12        # 5718 <_sk_callback_avx+0x1dd>
   DB  196,193,4,89,196                    ; vmulps        %ymm12,%ymm15,%ymm0
-  DB  196,98,125,24,45,72,70,0,0          ; vbroadcastss  0x4648(%rip),%ymm13        # 55b8 <_sk_callback_avx+0x1cd>
+  DB  196,98,125,24,45,95,70,0,0          ; vbroadcastss  0x465f(%rip),%ymm13        # 571c <_sk_callback_avx+0x1e1>
   DB  197,20,92,240                       ; vsubps        %ymm0,%ymm13,%ymm14
   DB  196,65,36,89,246                    ; vmulps        %ymm14,%ymm11,%ymm14
   DB  196,65,52,88,246                    ; vaddps        %ymm14,%ymm9,%ymm14
-  DB  196,226,125,24,13,41,70,0,0         ; vbroadcastss  0x4629(%rip),%ymm1        # 55b0 <_sk_callback_avx+0x1c5>
+  DB  196,226,125,24,13,64,70,0,0         ; vbroadcastss  0x4640(%rip),%ymm1        # 5714 <_sk_callback_avx+0x1d9>
   DB  196,193,116,194,255,2               ; vcmpleps      %ymm15,%ymm1,%ymm7
   DB  196,195,13,74,249,112               ; vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   DB  196,65,60,194,247,2                 ; vcmpleps      %ymm15,%ymm8,%ymm14
   DB  196,227,45,74,255,224               ; vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  DB  196,98,125,24,53,20,70,0,0          ; vbroadcastss  0x4614(%rip),%ymm14        # 55bc <_sk_callback_avx+0x1d1>
+  DB  196,98,125,24,53,43,70,0,0          ; vbroadcastss  0x462b(%rip),%ymm14        # 5720 <_sk_callback_avx+0x1e5>
   DB  196,65,12,194,255,2                 ; vcmpleps      %ymm15,%ymm14,%ymm15
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5393,7 +5510,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,164,89,247                      ; vmulps        %ymm7,%ymm11,%ymm6
   DB  197,180,88,246                      ; vaddps        %ymm6,%ymm9,%ymm6
   DB  196,227,77,74,237,0                 ; vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  DB  196,226,125,24,5,182,69,0,0         ; vbroadcastss  0x45b6(%rip),%ymm0        # 55c0 <_sk_callback_avx+0x1d5>
+  DB  196,226,125,24,5,205,69,0,0         ; vbroadcastss  0x45cd(%rip),%ymm0        # 5724 <_sk_callback_avx+0x1e9>
   DB  197,228,88,192                      ; vaddps        %ymm0,%ymm3,%ymm0
   DB  196,227,125,8,216,1                 ; vroundps      $0x1,%ymm0,%ymm3
   DB  197,252,92,195                      ; vsubps        %ymm3,%ymm0,%ymm0
@@ -5441,14 +5558,14 @@ _sk_scale_u8_avx LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,68                              ; jne           1107 <_sk_scale_u8_avx+0x54>
+  DB  117,68                              ; jne           1254 <_sk_scale_u8_avx+0x54>
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
   DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,217,68,0,0         ; vbroadcastss  0x44d9(%rip),%ymm9        # 55c4 <_sk_callback_avx+0x1d9>
+  DB  196,98,125,24,13,240,68,0,0         ; vbroadcastss  0x44f0(%rip),%ymm9        # 5728 <_sk_callback_avx+0x1ed>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -5466,9 +5583,9 @@ _sk_scale_u8_avx LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           110f <_sk_scale_u8_avx+0x5c>
+  DB  117,234                             ; jne           125c <_sk_scale_u8_avx+0x5c>
   DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
-  DB  235,155                             ; jmp           10c7 <_sk_scale_u8_avx+0x14>
+  DB  235,155                             ; jmp           1214 <_sk_scale_u8_avx+0x14>
 
 PUBLIC _sk_lerp_1_float_avx
 _sk_lerp_1_float_avx LABEL PROC
@@ -5496,14 +5613,14 @@ _sk_lerp_u8_avx LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,104                             ; jne           11e3 <_sk_lerp_u8_avx+0x78>
+  DB  117,104                             ; jne           1330 <_sk_lerp_u8_avx+0x78>
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,121,49,200                   ; vpmovzxbd     %xmm8,%xmm9
   DB  196,67,121,4,192,229                ; vpermilps     $0xe5,%xmm8,%xmm8
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,37,68,0,0          ; vbroadcastss  0x4425(%rip),%ymm9        # 55c8 <_sk_callback_avx+0x1dd>
+  DB  196,98,125,24,13,60,68,0,0          ; vbroadcastss  0x443c(%rip),%ymm9        # 572c <_sk_callback_avx+0x1f1>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -5529,35 +5646,35 @@ _sk_lerp_u8_avx LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           11eb <_sk_lerp_u8_avx+0x80>
+  DB  117,234                             ; jne           1338 <_sk_lerp_u8_avx+0x80>
   DB  196,65,249,110,193                  ; vmovq         %r9,%xmm8
-  DB  233,116,255,255,255                 ; jmpq          117f <_sk_lerp_u8_avx+0x14>
+  DB  233,116,255,255,255                 ; jmpq          12cc <_sk_lerp_u8_avx+0x14>
 
 PUBLIC _sk_lerp_565_avx
 _sk_lerp_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,174,0,0,0                    ; jne           12c7 <_sk_lerp_565_avx+0xbc>
+  DB  15,133,174,0,0,0                    ; jne           1414 <_sk_lerp_565_avx+0xbc>
   DB  196,65,122,111,4,122                ; vmovdqu       (%r10,%rdi,2),%xmm8
   DB  197,225,239,219                     ; vpxor         %xmm3,%xmm3,%xmm3
   DB  197,185,105,219                     ; vpunpckhwd    %xmm3,%xmm8,%xmm3
   DB  196,66,121,51,192                   ; vpmovzxwd     %xmm8,%xmm8
   DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
-  DB  196,98,125,24,5,145,67,0,0          ; vbroadcastss  0x4391(%rip),%ymm8        # 55cc <_sk_callback_avx+0x1e1>
+  DB  196,98,125,24,5,168,67,0,0          ; vbroadcastss  0x43a8(%rip),%ymm8        # 5730 <_sk_callback_avx+0x1f5>
   DB  196,65,100,84,192                   ; vandps        %ymm8,%ymm3,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,130,67,0,0         ; vbroadcastss  0x4382(%rip),%ymm9        # 55d0 <_sk_callback_avx+0x1e5>
+  DB  196,98,125,24,13,153,67,0,0         ; vbroadcastss  0x4399(%rip),%ymm9        # 5734 <_sk_callback_avx+0x1f9>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,120,67,0,0         ; vbroadcastss  0x4378(%rip),%ymm9        # 55d4 <_sk_callback_avx+0x1e9>
+  DB  196,98,125,24,13,143,67,0,0         ; vbroadcastss  0x438f(%rip),%ymm9        # 5738 <_sk_callback_avx+0x1fd>
   DB  196,65,100,84,201                   ; vandps        %ymm9,%ymm3,%ymm9
   DB  196,65,124,91,201                   ; vcvtdq2ps     %ymm9,%ymm9
-  DB  196,98,125,24,21,105,67,0,0         ; vbroadcastss  0x4369(%rip),%ymm10        # 55d8 <_sk_callback_avx+0x1ed>
+  DB  196,98,125,24,21,128,67,0,0         ; vbroadcastss  0x4380(%rip),%ymm10        # 573c <_sk_callback_avx+0x201>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,95,67,0,0          ; vbroadcastss  0x435f(%rip),%ymm10        # 55dc <_sk_callback_avx+0x1f1>
+  DB  196,98,125,24,21,118,67,0,0         ; vbroadcastss  0x4376(%rip),%ymm10        # 5740 <_sk_callback_avx+0x205>
   DB  196,193,100,84,218                  ; vandps        %ymm10,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,21,81,67,0,0          ; vbroadcastss  0x4351(%rip),%ymm10        # 55e0 <_sk_callback_avx+0x1f5>
+  DB  196,98,125,24,21,104,67,0,0         ; vbroadcastss  0x4368(%rip),%ymm10        # 5744 <_sk_callback_avx+0x209>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -5569,16 +5686,16 @@ _sk_lerp_565_avx LABEL PROC
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  197,236,88,214                      ; vaddps        %ymm6,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,31,67,0,0         ; vbroadcastss  0x431f(%rip),%ymm3        # 55e4 <_sk_callback_avx+0x1f9>
+  DB  196,226,125,24,29,54,67,0,0         ; vbroadcastss  0x4336(%rip),%ymm3        # 5748 <_sk_callback_avx+0x20d>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  196,65,57,239,192                   ; vpxor         %xmm8,%xmm8,%xmm8
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,63,255,255,255               ; ja            121f <_sk_lerp_565_avx+0x14>
+  DB  15,135,63,255,255,255               ; ja            136c <_sk_lerp_565_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 1334 <_sk_lerp_565_avx+0x129>
+  DB  76,141,13,76,0,0,0                  ; lea           0x4c(%rip),%r9        # 1484 <_sk_lerp_565_avx+0x12c>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -5590,27 +5707,26 @@ _sk_lerp_565_avx LABEL PROC
   DB  196,65,57,196,68,122,4,2            ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm8,%xmm8
   DB  196,65,57,196,68,122,2,1            ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm8,%xmm8
   DB  196,65,57,196,4,122,0               ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm8,%xmm8
-  DB  233,235,254,255,255                 ; jmpq          121f <_sk_lerp_565_avx+0x14>
-  DB  244                                 ; hlt
+  DB  233,235,254,255,255                 ; jmpq          136c <_sk_lerp_565_avx+0x14>
+  DB  15,31,0                             ; nopl          (%rax)
+  DB  241                                 ; icebp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  236                                 ; in            (%dx),%al
+  DB  233,255,255,255,225                 ; jmpq          ffffffffe200148c <_sk_callback_avx+0xffffffffe1ffbf51>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,228                             ; jmpq          *%rsp
   DB  255                                 ; (bad)
+  DB  217,255                             ; fcos
   DB  255                                 ; (bad)
+  DB  255,209                             ; callq         *%rcx
   DB  255                                 ; (bad)
-  DB  220,255                             ; fdivr         %st,%st(7)
-  DB  255                                 ; (bad)
-  DB  255,212                             ; callq         *%rsp
   DB  255                                 ; (bad)
+  DB  255,201                             ; dec           %ecx
   DB  255                                 ; (bad)
-  DB  255,204                             ; dec           %esp
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,192                             ; inc           %eax
+  DB  189                                 ; .byte         0xbd
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
@@ -5620,7 +5736,7 @@ _sk_load_tables_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,26,2,0,0                     ; jne           1578 <_sk_load_tables_avx+0x228>
+  DB  15,133,26,2,0,0                     ; jne           16c8 <_sk_load_tables_avx+0x228>
   DB  196,65,124,16,4,184                 ; vmovups       (%r8,%rdi,4),%ymm8
   DB  85                                  ; push          %rbp
   DB  65,87                               ; push          %r15
@@ -5628,7 +5744,7 @@ _sk_load_tables_avx LABEL PROC
   DB  65,85                               ; push          %r13
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
-  DB  197,124,40,13,74,69,0,0             ; vmovaps       0x454a(%rip),%ymm9        # 58c0 <_sk_callback_avx+0x4d5>
+  DB  197,124,40,13,90,69,0,0             ; vmovaps       0x455a(%rip),%ymm9        # 5a20 <_sk_callback_avx+0x4e5>
   DB  196,193,60,84,193                   ; vandps        %ymm9,%ymm8,%ymm0
   DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
   DB  69,137,203                          ; mov           %r9d,%r11d
@@ -5720,7 +5836,7 @@ _sk_load_tables_avx LABEL PROC
   DB  196,193,97,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm3
   DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,131,64,0,0          ; vbroadcastss  0x4083(%rip),%ymm8        # 55e8 <_sk_callback_avx+0x1fd>
+  DB  196,98,125,24,5,151,64,0,0          ; vbroadcastss  0x4097(%rip),%ymm8        # 574c <_sk_callback_avx+0x211>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -5735,9 +5851,9 @@ _sk_load_tables_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  65,254,201                          ; dec           %r9b
   DB  65,128,249,6                        ; cmp           $0x6,%r9b
-  DB  15,135,211,253,255,255              ; ja            1364 <_sk_load_tables_avx+0x14>
+  DB  15,135,211,253,255,255              ; ja            14b4 <_sk_load_tables_avx+0x14>
   DB  69,15,182,201                       ; movzbl        %r9b,%r9d
-  DB  76,141,21,140,0,0,0                 ; lea           0x8c(%rip),%r10        # 1628 <_sk_load_tables_avx+0x2d8>
+  DB  76,141,21,140,0,0,0                 ; lea           0x8c(%rip),%r10        # 1778 <_sk_load_tables_avx+0x2d8>
   DB  79,99,12,138                        ; movslq        (%r10,%r9,4),%r9
   DB  77,1,209                            ; add           %r10,%r9
   DB  65,255,225                          ; jmpq          *%r9
@@ -5760,7 +5876,7 @@ _sk_load_tables_avx LABEL PROC
   DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
   DB  196,195,57,34,4,184,0               ; vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
   DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
-  DB  233,62,253,255,255                  ; jmpq          1364 <_sk_load_tables_avx+0x14>
+  DB  233,62,253,255,255                  ; jmpq          14b4 <_sk_load_tables_avx+0x14>
   DB  102,144                             ; xchg          %ax,%ax
   DB  236                                 ; in            (%dx),%al
   DB  255                                 ; (bad)
@@ -5778,7 +5894,7 @@ _sk_load_tables_avx LABEL PROC
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  126,255                             ; jle           1641 <_sk_load_tables_avx+0x2f1>
+  DB  126,255                             ; jle           1791 <_sk_load_tables_avx+0x2f1>
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
 
@@ -5788,7 +5904,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,113,2,0,0                    ; jne           18cb <_sk_load_tables_u16_be_avx+0x287>
+  DB  15,133,113,2,0,0                    ; jne           1a1b <_sk_load_tables_u16_be_avx+0x287>
   DB  196,1,121,16,4,72                   ; vmovupd       (%r8,%r9,2),%xmm8
   DB  196,129,121,16,84,72,16             ; vmovupd       0x10(%r8,%r9,2),%xmm2
   DB  196,129,121,16,92,72,32             ; vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -5810,7 +5926,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  197,177,108,208                     ; vpunpcklqdq   %xmm0,%xmm9,%xmm2
   DB  197,177,109,200                     ; vpunpckhqdq   %xmm0,%xmm9,%xmm1
   DB  196,65,57,108,212                   ; vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  DB  197,121,111,29,138,66,0,0           ; vmovdqa       0x428a(%rip),%xmm11        # 5940 <_sk_callback_avx+0x555>
+  DB  197,121,111,29,154,66,0,0           ; vmovdqa       0x429a(%rip),%xmm11        # 5aa0 <_sk_callback_avx+0x565>
   DB  196,193,105,219,195                 ; vpand         %xmm11,%xmm2,%xmm0
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  196,193,121,105,209                 ; vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -5909,7 +6025,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  196,226,121,51,219                  ; vpmovzxwd     %xmm3,%xmm3
   DB  196,195,101,24,216,1                ; vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,52,61,0,0           ; vbroadcastss  0x3d34(%rip),%ymm8        # 55ec <_sk_callback_avx+0x201>
+  DB  196,98,125,24,5,72,61,0,0           ; vbroadcastss  0x3d48(%rip),%ymm8        # 5750 <_sk_callback_avx+0x215>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -5922,29 +6038,29 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  196,1,123,16,4,72                   ; vmovsd        (%r8,%r9,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            1931 <_sk_load_tables_u16_be_avx+0x2ed>
+  DB  116,85                              ; je            1a81 <_sk_load_tables_u16_be_avx+0x2ed>
   DB  196,1,57,22,68,72,8                 ; vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            1931 <_sk_load_tables_u16_be_avx+0x2ed>
+  DB  114,72                              ; jb            1a81 <_sk_load_tables_u16_be_avx+0x2ed>
   DB  196,129,123,16,84,72,16             ; vmovsd        0x10(%r8,%r9,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            193e <_sk_load_tables_u16_be_avx+0x2fa>
+  DB  116,72                              ; je            1a8e <_sk_load_tables_u16_be_avx+0x2fa>
   DB  196,129,105,22,84,72,24             ; vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            193e <_sk_load_tables_u16_be_avx+0x2fa>
+  DB  114,59                              ; jb            1a8e <_sk_load_tables_u16_be_avx+0x2fa>
   DB  196,129,123,16,92,72,32             ; vmovsd        0x20(%r8,%r9,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,97,253,255,255               ; je            1675 <_sk_load_tables_u16_be_avx+0x31>
+  DB  15,132,97,253,255,255               ; je            17c5 <_sk_load_tables_u16_be_avx+0x31>
   DB  196,129,97,22,92,72,40              ; vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,80,253,255,255               ; jb            1675 <_sk_load_tables_u16_be_avx+0x31>
+  DB  15,130,80,253,255,255               ; jb            17c5 <_sk_load_tables_u16_be_avx+0x31>
   DB  196,1,122,126,76,72,48              ; vmovq         0x30(%r8,%r9,2),%xmm9
-  DB  233,68,253,255,255                  ; jmpq          1675 <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,68,253,255,255                  ; jmpq          17c5 <_sk_load_tables_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,55,253,255,255                  ; jmpq          1675 <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,55,253,255,255                  ; jmpq          17c5 <_sk_load_tables_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,46,253,255,255                  ; jmpq          1675 <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,46,253,255,255                  ; jmpq          17c5 <_sk_load_tables_u16_be_avx+0x31>
 
 PUBLIC _sk_load_tables_rgb_u16_be_avx
 _sk_load_tables_rgb_u16_be_avx LABEL PROC
@@ -5952,7 +6068,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,127                       ; lea           (%rdi,%rdi,2),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,93,2,0,0                     ; jne           1bb6 <_sk_load_tables_rgb_u16_be_avx+0x26f>
+  DB  15,133,93,2,0,0                     ; jne           1d06 <_sk_load_tables_rgb_u16_be_avx+0x26f>
   DB  196,129,122,111,4,72                ; vmovdqu       (%r8,%r9,2),%xmm0
   DB  196,129,122,111,84,72,12            ; vmovdqu       0xc(%r8,%r9,2),%xmm2
   DB  196,129,122,111,76,72,24            ; vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -5979,7 +6095,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  197,185,108,202                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm1
   DB  197,185,109,210                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm2
   DB  197,121,108,195                     ; vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  DB  197,121,111,13,131,63,0,0           ; vmovdqa       0x3f83(%rip),%xmm9        # 5950 <_sk_callback_avx+0x565>
+  DB  197,121,111,13,147,63,0,0           ; vmovdqa       0x3f93(%rip),%xmm9        # 5ab0 <_sk_callback_avx+0x575>
   DB  196,193,113,219,193                 ; vpand         %xmm9,%xmm1,%xmm0
   DB  196,65,41,239,210                   ; vpxor         %xmm10,%xmm10,%xmm10
   DB  196,193,121,105,202                 ; vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -6071,7 +6187,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,195,109,24,208,1                ; vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,70,58,0,0         ; vbroadcastss  0x3a46(%rip),%ymm3        # 55f0 <_sk_callback_avx+0x205>
+  DB  196,226,125,24,29,90,58,0,0         ; vbroadcastss  0x3a5a(%rip),%ymm3        # 5754 <_sk_callback_avx+0x219>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,93                               ; pop           %r13
@@ -6082,36 +6198,36 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  196,129,121,110,4,72                ; vmovd         (%r8,%r9,2),%xmm0
   DB  196,129,121,196,68,72,4,2           ; vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           1bcf <_sk_load_tables_rgb_u16_be_avx+0x288>
-  DB  233,190,253,255,255                 ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,5                               ; jne           1d1f <_sk_load_tables_rgb_u16_be_avx+0x288>
+  DB  233,190,253,255,255                 ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,76,72,6             ; vmovd         0x6(%r8,%r9,2),%xmm1
   DB  196,1,113,196,68,72,10,2            ; vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            1bfe <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+  DB  114,26                              ; jb            1d4e <_sk_load_tables_rgb_u16_be_avx+0x2b7>
   DB  196,129,121,110,76,72,12            ; vmovd         0xc(%r8,%r9,2),%xmm1
   DB  196,129,113,196,84,72,16,2          ; vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           1c03 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
-  DB  233,143,253,255,255                 ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,138,253,255,255                 ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           1d53 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+  DB  233,143,253,255,255                 ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,138,253,255,255                 ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,76,72,18            ; vmovd         0x12(%r8,%r9,2),%xmm1
   DB  196,1,113,196,76,72,22,2            ; vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            1c32 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+  DB  114,26                              ; jb            1d82 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
   DB  196,129,121,110,76,72,24            ; vmovd         0x18(%r8,%r9,2),%xmm1
   DB  196,129,113,196,76,72,28,2          ; vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           1c37 <_sk_load_tables_rgb_u16_be_avx+0x2f0>
-  DB  233,91,253,255,255                  ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,86,253,255,255                  ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           1d87 <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+  DB  233,91,253,255,255                  ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,86,253,255,255                  ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,92,72,30            ; vmovd         0x1e(%r8,%r9,2),%xmm3
   DB  196,1,97,196,92,72,34,2             ; vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            1c60 <_sk_load_tables_rgb_u16_be_avx+0x319>
+  DB  114,20                              ; jb            1db0 <_sk_load_tables_rgb_u16_be_avx+0x319>
   DB  196,129,121,110,92,72,36            ; vmovd         0x24(%r8,%r9,2),%xmm3
   DB  196,129,97,196,92,72,40,2           ; vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  DB  233,45,253,255,255                  ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,40,253,255,255                  ; jmpq          198d <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,45,253,255,255                  ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,40,253,255,255                  ; jmpq          1add <_sk_load_tables_rgb_u16_be_avx+0x46>
 
 PUBLIC _sk_byte_tables_avx
 _sk_byte_tables_avx LABEL PROC
@@ -6122,7 +6238,7 @@ _sk_byte_tables_avx LABEL PROC
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,122,57,0,0          ; vbroadcastss  0x397a(%rip),%ymm8        # 55f4 <_sk_callback_avx+0x209>
+  DB  196,98,125,24,5,142,57,0,0          ; vbroadcastss  0x398e(%rip),%ymm8        # 5758 <_sk_callback_avx+0x21d>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
@@ -6159,7 +6275,7 @@ _sk_byte_tables_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,200,56,0,0         ; vbroadcastss  0x38c8(%rip),%ymm9        # 55f8 <_sk_callback_avx+0x20d>
+  DB  196,98,125,24,13,220,56,0,0         ; vbroadcastss  0x38dc(%rip),%ymm9        # 575c <_sk_callback_avx+0x221>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -6319,7 +6435,7 @@ _sk_byte_tables_rgb_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,238,53,0,0         ; vbroadcastss  0x35ee(%rip),%ymm9        # 55fc <_sk_callback_avx+0x211>
+  DB  196,98,125,24,13,2,54,0,0           ; vbroadcastss  0x3602(%rip),%ymm9        # 5760 <_sk_callback_avx+0x225>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -6606,36 +6722,36 @@ _sk_parametric_r_avx LABEL PROC
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,216                      ; vcvtdq2ps     %ymm0,%ymm11
-  DB  196,98,125,24,37,76,49,0,0          ; vbroadcastss  0x314c(%rip),%ymm12        # 5600 <_sk_callback_avx+0x215>
+  DB  196,98,125,24,37,96,49,0,0          ; vbroadcastss  0x3160(%rip),%ymm12        # 5764 <_sk_callback_avx+0x229>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,66,49,0,0          ; vbroadcastss  0x3142(%rip),%ymm12        # 5604 <_sk_callback_avx+0x219>
+  DB  196,98,125,24,37,86,49,0,0          ; vbroadcastss  0x3156(%rip),%ymm12        # 5768 <_sk_callback_avx+0x22d>
   DB  196,193,124,84,196                  ; vandps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,56,49,0,0          ; vbroadcastss  0x3138(%rip),%ymm12        # 5608 <_sk_callback_avx+0x21d>
+  DB  196,98,125,24,37,76,49,0,0          ; vbroadcastss  0x314c(%rip),%ymm12        # 576c <_sk_callback_avx+0x231>
   DB  196,193,124,86,196                  ; vorps         %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,46,49,0,0          ; vbroadcastss  0x312e(%rip),%ymm12        # 560c <_sk_callback_avx+0x221>
+  DB  196,98,125,24,37,66,49,0,0          ; vbroadcastss  0x3142(%rip),%ymm12        # 5770 <_sk_callback_avx+0x235>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,36,49,0,0          ; vbroadcastss  0x3124(%rip),%ymm12        # 5610 <_sk_callback_avx+0x225>
+  DB  196,98,125,24,37,56,49,0,0          ; vbroadcastss  0x3138(%rip),%ymm12        # 5774 <_sk_callback_avx+0x239>
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,21,49,0,0          ; vbroadcastss  0x3115(%rip),%ymm12        # 5614 <_sk_callback_avx+0x229>
+  DB  196,98,125,24,37,41,49,0,0          ; vbroadcastss  0x3129(%rip),%ymm12        # 5778 <_sk_callback_avx+0x23d>
   DB  196,193,124,88,196                  ; vaddps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,11,49,0,0          ; vbroadcastss  0x310b(%rip),%ymm12        # 5618 <_sk_callback_avx+0x22d>
+  DB  196,98,125,24,37,31,49,0,0          ; vbroadcastss  0x311f(%rip),%ymm12        # 577c <_sk_callback_avx+0x241>
   DB  197,156,94,192                      ; vdivps        %ymm0,%ymm12,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
   DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
   DB  196,99,125,8,208,1                  ; vroundps      $0x1,%ymm0,%ymm10
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,239,48,0,0         ; vbroadcastss  0x30ef(%rip),%ymm11        # 561c <_sk_callback_avx+0x231>
+  DB  196,98,125,24,29,3,49,0,0           ; vbroadcastss  0x3103(%rip),%ymm11        # 5780 <_sk_callback_avx+0x245>
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,229,48,0,0         ; vbroadcastss  0x30e5(%rip),%ymm11        # 5620 <_sk_callback_avx+0x235>
+  DB  196,98,125,24,29,249,48,0,0         ; vbroadcastss  0x30f9(%rip),%ymm11        # 5784 <_sk_callback_avx+0x249>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,124,92,195                  ; vsubps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,214,48,0,0         ; vbroadcastss  0x30d6(%rip),%ymm11        # 5624 <_sk_callback_avx+0x239>
+  DB  196,98,125,24,29,234,48,0,0         ; vbroadcastss  0x30ea(%rip),%ymm11        # 5788 <_sk_callback_avx+0x24d>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,204,48,0,0         ; vbroadcastss  0x30cc(%rip),%ymm11        # 5628 <_sk_callback_avx+0x23d>
+  DB  196,98,125,24,29,224,48,0,0         ; vbroadcastss  0x30e0(%rip),%ymm11        # 578c <_sk_callback_avx+0x251>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
-  DB  196,98,125,24,21,189,48,0,0         ; vbroadcastss  0x30bd(%rip),%ymm10        # 562c <_sk_callback_avx+0x241>
+  DB  196,98,125,24,21,209,48,0,0         ; vbroadcastss  0x30d1(%rip),%ymm10        # 5790 <_sk_callback_avx+0x255>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -6643,7 +6759,7 @@ _sk_parametric_r_avx LABEL PROC
   DB  196,195,125,74,193,128              ; vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,148,48,0,0          ; vbroadcastss  0x3094(%rip),%ymm8        # 5630 <_sk_callback_avx+0x245>
+  DB  196,98,125,24,5,168,48,0,0          ; vbroadcastss  0x30a8(%rip),%ymm8        # 5794 <_sk_callback_avx+0x259>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6663,36 +6779,36 @@ _sk_parametric_g_avx LABEL PROC
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,217                      ; vcvtdq2ps     %ymm1,%ymm11
-  DB  196,98,125,24,37,69,48,0,0          ; vbroadcastss  0x3045(%rip),%ymm12        # 5634 <_sk_callback_avx+0x249>
+  DB  196,98,125,24,37,89,48,0,0          ; vbroadcastss  0x3059(%rip),%ymm12        # 5798 <_sk_callback_avx+0x25d>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,59,48,0,0          ; vbroadcastss  0x303b(%rip),%ymm12        # 5638 <_sk_callback_avx+0x24d>
+  DB  196,98,125,24,37,79,48,0,0          ; vbroadcastss  0x304f(%rip),%ymm12        # 579c <_sk_callback_avx+0x261>
   DB  196,193,116,84,204                  ; vandps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,49,48,0,0          ; vbroadcastss  0x3031(%rip),%ymm12        # 563c <_sk_callback_avx+0x251>
+  DB  196,98,125,24,37,69,48,0,0          ; vbroadcastss  0x3045(%rip),%ymm12        # 57a0 <_sk_callback_avx+0x265>
   DB  196,193,116,86,204                  ; vorps         %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,39,48,0,0          ; vbroadcastss  0x3027(%rip),%ymm12        # 5640 <_sk_callback_avx+0x255>
+  DB  196,98,125,24,37,59,48,0,0          ; vbroadcastss  0x303b(%rip),%ymm12        # 57a4 <_sk_callback_avx+0x269>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,29,48,0,0          ; vbroadcastss  0x301d(%rip),%ymm12        # 5644 <_sk_callback_avx+0x259>
+  DB  196,98,125,24,37,49,48,0,0          ; vbroadcastss  0x3031(%rip),%ymm12        # 57a8 <_sk_callback_avx+0x26d>
   DB  196,65,116,89,228                   ; vmulps        %ymm12,%ymm1,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,14,48,0,0          ; vbroadcastss  0x300e(%rip),%ymm12        # 5648 <_sk_callback_avx+0x25d>
+  DB  196,98,125,24,37,34,48,0,0          ; vbroadcastss  0x3022(%rip),%ymm12        # 57ac <_sk_callback_avx+0x271>
   DB  196,193,116,88,204                  ; vaddps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,4,48,0,0           ; vbroadcastss  0x3004(%rip),%ymm12        # 564c <_sk_callback_avx+0x261>
+  DB  196,98,125,24,37,24,48,0,0          ; vbroadcastss  0x3018(%rip),%ymm12        # 57b0 <_sk_callback_avx+0x275>
   DB  197,156,94,201                      ; vdivps        %ymm1,%ymm12,%ymm1
   DB  197,164,92,201                      ; vsubps        %ymm1,%ymm11,%ymm1
   DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
   DB  196,99,125,8,209,1                  ; vroundps      $0x1,%ymm1,%ymm10
   DB  196,65,116,92,210                   ; vsubps        %ymm10,%ymm1,%ymm10
-  DB  196,98,125,24,29,232,47,0,0         ; vbroadcastss  0x2fe8(%rip),%ymm11        # 5650 <_sk_callback_avx+0x265>
+  DB  196,98,125,24,29,252,47,0,0         ; vbroadcastss  0x2ffc(%rip),%ymm11        # 57b4 <_sk_callback_avx+0x279>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,222,47,0,0         ; vbroadcastss  0x2fde(%rip),%ymm11        # 5654 <_sk_callback_avx+0x269>
+  DB  196,98,125,24,29,242,47,0,0         ; vbroadcastss  0x2ff2(%rip),%ymm11        # 57b8 <_sk_callback_avx+0x27d>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,116,92,203                  ; vsubps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,207,47,0,0         ; vbroadcastss  0x2fcf(%rip),%ymm11        # 5658 <_sk_callback_avx+0x26d>
+  DB  196,98,125,24,29,227,47,0,0         ; vbroadcastss  0x2fe3(%rip),%ymm11        # 57bc <_sk_callback_avx+0x281>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,197,47,0,0         ; vbroadcastss  0x2fc5(%rip),%ymm11        # 565c <_sk_callback_avx+0x271>
+  DB  196,98,125,24,29,217,47,0,0         ; vbroadcastss  0x2fd9(%rip),%ymm11        # 57c0 <_sk_callback_avx+0x285>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,116,88,202                  ; vaddps        %ymm10,%ymm1,%ymm1
-  DB  196,98,125,24,21,182,47,0,0         ; vbroadcastss  0x2fb6(%rip),%ymm10        # 5660 <_sk_callback_avx+0x275>
+  DB  196,98,125,24,21,202,47,0,0         ; vbroadcastss  0x2fca(%rip),%ymm10        # 57c4 <_sk_callback_avx+0x289>
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -6700,7 +6816,7 @@ _sk_parametric_g_avx LABEL PROC
   DB  196,195,117,74,201,128              ; vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,5,141,47,0,0          ; vbroadcastss  0x2f8d(%rip),%ymm8        # 5664 <_sk_callback_avx+0x279>
+  DB  196,98,125,24,5,161,47,0,0          ; vbroadcastss  0x2fa1(%rip),%ymm8        # 57c8 <_sk_callback_avx+0x28d>
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6720,36 +6836,36 @@ _sk_parametric_b_avx LABEL PROC
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,218                      ; vcvtdq2ps     %ymm2,%ymm11
-  DB  196,98,125,24,37,62,47,0,0          ; vbroadcastss  0x2f3e(%rip),%ymm12        # 5668 <_sk_callback_avx+0x27d>
+  DB  196,98,125,24,37,82,47,0,0          ; vbroadcastss  0x2f52(%rip),%ymm12        # 57cc <_sk_callback_avx+0x291>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,52,47,0,0          ; vbroadcastss  0x2f34(%rip),%ymm12        # 566c <_sk_callback_avx+0x281>
+  DB  196,98,125,24,37,72,47,0,0          ; vbroadcastss  0x2f48(%rip),%ymm12        # 57d0 <_sk_callback_avx+0x295>
   DB  196,193,108,84,212                  ; vandps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,42,47,0,0          ; vbroadcastss  0x2f2a(%rip),%ymm12        # 5670 <_sk_callback_avx+0x285>
+  DB  196,98,125,24,37,62,47,0,0          ; vbroadcastss  0x2f3e(%rip),%ymm12        # 57d4 <_sk_callback_avx+0x299>
   DB  196,193,108,86,212                  ; vorps         %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,32,47,0,0          ; vbroadcastss  0x2f20(%rip),%ymm12        # 5674 <_sk_callback_avx+0x289>
+  DB  196,98,125,24,37,52,47,0,0          ; vbroadcastss  0x2f34(%rip),%ymm12        # 57d8 <_sk_callback_avx+0x29d>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,22,47,0,0          ; vbroadcastss  0x2f16(%rip),%ymm12        # 5678 <_sk_callback_avx+0x28d>
+  DB  196,98,125,24,37,42,47,0,0          ; vbroadcastss  0x2f2a(%rip),%ymm12        # 57dc <_sk_callback_avx+0x2a1>
   DB  196,65,108,89,228                   ; vmulps        %ymm12,%ymm2,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,7,47,0,0           ; vbroadcastss  0x2f07(%rip),%ymm12        # 567c <_sk_callback_avx+0x291>
+  DB  196,98,125,24,37,27,47,0,0          ; vbroadcastss  0x2f1b(%rip),%ymm12        # 57e0 <_sk_callback_avx+0x2a5>
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,253,46,0,0         ; vbroadcastss  0x2efd(%rip),%ymm12        # 5680 <_sk_callback_avx+0x295>
+  DB  196,98,125,24,37,17,47,0,0          ; vbroadcastss  0x2f11(%rip),%ymm12        # 57e4 <_sk_callback_avx+0x2a9>
   DB  197,156,94,210                      ; vdivps        %ymm2,%ymm12,%ymm2
   DB  197,164,92,210                      ; vsubps        %ymm2,%ymm11,%ymm2
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  196,99,125,8,210,1                  ; vroundps      $0x1,%ymm2,%ymm10
   DB  196,65,108,92,210                   ; vsubps        %ymm10,%ymm2,%ymm10
-  DB  196,98,125,24,29,225,46,0,0         ; vbroadcastss  0x2ee1(%rip),%ymm11        # 5684 <_sk_callback_avx+0x299>
+  DB  196,98,125,24,29,245,46,0,0         ; vbroadcastss  0x2ef5(%rip),%ymm11        # 57e8 <_sk_callback_avx+0x2ad>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,215,46,0,0         ; vbroadcastss  0x2ed7(%rip),%ymm11        # 5688 <_sk_callback_avx+0x29d>
+  DB  196,98,125,24,29,235,46,0,0         ; vbroadcastss  0x2eeb(%rip),%ymm11        # 57ec <_sk_callback_avx+0x2b1>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,108,92,211                  ; vsubps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,200,46,0,0         ; vbroadcastss  0x2ec8(%rip),%ymm11        # 568c <_sk_callback_avx+0x2a1>
+  DB  196,98,125,24,29,220,46,0,0         ; vbroadcastss  0x2edc(%rip),%ymm11        # 57f0 <_sk_callback_avx+0x2b5>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,190,46,0,0         ; vbroadcastss  0x2ebe(%rip),%ymm11        # 5690 <_sk_callback_avx+0x2a5>
+  DB  196,98,125,24,29,210,46,0,0         ; vbroadcastss  0x2ed2(%rip),%ymm11        # 57f4 <_sk_callback_avx+0x2b9>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
-  DB  196,98,125,24,21,175,46,0,0         ; vbroadcastss  0x2eaf(%rip),%ymm10        # 5694 <_sk_callback_avx+0x2a9>
+  DB  196,98,125,24,21,195,46,0,0         ; vbroadcastss  0x2ec3(%rip),%ymm10        # 57f8 <_sk_callback_avx+0x2bd>
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  197,253,91,210                      ; vcvtps2dq     %ymm2,%ymm2
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -6757,7 +6873,7 @@ _sk_parametric_b_avx LABEL PROC
   DB  196,195,109,74,209,128              ; vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,134,46,0,0          ; vbroadcastss  0x2e86(%rip),%ymm8        # 5698 <_sk_callback_avx+0x2ad>
+  DB  196,98,125,24,5,154,46,0,0          ; vbroadcastss  0x2e9a(%rip),%ymm8        # 57fc <_sk_callback_avx+0x2c1>
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6777,36 +6893,36 @@ _sk_parametric_a_avx LABEL PROC
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,219                      ; vcvtdq2ps     %ymm3,%ymm11
-  DB  196,98,125,24,37,55,46,0,0          ; vbroadcastss  0x2e37(%rip),%ymm12        # 569c <_sk_callback_avx+0x2b1>
+  DB  196,98,125,24,37,75,46,0,0          ; vbroadcastss  0x2e4b(%rip),%ymm12        # 5800 <_sk_callback_avx+0x2c5>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,45,46,0,0          ; vbroadcastss  0x2e2d(%rip),%ymm12        # 56a0 <_sk_callback_avx+0x2b5>
+  DB  196,98,125,24,37,65,46,0,0          ; vbroadcastss  0x2e41(%rip),%ymm12        # 5804 <_sk_callback_avx+0x2c9>
   DB  196,193,100,84,220                  ; vandps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,35,46,0,0          ; vbroadcastss  0x2e23(%rip),%ymm12        # 56a4 <_sk_callback_avx+0x2b9>
+  DB  196,98,125,24,37,55,46,0,0          ; vbroadcastss  0x2e37(%rip),%ymm12        # 5808 <_sk_callback_avx+0x2cd>
   DB  196,193,100,86,220                  ; vorps         %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,25,46,0,0          ; vbroadcastss  0x2e19(%rip),%ymm12        # 56a8 <_sk_callback_avx+0x2bd>
+  DB  196,98,125,24,37,45,46,0,0          ; vbroadcastss  0x2e2d(%rip),%ymm12        # 580c <_sk_callback_avx+0x2d1>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,15,46,0,0          ; vbroadcastss  0x2e0f(%rip),%ymm12        # 56ac <_sk_callback_avx+0x2c1>
+  DB  196,98,125,24,37,35,46,0,0          ; vbroadcastss  0x2e23(%rip),%ymm12        # 5810 <_sk_callback_avx+0x2d5>
   DB  196,65,100,89,228                   ; vmulps        %ymm12,%ymm3,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,0,46,0,0           ; vbroadcastss  0x2e00(%rip),%ymm12        # 56b0 <_sk_callback_avx+0x2c5>
+  DB  196,98,125,24,37,20,46,0,0          ; vbroadcastss  0x2e14(%rip),%ymm12        # 5814 <_sk_callback_avx+0x2d9>
   DB  196,193,100,88,220                  ; vaddps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,246,45,0,0         ; vbroadcastss  0x2df6(%rip),%ymm12        # 56b4 <_sk_callback_avx+0x2c9>
+  DB  196,98,125,24,37,10,46,0,0          ; vbroadcastss  0x2e0a(%rip),%ymm12        # 5818 <_sk_callback_avx+0x2dd>
   DB  197,156,94,219                      ; vdivps        %ymm3,%ymm12,%ymm3
   DB  197,164,92,219                      ; vsubps        %ymm3,%ymm11,%ymm3
   DB  197,172,89,219                      ; vmulps        %ymm3,%ymm10,%ymm3
   DB  196,99,125,8,211,1                  ; vroundps      $0x1,%ymm3,%ymm10
   DB  196,65,100,92,210                   ; vsubps        %ymm10,%ymm3,%ymm10
-  DB  196,98,125,24,29,218,45,0,0         ; vbroadcastss  0x2dda(%rip),%ymm11        # 56b8 <_sk_callback_avx+0x2cd>
+  DB  196,98,125,24,29,238,45,0,0         ; vbroadcastss  0x2dee(%rip),%ymm11        # 581c <_sk_callback_avx+0x2e1>
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,208,45,0,0         ; vbroadcastss  0x2dd0(%rip),%ymm11        # 56bc <_sk_callback_avx+0x2d1>
+  DB  196,98,125,24,29,228,45,0,0         ; vbroadcastss  0x2de4(%rip),%ymm11        # 5820 <_sk_callback_avx+0x2e5>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,100,92,219                  ; vsubps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,193,45,0,0         ; vbroadcastss  0x2dc1(%rip),%ymm11        # 56c0 <_sk_callback_avx+0x2d5>
+  DB  196,98,125,24,29,213,45,0,0         ; vbroadcastss  0x2dd5(%rip),%ymm11        # 5824 <_sk_callback_avx+0x2e9>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,183,45,0,0         ; vbroadcastss  0x2db7(%rip),%ymm11        # 56c4 <_sk_callback_avx+0x2d9>
+  DB  196,98,125,24,29,203,45,0,0         ; vbroadcastss  0x2dcb(%rip),%ymm11        # 5828 <_sk_callback_avx+0x2ed>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,100,88,218                  ; vaddps        %ymm10,%ymm3,%ymm3
-  DB  196,98,125,24,21,168,45,0,0         ; vbroadcastss  0x2da8(%rip),%ymm10        # 56c8 <_sk_callback_avx+0x2dd>
+  DB  196,98,125,24,21,188,45,0,0         ; vbroadcastss  0x2dbc(%rip),%ymm10        # 582c <_sk_callback_avx+0x2f1>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,253,91,219                      ; vcvtps2dq     %ymm3,%ymm3
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -6814,38 +6930,38 @@ _sk_parametric_a_avx LABEL PROC
   DB  196,195,101,74,217,128              ; vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
-  DB  196,98,125,24,5,127,45,0,0          ; vbroadcastss  0x2d7f(%rip),%ymm8        # 56cc <_sk_callback_avx+0x2e1>
+  DB  196,98,125,24,5,147,45,0,0          ; vbroadcastss  0x2d93(%rip),%ymm8        # 5830 <_sk_callback_avx+0x2f5>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_lab_to_xyz_avx
 _sk_lab_to_xyz_avx LABEL PROC
-  DB  196,98,125,24,5,113,45,0,0          ; vbroadcastss  0x2d71(%rip),%ymm8        # 56d0 <_sk_callback_avx+0x2e5>
+  DB  196,98,125,24,5,133,45,0,0          ; vbroadcastss  0x2d85(%rip),%ymm8        # 5834 <_sk_callback_avx+0x2f9>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,103,45,0,0          ; vbroadcastss  0x2d67(%rip),%ymm8        # 56d4 <_sk_callback_avx+0x2e9>
+  DB  196,98,125,24,5,123,45,0,0          ; vbroadcastss  0x2d7b(%rip),%ymm8        # 5838 <_sk_callback_avx+0x2fd>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,13,93,45,0,0          ; vbroadcastss  0x2d5d(%rip),%ymm9        # 56d8 <_sk_callback_avx+0x2ed>
+  DB  196,98,125,24,13,113,45,0,0         ; vbroadcastss  0x2d71(%rip),%ymm9        # 583c <_sk_callback_avx+0x301>
   DB  196,193,116,88,201                  ; vaddps        %ymm9,%ymm1,%ymm1
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  196,193,108,88,209                  ; vaddps        %ymm9,%ymm2,%ymm2
-  DB  196,98,125,24,5,73,45,0,0           ; vbroadcastss  0x2d49(%rip),%ymm8        # 56dc <_sk_callback_avx+0x2f1>
+  DB  196,98,125,24,5,93,45,0,0           ; vbroadcastss  0x2d5d(%rip),%ymm8        # 5840 <_sk_callback_avx+0x305>
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,63,45,0,0           ; vbroadcastss  0x2d3f(%rip),%ymm8        # 56e0 <_sk_callback_avx+0x2f5>
+  DB  196,98,125,24,5,83,45,0,0           ; vbroadcastss  0x2d53(%rip),%ymm8        # 5844 <_sk_callback_avx+0x309>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,53,45,0,0           ; vbroadcastss  0x2d35(%rip),%ymm8        # 56e4 <_sk_callback_avx+0x2f9>
+  DB  196,98,125,24,5,73,45,0,0           ; vbroadcastss  0x2d49(%rip),%ymm8        # 5848 <_sk_callback_avx+0x30d>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
-  DB  196,98,125,24,5,39,45,0,0           ; vbroadcastss  0x2d27(%rip),%ymm8        # 56e8 <_sk_callback_avx+0x2fd>
+  DB  196,98,125,24,5,59,45,0,0           ; vbroadcastss  0x2d3b(%rip),%ymm8        # 584c <_sk_callback_avx+0x311>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,252,92,210                      ; vsubps        %ymm2,%ymm0,%ymm2
   DB  197,116,89,193                      ; vmulps        %ymm1,%ymm1,%ymm8
   DB  196,65,116,89,192                   ; vmulps        %ymm8,%ymm1,%ymm8
-  DB  196,98,125,24,13,16,45,0,0          ; vbroadcastss  0x2d10(%rip),%ymm9        # 56ec <_sk_callback_avx+0x301>
+  DB  196,98,125,24,13,36,45,0,0          ; vbroadcastss  0x2d24(%rip),%ymm9        # 5850 <_sk_callback_avx+0x315>
   DB  196,65,52,194,208,1                 ; vcmpltps      %ymm8,%ymm9,%ymm10
-  DB  196,98,125,24,29,5,45,0,0           ; vbroadcastss  0x2d05(%rip),%ymm11        # 56f0 <_sk_callback_avx+0x305>
+  DB  196,98,125,24,29,25,45,0,0          ; vbroadcastss  0x2d19(%rip),%ymm11        # 5854 <_sk_callback_avx+0x319>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,37,251,44,0,0         ; vbroadcastss  0x2cfb(%rip),%ymm12        # 56f4 <_sk_callback_avx+0x309>
+  DB  196,98,125,24,37,15,45,0,0          ; vbroadcastss  0x2d0f(%rip),%ymm12        # 5858 <_sk_callback_avx+0x31d>
   DB  196,193,116,89,204                  ; vmulps        %ymm12,%ymm1,%ymm1
   DB  196,67,117,74,192,160               ; vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   DB  197,252,89,200                      ; vmulps        %ymm0,%ymm0,%ymm1
@@ -6860,9 +6976,9 @@ _sk_lab_to_xyz_avx LABEL PROC
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,193,108,89,212                  ; vmulps        %ymm12,%ymm2,%ymm2
   DB  196,227,109,74,208,144              ; vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  DB  196,226,125,24,5,177,44,0,0         ; vbroadcastss  0x2cb1(%rip),%ymm0        # 56f8 <_sk_callback_avx+0x30d>
+  DB  196,226,125,24,5,197,44,0,0         ; vbroadcastss  0x2cc5(%rip),%ymm0        # 585c <_sk_callback_avx+0x321>
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,168,44,0,0          ; vbroadcastss  0x2ca8(%rip),%ymm8        # 56fc <_sk_callback_avx+0x311>
+  DB  196,98,125,24,5,188,44,0,0          ; vbroadcastss  0x2cbc(%rip),%ymm8        # 5860 <_sk_callback_avx+0x325>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6874,14 +6990,14 @@ _sk_load_a8_avx LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,62                              ; jne           2aab <_sk_load_a8_avx+0x4e>
+  DB  117,62                              ; jne           2bfb <_sk_load_a8_avx+0x4e>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
   DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,108,44,0,0        ; vbroadcastss  0x2c6c(%rip),%ymm1        # 5700 <_sk_callback_avx+0x315>
+  DB  196,226,125,24,13,128,44,0,0        ; vbroadcastss  0x2c80(%rip),%ymm1        # 5864 <_sk_callback_avx+0x329>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -6898,9 +7014,9 @@ _sk_load_a8_avx LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           2ab3 <_sk_load_a8_avx+0x56>
+  DB  117,234                             ; jne           2c03 <_sk_load_a8_avx+0x56>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,161                             ; jmp           2a71 <_sk_load_a8_avx+0x14>
+  DB  235,161                             ; jmp           2bc1 <_sk_load_a8_avx+0x14>
 
 PUBLIC _sk_gather_a8_avx
 _sk_gather_a8_avx LABEL PROC
@@ -6948,7 +7064,7 @@ _sk_gather_a8_avx LABEL PROC
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,97,43,0,0         ; vbroadcastss  0x2b61(%rip),%ymm1        # 5704 <_sk_callback_avx+0x319>
+  DB  196,226,125,24,13,117,43,0,0        ; vbroadcastss  0x2b75(%rip),%ymm1        # 5868 <_sk_callback_avx+0x32d>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -6964,14 +7080,14 @@ PUBLIC _sk_store_a8_avx
 _sk_store_a8_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,60,43,0,0           ; vbroadcastss  0x2b3c(%rip),%ymm8        # 5708 <_sk_callback_avx+0x31d>
+  DB  196,98,125,24,5,80,43,0,0           ; vbroadcastss  0x2b50(%rip),%ymm8        # 586c <_sk_callback_avx+0x331>
   DB  196,65,100,89,192                   ; vmulps        %ymm8,%ymm3,%ymm8
   DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           2bf5 <_sk_store_a8_avx+0x37>
+  DB  117,10                              ; jne           2d45 <_sk_store_a8_avx+0x37>
   DB  196,65,123,17,4,58                  ; vmovsd        %xmm8,(%r10,%rdi,1)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6979,10 +7095,10 @@ _sk_store_a8_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            2bf1 <_sk_store_a8_avx+0x33>
+  DB  119,236                             ; ja            2d41 <_sk_store_a8_avx+0x33>
   DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,67,0,0,0                  ; lea           0x43(%rip),%r9        # 2c58 <_sk_store_a8_avx+0x9a>
+  DB  76,141,13,67,0,0,0                  ; lea           0x43(%rip),%r9        # 2da8 <_sk_store_a8_avx+0x9a>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6993,7 +7109,7 @@ _sk_store_a8_avx LABEL PROC
   DB  196,67,121,20,68,58,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   DB  196,67,121,20,68,58,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   DB  196,67,121,20,4,58,0                ; vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  DB  235,154                             ; jmp           2bf1 <_sk_store_a8_avx+0x33>
+  DB  235,154                             ; jmp           2d41 <_sk_store_a8_avx+0x33>
   DB  144                                 ; nop
   DB  246,255                             ; idiv          %bh
   DB  255                                 ; (bad)
@@ -7025,17 +7141,17 @@ _sk_load_g8_avx LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,67                              ; jne           2cc7 <_sk_load_g8_avx+0x53>
+  DB  117,67                              ; jne           2e17 <_sk_load_g8_avx+0x53>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
   DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,97,42,0,0         ; vbroadcastss  0x2a61(%rip),%ymm1        # 570c <_sk_callback_avx+0x321>
+  DB  196,226,125,24,13,117,42,0,0        ; vbroadcastss  0x2a75(%rip),%ymm1        # 5870 <_sk_callback_avx+0x335>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,86,42,0,0         ; vbroadcastss  0x2a56(%rip),%ymm3        # 5710 <_sk_callback_avx+0x325>
+  DB  196,226,125,24,29,106,42,0,0        ; vbroadcastss  0x2a6a(%rip),%ymm3        # 5874 <_sk_callback_avx+0x339>
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
@@ -7049,9 +7165,9 @@ _sk_load_g8_avx LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           2ccf <_sk_load_g8_avx+0x5b>
+  DB  117,234                             ; jne           2e1f <_sk_load_g8_avx+0x5b>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,156                             ; jmp           2c88 <_sk_load_g8_avx+0x14>
+  DB  235,156                             ; jmp           2dd8 <_sk_load_g8_avx+0x14>
 
 PUBLIC _sk_gather_g8_avx
 _sk_gather_g8_avx LABEL PROC
@@ -7099,10 +7215,10 @@ _sk_gather_g8_avx LABEL PROC
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,85,41,0,0         ; vbroadcastss  0x2955(%rip),%ymm1        # 5714 <_sk_callback_avx+0x329>
+  DB  196,226,125,24,13,105,41,0,0        ; vbroadcastss  0x2969(%rip),%ymm1        # 5878 <_sk_callback_avx+0x33d>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,74,41,0,0         ; vbroadcastss  0x294a(%rip),%ymm3        # 5718 <_sk_callback_avx+0x32d>
+  DB  196,226,125,24,29,94,41,0,0         ; vbroadcastss  0x295e(%rip),%ymm3        # 587c <_sk_callback_avx+0x341>
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  91                                  ; pop           %rbx
@@ -7116,9 +7232,9 @@ _sk_gather_i8_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  73,137,192                          ; mov           %rax,%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  116,5                               ; je            2dee <_sk_gather_i8_avx+0xf>
+  DB  116,5                               ; je            2f3e <_sk_gather_i8_avx+0xf>
   DB  76,137,192                          ; mov           %r8,%rax
-  DB  235,2                               ; jmp           2df0 <_sk_gather_i8_avx+0x11>
+  DB  235,2                               ; jmp           2f40 <_sk_gather_i8_avx+0x11>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,87                               ; push          %r15
   DB  65,86                               ; push          %r14
@@ -7180,10 +7296,10 @@ _sk_gather_i8_avx LABEL PROC
   DB  196,163,121,34,4,163,2              ; vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   DB  196,163,121,34,28,19,3              ; vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,198,41,0,0            ; vmovaps       0x29c6(%rip),%ymm10        # 58e0 <_sk_callback_avx+0x4f5>
+  DB  197,124,40,21,214,41,0,0            ; vmovaps       0x29d6(%rip),%ymm10        # 5a40 <_sk_callback_avx+0x505>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,240,39,0,0         ; vbroadcastss  0x27f0(%rip),%ymm9        # 571c <_sk_callback_avx+0x331>
+  DB  196,98,125,24,13,4,40,0,0           ; vbroadcastss  0x2804(%rip),%ymm9        # 5880 <_sk_callback_avx+0x345>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -7215,38 +7331,38 @@ _sk_load_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,128,0,0,0                    ; jne           3024 <_sk_load_565_avx+0x8e>
+  DB  15,133,128,0,0,0                    ; jne           3174 <_sk_load_565_avx+0x8e>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,90,39,0,0          ; vbroadcastss  0x275a(%rip),%ymm0        # 5720 <_sk_callback_avx+0x335>
+  DB  196,226,125,24,5,110,39,0,0         ; vbroadcastss  0x276e(%rip),%ymm0        # 5884 <_sk_callback_avx+0x349>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,77,39,0,0         ; vbroadcastss  0x274d(%rip),%ymm1        # 5724 <_sk_callback_avx+0x339>
+  DB  196,226,125,24,13,97,39,0,0         ; vbroadcastss  0x2761(%rip),%ymm1        # 5888 <_sk_callback_avx+0x34d>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,68,39,0,0         ; vbroadcastss  0x2744(%rip),%ymm1        # 5728 <_sk_callback_avx+0x33d>
+  DB  196,226,125,24,13,88,39,0,0         ; vbroadcastss  0x2758(%rip),%ymm1        # 588c <_sk_callback_avx+0x351>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,55,39,0,0         ; vbroadcastss  0x2737(%rip),%ymm3        # 572c <_sk_callback_avx+0x341>
+  DB  196,226,125,24,29,75,39,0,0         ; vbroadcastss  0x274b(%rip),%ymm3        # 5890 <_sk_callback_avx+0x355>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,46,39,0,0         ; vbroadcastss  0x272e(%rip),%ymm3        # 5730 <_sk_callback_avx+0x345>
+  DB  196,226,125,24,29,66,39,0,0         ; vbroadcastss  0x2742(%rip),%ymm3        # 5894 <_sk_callback_avx+0x359>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,33,39,0,0         ; vbroadcastss  0x2721(%rip),%ymm3        # 5734 <_sk_callback_avx+0x349>
+  DB  196,226,125,24,29,53,39,0,0         ; vbroadcastss  0x2735(%rip),%ymm3        # 5898 <_sk_callback_avx+0x35d>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,22,39,0,0         ; vbroadcastss  0x2716(%rip),%ymm3        # 5738 <_sk_callback_avx+0x34d>
+  DB  196,226,125,24,29,42,39,0,0         ; vbroadcastss  0x272a(%rip),%ymm3        # 589c <_sk_callback_avx+0x361>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,110,255,255,255              ; ja            2faa <_sk_load_565_avx+0x14>
+  DB  15,135,110,255,255,255              ; ja            30fa <_sk_load_565_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 3090 <_sk_load_565_avx+0xfa>
+  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 31e0 <_sk_load_565_avx+0xfa>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7258,7 +7374,7 @@ _sk_load_565_avx LABEL PROC
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,26,255,255,255                  ; jmpq          2faa <_sk_load_565_avx+0x14>
+  DB  233,26,255,255,255                  ; jmpq          30fa <_sk_load_565_avx+0x14>
   DB  244                                 ; hlt
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -7334,23 +7450,23 @@ _sk_gather_565_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,182,37,0,0         ; vbroadcastss  0x25b6(%rip),%ymm0        # 573c <_sk_callback_avx+0x351>
+  DB  196,226,125,24,5,202,37,0,0         ; vbroadcastss  0x25ca(%rip),%ymm0        # 58a0 <_sk_callback_avx+0x365>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,169,37,0,0        ; vbroadcastss  0x25a9(%rip),%ymm1        # 5740 <_sk_callback_avx+0x355>
+  DB  196,226,125,24,13,189,37,0,0        ; vbroadcastss  0x25bd(%rip),%ymm1        # 58a4 <_sk_callback_avx+0x369>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,160,37,0,0        ; vbroadcastss  0x25a0(%rip),%ymm1        # 5744 <_sk_callback_avx+0x359>
+  DB  196,226,125,24,13,180,37,0,0        ; vbroadcastss  0x25b4(%rip),%ymm1        # 58a8 <_sk_callback_avx+0x36d>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,147,37,0,0        ; vbroadcastss  0x2593(%rip),%ymm3        # 5748 <_sk_callback_avx+0x35d>
+  DB  196,226,125,24,29,167,37,0,0        ; vbroadcastss  0x25a7(%rip),%ymm3        # 58ac <_sk_callback_avx+0x371>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,138,37,0,0        ; vbroadcastss  0x258a(%rip),%ymm3        # 574c <_sk_callback_avx+0x361>
+  DB  196,226,125,24,29,158,37,0,0        ; vbroadcastss  0x259e(%rip),%ymm3        # 58b0 <_sk_callback_avx+0x375>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,125,37,0,0        ; vbroadcastss  0x257d(%rip),%ymm3        # 5750 <_sk_callback_avx+0x365>
+  DB  196,226,125,24,29,145,37,0,0        ; vbroadcastss  0x2591(%rip),%ymm3        # 58b4 <_sk_callback_avx+0x379>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,114,37,0,0        ; vbroadcastss  0x2572(%rip),%ymm3        # 5754 <_sk_callback_avx+0x369>
+  DB  196,226,125,24,29,134,37,0,0        ; vbroadcastss  0x2586(%rip),%ymm3        # 58b8 <_sk_callback_avx+0x37d>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,94                               ; pop           %r14
@@ -7362,14 +7478,14 @@ PUBLIC _sk_store_565_avx
 _sk_store_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,94,37,0,0           ; vbroadcastss  0x255e(%rip),%ymm8        # 5758 <_sk_callback_avx+0x36d>
+  DB  196,98,125,24,5,114,37,0,0          ; vbroadcastss  0x2572(%rip),%ymm8        # 58bc <_sk_callback_avx+0x381>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
   DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
   DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  DB  196,98,125,24,21,55,37,0,0          ; vbroadcastss  0x2537(%rip),%ymm10        # 575c <_sk_callback_avx+0x371>
+  DB  196,98,125,24,21,75,37,0,0          ; vbroadcastss  0x254b(%rip),%ymm10        # 58c0 <_sk_callback_avx+0x385>
   DB  196,65,116,89,210                   ; vmulps        %ymm10,%ymm1,%ymm10
   DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
   DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
@@ -7383,7 +7499,7 @@ _sk_store_565_avx LABEL PROC
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           3275 <_sk_store_565_avx+0x89>
+  DB  117,10                              ; jne           33c5 <_sk_store_565_avx+0x89>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7391,9 +7507,9 @@ _sk_store_565_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            3271 <_sk_store_565_avx+0x85>
+  DB  119,236                             ; ja            33c1 <_sk_store_565_avx+0x85>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 32d4 <_sk_store_565_avx+0xe8>
+  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 3424 <_sk_store_565_avx+0xe8>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7404,7 +7520,7 @@ _sk_store_565_avx LABEL PROC
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           3271 <_sk_store_565_avx+0x85>
+  DB  235,159                             ; jmp           33c1 <_sk_store_565_avx+0x85>
   DB  102,144                             ; xchg          %ax,%ax
   DB  245                                 ; cmc
   DB  255                                 ; (bad)
@@ -7435,31 +7551,31 @@ _sk_load_4444_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,152,0,0,0                    ; jne           3396 <_sk_load_4444_avx+0xa6>
+  DB  15,133,152,0,0,0                    ; jne           34e6 <_sk_load_4444_avx+0xa6>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,64,36,0,0          ; vbroadcastss  0x2440(%rip),%ymm0        # 5760 <_sk_callback_avx+0x375>
+  DB  196,226,125,24,5,84,36,0,0          ; vbroadcastss  0x2454(%rip),%ymm0        # 58c4 <_sk_callback_avx+0x389>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,51,36,0,0         ; vbroadcastss  0x2433(%rip),%ymm1        # 5764 <_sk_callback_avx+0x379>
+  DB  196,226,125,24,13,71,36,0,0         ; vbroadcastss  0x2447(%rip),%ymm1        # 58c8 <_sk_callback_avx+0x38d>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,42,36,0,0         ; vbroadcastss  0x242a(%rip),%ymm1        # 5768 <_sk_callback_avx+0x37d>
+  DB  196,226,125,24,13,62,36,0,0         ; vbroadcastss  0x243e(%rip),%ymm1        # 58cc <_sk_callback_avx+0x391>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,29,36,0,0         ; vbroadcastss  0x241d(%rip),%ymm2        # 576c <_sk_callback_avx+0x381>
+  DB  196,226,125,24,21,49,36,0,0         ; vbroadcastss  0x2431(%rip),%ymm2        # 58d0 <_sk_callback_avx+0x395>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,20,36,0,0         ; vbroadcastss  0x2414(%rip),%ymm2        # 5770 <_sk_callback_avx+0x385>
+  DB  196,226,125,24,21,40,36,0,0         ; vbroadcastss  0x2428(%rip),%ymm2        # 58d4 <_sk_callback_avx+0x399>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,7,36,0,0            ; vbroadcastss  0x2407(%rip),%ymm8        # 5774 <_sk_callback_avx+0x389>
+  DB  196,98,125,24,5,27,36,0,0           ; vbroadcastss  0x241b(%rip),%ymm8        # 58d8 <_sk_callback_avx+0x39d>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,253,35,0,0          ; vbroadcastss  0x23fd(%rip),%ymm8        # 5778 <_sk_callback_avx+0x38d>
+  DB  196,98,125,24,5,17,36,0,0           ; vbroadcastss  0x2411(%rip),%ymm8        # 58dc <_sk_callback_avx+0x3a1>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,239,35,0,0          ; vbroadcastss  0x23ef(%rip),%ymm8        # 577c <_sk_callback_avx+0x391>
+  DB  196,98,125,24,5,3,36,0,0            ; vbroadcastss  0x2403(%rip),%ymm8        # 58e0 <_sk_callback_avx+0x3a5>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7468,9 +7584,9 @@ _sk_load_4444_avx LABEL PROC
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,86,255,255,255               ; ja            3304 <_sk_load_4444_avx+0x14>
+  DB  15,135,86,255,255,255               ; ja            3454 <_sk_load_4444_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 3404 <_sk_load_4444_avx+0x114>
+  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 3554 <_sk_load_4444_avx+0x114>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7482,7 +7598,7 @@ _sk_load_4444_avx LABEL PROC
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,2,255,255,255                   ; jmpq          3304 <_sk_load_4444_avx+0x14>
+  DB  233,2,255,255,255                   ; jmpq          3454 <_sk_load_4444_avx+0x14>
   DB  102,144                             ; xchg          %ax,%ax
   DB  242,255                             ; repnz         (bad)
   DB  255                                 ; (bad)
@@ -7559,25 +7675,25 @@ _sk_gather_4444_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,134,34,0,0         ; vbroadcastss  0x2286(%rip),%ymm0        # 5780 <_sk_callback_avx+0x395>
+  DB  196,226,125,24,5,154,34,0,0         ; vbroadcastss  0x229a(%rip),%ymm0        # 58e4 <_sk_callback_avx+0x3a9>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,121,34,0,0        ; vbroadcastss  0x2279(%rip),%ymm1        # 5784 <_sk_callback_avx+0x399>
+  DB  196,226,125,24,13,141,34,0,0        ; vbroadcastss  0x228d(%rip),%ymm1        # 58e8 <_sk_callback_avx+0x3ad>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,112,34,0,0        ; vbroadcastss  0x2270(%rip),%ymm1        # 5788 <_sk_callback_avx+0x39d>
+  DB  196,226,125,24,13,132,34,0,0        ; vbroadcastss  0x2284(%rip),%ymm1        # 58ec <_sk_callback_avx+0x3b1>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,99,34,0,0         ; vbroadcastss  0x2263(%rip),%ymm2        # 578c <_sk_callback_avx+0x3a1>
+  DB  196,226,125,24,21,119,34,0,0        ; vbroadcastss  0x2277(%rip),%ymm2        # 58f0 <_sk_callback_avx+0x3b5>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,90,34,0,0         ; vbroadcastss  0x225a(%rip),%ymm2        # 5790 <_sk_callback_avx+0x3a5>
+  DB  196,226,125,24,21,110,34,0,0        ; vbroadcastss  0x226e(%rip),%ymm2        # 58f4 <_sk_callback_avx+0x3b9>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,77,34,0,0           ; vbroadcastss  0x224d(%rip),%ymm8        # 5794 <_sk_callback_avx+0x3a9>
+  DB  196,98,125,24,5,97,34,0,0           ; vbroadcastss  0x2261(%rip),%ymm8        # 58f8 <_sk_callback_avx+0x3bd>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,67,34,0,0           ; vbroadcastss  0x2243(%rip),%ymm8        # 5798 <_sk_callback_avx+0x3ad>
+  DB  196,98,125,24,5,87,34,0,0           ; vbroadcastss  0x2257(%rip),%ymm8        # 58fc <_sk_callback_avx+0x3c1>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,53,34,0,0           ; vbroadcastss  0x2235(%rip),%ymm8        # 579c <_sk_callback_avx+0x3b1>
+  DB  196,98,125,24,5,73,34,0,0           ; vbroadcastss  0x2249(%rip),%ymm8        # 5900 <_sk_callback_avx+0x3c5>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -7591,7 +7707,7 @@ PUBLIC _sk_store_4444_avx
 _sk_store_4444_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,26,34,0,0           ; vbroadcastss  0x221a(%rip),%ymm8        # 57a0 <_sk_callback_avx+0x3b5>
+  DB  196,98,125,24,5,46,34,0,0           ; vbroadcastss  0x222e(%rip),%ymm8        # 5904 <_sk_callback_avx+0x3c9>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,12               ; vpslld        $0xc,%xmm9,%xmm10
@@ -7618,7 +7734,7 @@ _sk_store_4444_avx LABEL PROC
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           361f <_sk_store_4444_avx+0xa7>
+  DB  117,10                              ; jne           376f <_sk_store_4444_avx+0xa7>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7626,9 +7742,9 @@ _sk_store_4444_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            361b <_sk_store_4444_avx+0xa3>
+  DB  119,236                             ; ja            376b <_sk_store_4444_avx+0xa3>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,66,0,0,0                  ; lea           0x42(%rip),%r9        # 367c <_sk_store_4444_avx+0x104>
+  DB  76,141,13,66,0,0,0                  ; lea           0x42(%rip),%r9        # 37cc <_sk_store_4444_avx+0x104>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7639,7 +7755,7 @@ _sk_store_4444_avx LABEL PROC
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           361b <_sk_store_4444_avx+0xa3>
+  DB  235,159                             ; jmp           376b <_sk_store_4444_avx+0xa3>
   DB  247,255                             ; idiv          %edi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -7668,12 +7784,12 @@ _sk_load_8888_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,135,0,0,0                    ; jne           372d <_sk_load_8888_avx+0x95>
+  DB  15,133,135,0,0,0                    ; jne           387d <_sk_load_8888_avx+0x95>
   DB  196,65,124,16,12,186                ; vmovups       (%r10,%rdi,4),%ymm9
-  DB  197,124,40,21,76,34,0,0             ; vmovaps       0x224c(%rip),%ymm10        # 5900 <_sk_callback_avx+0x515>
+  DB  197,124,40,21,92,34,0,0             ; vmovaps       0x225c(%rip),%ymm10        # 5a60 <_sk_callback_avx+0x525>
   DB  196,193,52,84,194                   ; vandps        %ymm10,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,222,32,0,0          ; vbroadcastss  0x20de(%rip),%ymm8        # 57a4 <_sk_callback_avx+0x3b9>
+  DB  196,98,125,24,5,242,32,0,0          ; vbroadcastss  0x20f2(%rip),%ymm8        # 5908 <_sk_callback_avx+0x3cd>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  196,193,113,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm1
   DB  196,99,125,25,203,1                 ; vextractf128  $0x1,%ymm9,%xmm3
@@ -7700,9 +7816,9 @@ _sk_load_8888_avx LABEL PROC
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,102,255,255,255              ; ja            36ac <_sk_load_8888_avx+0x14>
+  DB  15,135,102,255,255,255              ; ja            37fc <_sk_load_8888_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,139,0,0,0                 ; lea           0x8b(%rip),%r9        # 37dc <_sk_load_8888_avx+0x144>
+  DB  76,141,13,139,0,0,0                 ; lea           0x8b(%rip),%r9        # 392c <_sk_load_8888_avx+0x144>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7725,7 +7841,7 @@ _sk_load_8888_avx LABEL PROC
   DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
   DB  196,195,49,34,4,186,0               ; vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
   DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
-  DB  233,210,254,255,255                 ; jmpq          36ac <_sk_load_8888_avx+0x14>
+  DB  233,210,254,255,255                 ; jmpq          37fc <_sk_load_8888_avx+0x14>
   DB  102,144                             ; xchg          %ax,%ax
   DB  236                                 ; in            (%dx),%al
   DB  255                                 ; (bad)
@@ -7743,7 +7859,7 @@ _sk_load_8888_avx LABEL PROC
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  126,255                             ; jle           37f5 <_sk_load_8888_avx+0x15d>
+  DB  126,255                             ; jle           3945 <_sk_load_8888_avx+0x15d>
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
 
@@ -7786,10 +7902,10 @@ _sk_gather_8888_avx LABEL PROC
   DB  196,131,121,34,4,152,2              ; vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   DB  196,131,121,34,28,144,3             ; vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,118,32,0,0            ; vmovaps       0x2076(%rip),%ymm10        # 5920 <_sk_callback_avx+0x535>
+  DB  197,124,40,21,134,32,0,0            ; vmovaps       0x2086(%rip),%ymm10        # 5a80 <_sk_callback_avx+0x545>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,236,30,0,0         ; vbroadcastss  0x1eec(%rip),%ymm9        # 57a8 <_sk_callback_avx+0x3bd>
+  DB  196,98,125,24,13,0,31,0,0           ; vbroadcastss  0x1f00(%rip),%ymm9        # 590c <_sk_callback_avx+0x3d1>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -7819,7 +7935,7 @@ PUBLIC _sk_store_8888_avx
 _sk_store_8888_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,122,30,0,0          ; vbroadcastss  0x1e7a(%rip),%ymm8        # 57ac <_sk_callback_avx+0x3c1>
+  DB  196,98,125,24,5,142,30,0,0          ; vbroadcastss  0x1e8e(%rip),%ymm8        # 5910 <_sk_callback_avx+0x3d5>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,65,116,89,208                   ; vmulps        %ymm8,%ymm1,%ymm10
@@ -7844,7 +7960,7 @@ _sk_store_8888_avx LABEL PROC
   DB  196,65,45,86,192                    ; vorpd         %ymm8,%ymm10,%ymm8
   DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           39c0 <_sk_store_8888_avx+0x9c>
+  DB  117,10                              ; jne           3b10 <_sk_store_8888_avx+0x9c>
   DB  196,65,124,17,4,186                 ; vmovups       %ymm8,(%r10,%rdi,4)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7852,9 +7968,9 @@ _sk_store_8888_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            39bc <_sk_store_8888_avx+0x98>
+  DB  119,236                             ; ja            3b0c <_sk_store_8888_avx+0x98>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,85,0,0,0                  ; lea           0x55(%rip),%r9        # 3a30 <_sk_store_8888_avx+0x10c>
+  DB  76,141,13,85,0,0,0                  ; lea           0x55(%rip),%r9        # 3b80 <_sk_store_8888_avx+0x10c>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7868,7 +7984,7 @@ _sk_store_8888_avx LABEL PROC
   DB  196,67,121,22,68,186,8,2            ; vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
   DB  196,67,121,22,68,186,4,1            ; vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
   DB  196,65,121,126,4,186                ; vmovd         %xmm8,(%r10,%rdi,4)
-  DB  235,143                             ; jmp           39bc <_sk_store_8888_avx+0x98>
+  DB  235,143                             ; jmp           3b0c <_sk_store_8888_avx+0x98>
   DB  15,31,0                             ; nopl          (%rax)
   DB  245                                 ; cmc
   DB  255                                 ; (bad)
@@ -7904,7 +8020,7 @@ _sk_load_f16_avx LABEL PROC
   DB  197,252,17,116,36,64                ; vmovups       %ymm6,0x40(%rsp)
   DB  197,252,17,108,36,32                ; vmovups       %ymm5,0x20(%rsp)
   DB  197,254,127,36,36                   ; vmovdqu       %ymm4,(%rsp)
-  DB  15,133,143,2,0,0                    ; jne           3d07 <_sk_load_f16_avx+0x2bb>
+  DB  15,133,143,2,0,0                    ; jne           3e57 <_sk_load_f16_avx+0x2bb>
   DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
   DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
   DB  197,249,16,76,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm1
@@ -7922,13 +8038,13 @@ _sk_load_f16_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  DB  196,98,125,24,37,223,28,0,0         ; vbroadcastss  0x1cdf(%rip),%ymm12        # 57b0 <_sk_callback_avx+0x3c5>
+  DB  196,98,125,24,37,243,28,0,0         ; vbroadcastss  0x1cf3(%rip),%ymm12        # 5914 <_sk_callback_avx+0x3d9>
   DB  196,193,124,84,204                  ; vandps        %ymm12,%ymm0,%ymm1
   DB  197,252,87,193                      ; vxorps        %ymm1,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,203,28,0,0         ; vbroadcastss  0x1ccb(%rip),%xmm11        # 57b4 <_sk_callback_avx+0x3c9>
+  DB  196,98,121,24,29,223,28,0,0         ; vbroadcastss  0x1cdf(%rip),%xmm11        # 5918 <_sk_callback_avx+0x3dd>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,193,28,0,0         ; vbroadcastss  0x1cc1(%rip),%xmm13        # 57b8 <_sk_callback_avx+0x3cd>
+  DB  196,98,121,24,45,213,28,0,0         ; vbroadcastss  0x1cd5(%rip),%xmm13        # 591c <_sk_callback_avx+0x3e1>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -7942,7 +8058,7 @@ _sk_load_f16_avx LABEL PROC
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,193                      ; vorps         %ymm1,%ymm0,%ymm0
   DB  196,227,125,25,193,1                ; vextractf128  $0x1,%ymm0,%xmm1
-  DB  196,226,121,24,29,119,28,0,0        ; vbroadcastss  0x1c77(%rip),%xmm3        # 57bc <_sk_callback_avx+0x3d1>
+  DB  196,226,121,24,29,139,28,0,0        ; vbroadcastss  0x1c8b(%rip),%xmm3        # 5920 <_sk_callback_avx+0x3e5>
   DB  197,241,254,203                     ; vpaddd        %xmm3,%xmm1,%xmm1
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -8035,29 +8151,29 @@ _sk_load_f16_avx LABEL PROC
   DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,79                              ; je            3d66 <_sk_load_f16_avx+0x31a>
+  DB  116,79                              ; je            3eb6 <_sk_load_f16_avx+0x31a>
   DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,67                              ; jb            3d66 <_sk_load_f16_avx+0x31a>
+  DB  114,67                              ; jb            3eb6 <_sk_load_f16_avx+0x31a>
   DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,68                              ; je            3d73 <_sk_load_f16_avx+0x327>
+  DB  116,68                              ; je            3ec3 <_sk_load_f16_avx+0x327>
   DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,56                              ; jb            3d73 <_sk_load_f16_avx+0x327>
+  DB  114,56                              ; jb            3ec3 <_sk_load_f16_avx+0x327>
   DB  197,251,16,76,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,68,253,255,255               ; je            3a8f <_sk_load_f16_avx+0x43>
+  DB  15,132,68,253,255,255               ; je            3bdf <_sk_load_f16_avx+0x43>
   DB  197,241,22,76,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm1,%xmm1
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,52,253,255,255               ; jb            3a8f <_sk_load_f16_avx+0x43>
+  DB  15,130,52,253,255,255               ; jb            3bdf <_sk_load_f16_avx+0x43>
   DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
-  DB  233,41,253,255,255                  ; jmpq          3a8f <_sk_load_f16_avx+0x43>
+  DB  233,41,253,255,255                  ; jmpq          3bdf <_sk_load_f16_avx+0x43>
   DB  197,241,87,201                      ; vxorpd        %xmm1,%xmm1,%xmm1
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,28,253,255,255                  ; jmpq          3a8f <_sk_load_f16_avx+0x43>
+  DB  233,28,253,255,255                  ; jmpq          3bdf <_sk_load_f16_avx+0x43>
   DB  197,241,87,201                      ; vxorpd        %xmm1,%xmm1,%xmm1
-  DB  233,19,253,255,255                  ; jmpq          3a8f <_sk_load_f16_avx+0x43>
+  DB  233,19,253,255,255                  ; jmpq          3bdf <_sk_load_f16_avx+0x43>
 
 PUBLIC _sk_gather_f16_avx
 _sk_gather_f16_avx LABEL PROC
@@ -8119,13 +8235,13 @@ _sk_gather_f16_avx LABEL PROC
   DB  197,249,105,210                     ; vpunpckhwd    %xmm2,%xmm0,%xmm2
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  DB  196,98,125,24,37,55,25,0,0          ; vbroadcastss  0x1937(%rip),%ymm12        # 57c0 <_sk_callback_avx+0x3d5>
+  DB  196,98,125,24,37,75,25,0,0          ; vbroadcastss  0x194b(%rip),%ymm12        # 5924 <_sk_callback_avx+0x3e9>
   DB  196,193,124,84,212                  ; vandps        %ymm12,%ymm0,%ymm2
   DB  197,252,87,194                      ; vxorps        %ymm2,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,35,25,0,0          ; vbroadcastss  0x1923(%rip),%xmm11        # 57c4 <_sk_callback_avx+0x3d9>
+  DB  196,98,121,24,29,55,25,0,0          ; vbroadcastss  0x1937(%rip),%xmm11        # 5928 <_sk_callback_avx+0x3ed>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,25,25,0,0          ; vbroadcastss  0x1919(%rip),%xmm13        # 57c8 <_sk_callback_avx+0x3dd>
+  DB  196,98,121,24,45,45,25,0,0          ; vbroadcastss  0x192d(%rip),%xmm13        # 592c <_sk_callback_avx+0x3f1>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -8139,7 +8255,7 @@ _sk_gather_f16_avx LABEL PROC
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,194                      ; vorps         %ymm2,%ymm0,%ymm0
   DB  196,227,125,25,194,1                ; vextractf128  $0x1,%ymm0,%xmm2
-  DB  196,226,121,24,29,207,24,0,0        ; vbroadcastss  0x18cf(%rip),%xmm3        # 57cc <_sk_callback_avx+0x3e1>
+  DB  196,226,121,24,29,227,24,0,0        ; vbroadcastss  0x18e3(%rip),%xmm3        # 5930 <_sk_callback_avx+0x3f5>
   DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -8241,12 +8357,12 @@ _sk_store_f16_avx LABEL PROC
   DB  197,252,17,180,36,128,0,0,0         ; vmovups       %ymm6,0x80(%rsp)
   DB  197,252,17,108,36,96                ; vmovups       %ymm5,0x60(%rsp)
   DB  197,252,17,100,36,64                ; vmovups       %ymm4,0x40(%rsp)
-  DB  196,98,125,24,13,220,22,0,0         ; vbroadcastss  0x16dc(%rip),%ymm9        # 57d0 <_sk_callback_avx+0x3e5>
+  DB  196,98,125,24,13,240,22,0,0         ; vbroadcastss  0x16f0(%rip),%ymm9        # 5934 <_sk_callback_avx+0x3f9>
   DB  196,65,124,84,209                   ; vandps        %ymm9,%ymm0,%ymm10
   DB  197,252,17,4,36                     ; vmovups       %ymm0,(%rsp)
   DB  196,65,124,87,218                   ; vxorps        %ymm10,%ymm0,%ymm11
   DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
-  DB  196,98,121,24,5,194,22,0,0          ; vbroadcastss  0x16c2(%rip),%xmm8        # 57d4 <_sk_callback_avx+0x3e9>
+  DB  196,98,121,24,5,214,22,0,0          ; vbroadcastss  0x16d6(%rip),%xmm8        # 5938 <_sk_callback_avx+0x3fd>
   DB  196,65,57,102,236                   ; vpcmpgtd      %xmm12,%xmm8,%xmm13
   DB  196,65,57,102,243                   ; vpcmpgtd      %xmm11,%xmm8,%xmm14
   DB  196,67,13,24,237,1                  ; vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -8256,7 +8372,7 @@ _sk_store_f16_avx LABEL PROC
   DB  196,67,13,24,242,1                  ; vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
   DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
-  DB  196,98,125,24,21,137,22,0,0         ; vbroadcastss  0x1689(%rip),%ymm10        # 57d8 <_sk_callback_avx+0x3ed>
+  DB  196,98,125,24,21,157,22,0,0         ; vbroadcastss  0x169d(%rip),%ymm10        # 593c <_sk_callback_avx+0x401>
   DB  196,65,12,86,242                    ; vorps         %ymm10,%ymm14,%ymm14
   DB  196,67,125,25,247,1                 ; vextractf128  $0x1,%ymm14,%xmm15
   DB  196,65,1,254,228                    ; vpaddd        %xmm12,%xmm15,%xmm12
@@ -8338,7 +8454,7 @@ _sk_store_f16_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,75                              ; jne           4336 <_sk_store_f16_avx+0x270>
+  DB  117,75                              ; jne           4486 <_sk_store_f16_avx+0x270>
   DB  197,120,17,28,248                   ; vmovups       %xmm11,(%rax,%rdi,8)
   DB  197,120,17,84,248,16                ; vmovups       %xmm10,0x10(%rax,%rdi,8)
   DB  197,120,17,76,248,32                ; vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -8354,22 +8470,22 @@ _sk_store_f16_avx LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  197,121,214,28,248                  ; vmovq         %xmm11,(%rax,%rdi,8)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,193                             ; je            4302 <_sk_store_f16_avx+0x23c>
+  DB  116,193                             ; je            4452 <_sk_store_f16_avx+0x23c>
   DB  197,121,23,92,248,8                 ; vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,181                             ; jb            4302 <_sk_store_f16_avx+0x23c>
+  DB  114,181                             ; jb            4452 <_sk_store_f16_avx+0x23c>
   DB  197,121,214,84,248,16               ; vmovq         %xmm10,0x10(%rax,%rdi,8)
-  DB  116,173                             ; je            4302 <_sk_store_f16_avx+0x23c>
+  DB  116,173                             ; je            4452 <_sk_store_f16_avx+0x23c>
   DB  197,121,23,84,248,24                ; vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,161                             ; jb            4302 <_sk_store_f16_avx+0x23c>
+  DB  114,161                             ; jb            4452 <_sk_store_f16_avx+0x23c>
   DB  197,121,214,76,248,32               ; vmovq         %xmm9,0x20(%rax,%rdi,8)
-  DB  116,153                             ; je            4302 <_sk_store_f16_avx+0x23c>
+  DB  116,153                             ; je            4452 <_sk_store_f16_avx+0x23c>
   DB  197,121,23,76,248,40                ; vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,141                             ; jb            4302 <_sk_store_f16_avx+0x23c>
+  DB  114,141                             ; jb            4452 <_sk_store_f16_avx+0x23c>
   DB  197,121,214,68,248,48               ; vmovq         %xmm8,0x30(%rax,%rdi,8)
-  DB  235,133                             ; jmp           4302 <_sk_store_f16_avx+0x23c>
+  DB  235,133                             ; jmp           4452 <_sk_store_f16_avx+0x23c>
 
 PUBLIC _sk_load_u16_be_avx
 _sk_load_u16_be_avx LABEL PROC
@@ -8377,7 +8493,7 @@ _sk_load_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,253,0,0,0                    ; jne           4490 <_sk_load_u16_be_avx+0x113>
+  DB  15,133,253,0,0,0                    ; jne           45e0 <_sk_load_u16_be_avx+0x113>
   DB  196,65,121,16,4,64                  ; vmovupd       (%r8,%rax,2),%xmm8
   DB  196,193,121,16,84,64,16             ; vmovupd       0x10(%r8,%rax,2),%xmm2
   DB  196,193,121,16,92,64,32             ; vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -8399,7 +8515,7 @@ _sk_load_u16_be_avx LABEL PROC
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,216,19,0,0         ; vbroadcastss  0x13d8(%rip),%ymm11        # 57dc <_sk_callback_avx+0x3f1>
+  DB  196,98,125,24,29,236,19,0,0         ; vbroadcastss  0x13ec(%rip),%ymm11        # 5940 <_sk_callback_avx+0x405>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,177,109,202                     ; vpunpckhqdq   %xmm2,%xmm9,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -8433,29 +8549,29 @@ _sk_load_u16_be_avx LABEL PROC
   DB  196,65,123,16,4,64                  ; vmovsd        (%r8,%rax,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            44f6 <_sk_load_u16_be_avx+0x179>
+  DB  116,85                              ; je            4646 <_sk_load_u16_be_avx+0x179>
   DB  196,65,57,22,68,64,8                ; vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            44f6 <_sk_load_u16_be_avx+0x179>
+  DB  114,72                              ; jb            4646 <_sk_load_u16_be_avx+0x179>
   DB  196,193,123,16,84,64,16             ; vmovsd        0x10(%r8,%rax,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            4503 <_sk_load_u16_be_avx+0x186>
+  DB  116,72                              ; je            4653 <_sk_load_u16_be_avx+0x186>
   DB  196,193,105,22,84,64,24             ; vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            4503 <_sk_load_u16_be_avx+0x186>
+  DB  114,59                              ; jb            4653 <_sk_load_u16_be_avx+0x186>
   DB  196,193,123,16,92,64,32             ; vmovsd        0x20(%r8,%rax,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,213,254,255,255              ; je            43ae <_sk_load_u16_be_avx+0x31>
+  DB  15,132,213,254,255,255              ; je            44fe <_sk_load_u16_be_avx+0x31>
   DB  196,193,97,22,92,64,40              ; vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,196,254,255,255              ; jb            43ae <_sk_load_u16_be_avx+0x31>
+  DB  15,130,196,254,255,255              ; jb            44fe <_sk_load_u16_be_avx+0x31>
   DB  196,65,122,126,76,64,48             ; vmovq         0x30(%r8,%rax,2),%xmm9
-  DB  233,184,254,255,255                 ; jmpq          43ae <_sk_load_u16_be_avx+0x31>
+  DB  233,184,254,255,255                 ; jmpq          44fe <_sk_load_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,171,254,255,255                 ; jmpq          43ae <_sk_load_u16_be_avx+0x31>
+  DB  233,171,254,255,255                 ; jmpq          44fe <_sk_load_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,162,254,255,255                 ; jmpq          43ae <_sk_load_u16_be_avx+0x31>
+  DB  233,162,254,255,255                 ; jmpq          44fe <_sk_load_u16_be_avx+0x31>
 
 PUBLIC _sk_load_rgb_u16_be_avx
 _sk_load_rgb_u16_be_avx LABEL PROC
@@ -8463,7 +8579,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,127                        ; lea           (%rdi,%rdi,2),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,243,0,0,0                    ; jne           4611 <_sk_load_rgb_u16_be_avx+0x105>
+  DB  15,133,243,0,0,0                    ; jne           4761 <_sk_load_rgb_u16_be_avx+0x105>
   DB  196,193,122,111,4,64                ; vmovdqu       (%r8,%rax,2),%xmm0
   DB  196,193,122,111,84,64,12            ; vmovdqu       0xc(%r8,%rax,2),%xmm2
   DB  196,193,122,111,76,64,24            ; vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -8490,7 +8606,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,56,18,0,0          ; vbroadcastss  0x1238(%rip),%ymm11        # 57e0 <_sk_callback_avx+0x3f5>
+  DB  196,98,125,24,29,76,18,0,0          ; vbroadcastss  0x124c(%rip),%ymm11        # 5944 <_sk_callback_avx+0x409>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -8511,48 +8627,48 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,213,17,0,0        ; vbroadcastss  0x11d5(%rip),%ymm3        # 57e4 <_sk_callback_avx+0x3f9>
+  DB  196,226,125,24,29,233,17,0,0        ; vbroadcastss  0x11e9(%rip),%ymm3        # 5948 <_sk_callback_avx+0x40d>
   DB  255,224                             ; jmpq          *%rax
   DB  196,193,121,110,4,64                ; vmovd         (%r8,%rax,2),%xmm0
   DB  196,193,121,196,68,64,4,2           ; vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           462a <_sk_load_rgb_u16_be_avx+0x11e>
-  DB  233,40,255,255,255                  ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,5                               ; jne           477a <_sk_load_rgb_u16_be_avx+0x11e>
+  DB  233,40,255,255,255                  ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,76,64,6             ; vmovd         0x6(%r8,%rax,2),%xmm1
   DB  196,65,113,196,68,64,10,2           ; vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            4659 <_sk_load_rgb_u16_be_avx+0x14d>
+  DB  114,26                              ; jb            47a9 <_sk_load_rgb_u16_be_avx+0x14d>
   DB  196,193,121,110,76,64,12            ; vmovd         0xc(%r8,%rax,2),%xmm1
   DB  196,193,113,196,84,64,16,2          ; vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           465e <_sk_load_rgb_u16_be_avx+0x152>
-  DB  233,249,254,255,255                 ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,244,254,255,255                 ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           47ae <_sk_load_rgb_u16_be_avx+0x152>
+  DB  233,249,254,255,255                 ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,244,254,255,255                 ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,76,64,18            ; vmovd         0x12(%r8,%rax,2),%xmm1
   DB  196,65,113,196,76,64,22,2           ; vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            468d <_sk_load_rgb_u16_be_avx+0x181>
+  DB  114,26                              ; jb            47dd <_sk_load_rgb_u16_be_avx+0x181>
   DB  196,193,121,110,76,64,24            ; vmovd         0x18(%r8,%rax,2),%xmm1
   DB  196,193,113,196,76,64,28,2          ; vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           4692 <_sk_load_rgb_u16_be_avx+0x186>
-  DB  233,197,254,255,255                 ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,192,254,255,255                 ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           47e2 <_sk_load_rgb_u16_be_avx+0x186>
+  DB  233,197,254,255,255                 ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,192,254,255,255                 ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,92,64,30            ; vmovd         0x1e(%r8,%rax,2),%xmm3
   DB  196,65,97,196,92,64,34,2            ; vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            46bb <_sk_load_rgb_u16_be_avx+0x1af>
+  DB  114,20                              ; jb            480b <_sk_load_rgb_u16_be_avx+0x1af>
   DB  196,193,121,110,92,64,36            ; vmovd         0x24(%r8,%rax,2),%xmm3
   DB  196,193,97,196,92,64,40,2           ; vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  DB  233,151,254,255,255                 ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,146,254,255,255                 ; jmpq          4552 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,151,254,255,255                 ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,146,254,255,255                 ; jmpq          46a2 <_sk_load_rgb_u16_be_avx+0x46>
 
 PUBLIC _sk_store_u16_be_avx
 _sk_store_u16_be_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
-  DB  196,98,125,24,5,18,17,0,0           ; vbroadcastss  0x1112(%rip),%ymm8        # 57e8 <_sk_callback_avx+0x3fd>
+  DB  196,98,125,24,5,38,17,0,0           ; vbroadcastss  0x1126(%rip),%ymm8        # 594c <_sk_callback_avx+0x411>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
@@ -8590,7 +8706,7 @@ _sk_store_u16_be_avx LABEL PROC
   DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
   DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,31                              ; jne           47ba <_sk_store_u16_be_avx+0xfa>
+  DB  117,31                              ; jne           490a <_sk_store_u16_be_avx+0xfa>
   DB  196,65,120,17,28,64                 ; vmovups       %xmm11,(%r8,%rax,2)
   DB  196,65,120,17,84,64,16              ; vmovups       %xmm10,0x10(%r8,%rax,2)
   DB  196,65,120,17,76,64,32              ; vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -8599,31 +8715,31 @@ _sk_store_u16_be_avx LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,214,28,64                ; vmovq         %xmm11,(%r8,%rax,2)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  116,240                             ; je            4906 <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,92,64,8               ; vmovhpd       %xmm11,0x8(%r8,%rax,2)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  114,227                             ; jb            4906 <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,84,64,16             ; vmovq         %xmm10,0x10(%r8,%rax,2)
-  DB  116,218                             ; je            47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  116,218                             ; je            4906 <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,84,64,24              ; vmovhpd       %xmm10,0x18(%r8,%rax,2)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  114,205                             ; jb            4906 <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,76,64,32             ; vmovq         %xmm9,0x20(%r8,%rax,2)
-  DB  116,196                             ; je            47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  116,196                             ; je            4906 <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,76,64,40              ; vmovhpd       %xmm9,0x28(%r8,%rax,2)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,183                             ; jb            47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  114,183                             ; jb            4906 <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,68,64,48             ; vmovq         %xmm8,0x30(%r8,%rax,2)
-  DB  235,174                             ; jmp           47b6 <_sk_store_u16_be_avx+0xf6>
+  DB  235,174                             ; jmp           4906 <_sk_store_u16_be_avx+0xf6>
 
 PUBLIC _sk_load_f32_avx
 _sk_load_f32_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  119,110                             ; ja            487e <_sk_load_f32_avx+0x76>
+  DB  119,110                             ; ja            49ce <_sk_load_f32_avx+0x76>
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 48a8 <_sk_load_f32_avx+0xa0>
+  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 49f8 <_sk_load_f32_avx+0xa0>
   DB  73,99,4,138                         ; movslq        (%r10,%rcx,4),%rax
   DB  76,1,208                            ; add           %r10,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8680,7 +8796,7 @@ _sk_store_f32_avx LABEL PROC
   DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
   DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,55                              ; jne           4935 <_sk_store_f32_avx+0x6d>
+  DB  117,55                              ; jne           4a85 <_sk_store_f32_avx+0x6d>
   DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -8693,22 +8809,22 @@ _sk_store_f32_avx LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            4931 <_sk_store_f32_avx+0x69>
+  DB  116,240                             ; je            4a81 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            4931 <_sk_store_f32_avx+0x69>
+  DB  114,227                             ; jb            4a81 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
-  DB  116,218                             ; je            4931 <_sk_store_f32_avx+0x69>
+  DB  116,218                             ; je            4a81 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            4931 <_sk_store_f32_avx+0x69>
+  DB  114,205                             ; jb            4a81 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  DB  116,195                             ; je            4931 <_sk_store_f32_avx+0x69>
+  DB  116,195                             ; je            4a81 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,181                             ; jb            4931 <_sk_store_f32_avx+0x69>
+  DB  114,181                             ; jb            4a81 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  DB  235,171                             ; jmp           4931 <_sk_store_f32_avx+0x69>
+  DB  235,171                             ; jmp           4a81 <_sk_store_f32_avx+0x69>
 
 PUBLIC _sk_clamp_x_avx
 _sk_clamp_x_avx LABEL PROC
@@ -8830,12 +8946,12 @@ _sk_mirror_y_avx LABEL PROC
 
 PUBLIC _sk_luminance_to_alpha_avx
 _sk_luminance_to_alpha_avx LABEL PROC
-  DB  196,226,125,24,29,155,12,0,0        ; vbroadcastss  0xc9b(%rip),%ymm3        # 57ec <_sk_callback_avx+0x401>
+  DB  196,226,125,24,29,175,12,0,0        ; vbroadcastss  0xcaf(%rip),%ymm3        # 5950 <_sk_callback_avx+0x415>
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,29,146,12,0,0        ; vbroadcastss  0xc92(%rip),%ymm3        # 57f0 <_sk_callback_avx+0x405>
+  DB  196,226,125,24,29,166,12,0,0        ; vbroadcastss  0xca6(%rip),%ymm3        # 5954 <_sk_callback_avx+0x419>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,133,12,0,0        ; vbroadcastss  0xc85(%rip),%ymm1        # 57f4 <_sk_callback_avx+0x409>
+  DB  196,226,125,24,13,153,12,0,0        ; vbroadcastss  0xc99(%rip),%ymm1        # 5958 <_sk_callback_avx+0x41d>
   DB  197,236,89,201                      ; vmulps        %ymm1,%ymm2,%ymm1
   DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9003,7 +9119,7 @@ _sk_linear_gradient_avx LABEL PROC
   DB  196,226,125,24,88,28                ; vbroadcastss  0x1c(%rax),%ymm3
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  15,132,146,0,0,0                    ; je            4ec5 <_sk_linear_gradient_avx+0xb8>
+  DB  15,132,146,0,0,0                    ; je            5015 <_sk_linear_gradient_avx+0xb8>
   DB  72,139,64,8                         ; mov           0x8(%rax),%rax
   DB  72,131,192,32                       ; add           $0x20,%rax
   DB  196,65,28,87,228                    ; vxorps        %ymm12,%ymm12,%ymm12
@@ -9030,8 +9146,8 @@ _sk_linear_gradient_avx LABEL PROC
   DB  196,227,13,74,219,208               ; vblendvps     %ymm13,%ymm3,%ymm14,%ymm3
   DB  72,131,192,36                       ; add           $0x24,%rax
   DB  73,255,200                          ; dec           %r8
-  DB  117,140                             ; jne           4e4f <_sk_linear_gradient_avx+0x42>
-  DB  235,20                              ; jmp           4ed9 <_sk_linear_gradient_avx+0xcc>
+  DB  117,140                             ; jne           4f9f <_sk_linear_gradient_avx+0x42>
+  DB  235,20                              ; jmp           5029 <_sk_linear_gradient_avx+0xcc>
   DB  196,65,36,87,219                    ; vxorps        %ymm11,%ymm11,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
@@ -9082,27 +9198,27 @@ _sk_xy_to_polar_unit_avx LABEL PROC
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,106,8,0,0          ; vbroadcastss  0x86a(%rip),%ymm13        # 57f8 <_sk_callback_avx+0x40d>
+  DB  196,98,125,24,45,126,8,0,0          ; vbroadcastss  0x87e(%rip),%ymm13        # 595c <_sk_callback_avx+0x421>
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,96,8,0,0           ; vbroadcastss  0x860(%rip),%ymm14        # 57fc <_sk_callback_avx+0x411>
+  DB  196,98,125,24,53,116,8,0,0          ; vbroadcastss  0x874(%rip),%ymm14        # 5960 <_sk_callback_avx+0x425>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,81,8,0,0           ; vbroadcastss  0x851(%rip),%ymm14        # 5800 <_sk_callback_avx+0x415>
+  DB  196,98,125,24,53,101,8,0,0          ; vbroadcastss  0x865(%rip),%ymm14        # 5964 <_sk_callback_avx+0x429>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,229                    ; vmulps        %ymm13,%ymm12,%ymm12
-  DB  196,98,125,24,45,66,8,0,0           ; vbroadcastss  0x842(%rip),%ymm13        # 5804 <_sk_callback_avx+0x419>
+  DB  196,98,125,24,45,86,8,0,0           ; vbroadcastss  0x856(%rip),%ymm13        # 5968 <_sk_callback_avx+0x42d>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,45,8,0,0           ; vbroadcastss  0x82d(%rip),%ymm10        # 5808 <_sk_callback_avx+0x41d>
+  DB  196,98,125,24,21,65,8,0,0           ; vbroadcastss  0x841(%rip),%ymm10        # 596c <_sk_callback_avx+0x431>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,23,8,0,0           ; vbroadcastss  0x817(%rip),%ymm10        # 580c <_sk_callback_avx+0x421>
+  DB  196,98,125,24,21,43,8,0,0           ; vbroadcastss  0x82b(%rip),%ymm10        # 5970 <_sk_callback_avx+0x435>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,1,8,0,0            ; vbroadcastss  0x801(%rip),%ymm10        # 5810 <_sk_callback_avx+0x425>
+  DB  196,98,125,24,21,21,8,0,0           ; vbroadcastss  0x815(%rip),%ymm10        # 5974 <_sk_callback_avx+0x439>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -9113,7 +9229,7 @@ _sk_xy_to_polar_unit_avx LABEL PROC
 PUBLIC _sk_save_xy_avx
 _sk_save_xy_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,224,7,0,0           ; vbroadcastss  0x7e0(%rip),%ymm8        # 5814 <_sk_callback_avx+0x429>
+  DB  196,98,125,24,5,244,7,0,0           ; vbroadcastss  0x7f4(%rip),%ymm8        # 5978 <_sk_callback_avx+0x43d>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -9146,9 +9262,9 @@ _sk_accumulate_avx LABEL PROC
 PUBLIC _sk_bilinear_nx_avx
 _sk_bilinear_nx_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,108,7,0,0          ; vbroadcastss  0x76c(%rip),%ymm0        # 5818 <_sk_callback_avx+0x42d>
+  DB  196,226,125,24,5,128,7,0,0          ; vbroadcastss  0x780(%rip),%ymm0        # 597c <_sk_callback_avx+0x441>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,99,7,0,0            ; vbroadcastss  0x763(%rip),%ymm8        # 581c <_sk_callback_avx+0x431>
+  DB  196,98,125,24,5,119,7,0,0           ; vbroadcastss  0x777(%rip),%ymm8        # 5980 <_sk_callback_avx+0x445>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9157,7 +9273,7 @@ _sk_bilinear_nx_avx LABEL PROC
 PUBLIC _sk_bilinear_px_avx
 _sk_bilinear_px_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,75,7,0,0           ; vbroadcastss  0x74b(%rip),%ymm0        # 5820 <_sk_callback_avx+0x435>
+  DB  196,226,125,24,5,95,7,0,0           ; vbroadcastss  0x75f(%rip),%ymm0        # 5984 <_sk_callback_avx+0x449>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -9167,9 +9283,9 @@ _sk_bilinear_px_avx LABEL PROC
 PUBLIC _sk_bilinear_ny_avx
 _sk_bilinear_ny_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,47,7,0,0          ; vbroadcastss  0x72f(%rip),%ymm1        # 5824 <_sk_callback_avx+0x439>
+  DB  196,226,125,24,13,67,7,0,0          ; vbroadcastss  0x743(%rip),%ymm1        # 5988 <_sk_callback_avx+0x44d>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,37,7,0,0            ; vbroadcastss  0x725(%rip),%ymm8        # 5828 <_sk_callback_avx+0x43d>
+  DB  196,98,125,24,5,57,7,0,0            ; vbroadcastss  0x739(%rip),%ymm8        # 598c <_sk_callback_avx+0x451>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9178,7 +9294,7 @@ _sk_bilinear_ny_avx LABEL PROC
 PUBLIC _sk_bilinear_py_avx
 _sk_bilinear_py_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,13,7,0,0          ; vbroadcastss  0x70d(%rip),%ymm1        # 582c <_sk_callback_avx+0x441>
+  DB  196,226,125,24,13,33,7,0,0          ; vbroadcastss  0x721(%rip),%ymm1        # 5990 <_sk_callback_avx+0x455>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -9188,14 +9304,14 @@ _sk_bilinear_py_avx LABEL PROC
 PUBLIC _sk_bicubic_n3x_avx
 _sk_bicubic_n3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,240,6,0,0          ; vbroadcastss  0x6f0(%rip),%ymm0        # 5830 <_sk_callback_avx+0x445>
+  DB  196,226,125,24,5,4,7,0,0            ; vbroadcastss  0x704(%rip),%ymm0        # 5994 <_sk_callback_avx+0x459>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,231,6,0,0           ; vbroadcastss  0x6e7(%rip),%ymm8        # 5834 <_sk_callback_avx+0x449>
+  DB  196,98,125,24,5,251,6,0,0           ; vbroadcastss  0x6fb(%rip),%ymm8        # 5998 <_sk_callback_avx+0x45d>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,216,6,0,0          ; vbroadcastss  0x6d8(%rip),%ymm10        # 5838 <_sk_callback_avx+0x44d>
+  DB  196,98,125,24,21,236,6,0,0          ; vbroadcastss  0x6ec(%rip),%ymm10        # 599c <_sk_callback_avx+0x461>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,206,6,0,0          ; vbroadcastss  0x6ce(%rip),%ymm10        # 583c <_sk_callback_avx+0x451>
+  DB  196,98,125,24,21,226,6,0,0          ; vbroadcastss  0x6e2(%rip),%ymm10        # 59a0 <_sk_callback_avx+0x465>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -9205,19 +9321,19 @@ _sk_bicubic_n3x_avx LABEL PROC
 PUBLIC _sk_bicubic_n1x_avx
 _sk_bicubic_n1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,177,6,0,0          ; vbroadcastss  0x6b1(%rip),%ymm0        # 5840 <_sk_callback_avx+0x455>
+  DB  196,226,125,24,5,197,6,0,0          ; vbroadcastss  0x6c5(%rip),%ymm0        # 59a4 <_sk_callback_avx+0x469>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,168,6,0,0           ; vbroadcastss  0x6a8(%rip),%ymm8        # 5844 <_sk_callback_avx+0x459>
+  DB  196,98,125,24,5,188,6,0,0           ; vbroadcastss  0x6bc(%rip),%ymm8        # 59a8 <_sk_callback_avx+0x46d>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,158,6,0,0          ; vbroadcastss  0x69e(%rip),%ymm9        # 5848 <_sk_callback_avx+0x45d>
+  DB  196,98,125,24,13,178,6,0,0          ; vbroadcastss  0x6b2(%rip),%ymm9        # 59ac <_sk_callback_avx+0x471>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,148,6,0,0          ; vbroadcastss  0x694(%rip),%ymm10        # 584c <_sk_callback_avx+0x461>
+  DB  196,98,125,24,21,168,6,0,0          ; vbroadcastss  0x6a8(%rip),%ymm10        # 59b0 <_sk_callback_avx+0x475>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,133,6,0,0          ; vbroadcastss  0x685(%rip),%ymm10        # 5850 <_sk_callback_avx+0x465>
+  DB  196,98,125,24,21,153,6,0,0          ; vbroadcastss  0x699(%rip),%ymm10        # 59b4 <_sk_callback_avx+0x479>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,118,6,0,0          ; vbroadcastss  0x676(%rip),%ymm9        # 5854 <_sk_callback_avx+0x469>
+  DB  196,98,125,24,13,138,6,0,0          ; vbroadcastss  0x68a(%rip),%ymm9        # 59b8 <_sk_callback_avx+0x47d>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9226,17 +9342,17 @@ _sk_bicubic_n1x_avx LABEL PROC
 PUBLIC _sk_bicubic_p1x_avx
 _sk_bicubic_p1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,94,6,0,0            ; vbroadcastss  0x65e(%rip),%ymm8        # 5858 <_sk_callback_avx+0x46d>
+  DB  196,98,125,24,5,114,6,0,0           ; vbroadcastss  0x672(%rip),%ymm8        # 59bc <_sk_callback_avx+0x481>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,72,64                    ; vmovups       0x40(%rax),%ymm9
-  DB  196,98,125,24,21,80,6,0,0           ; vbroadcastss  0x650(%rip),%ymm10        # 585c <_sk_callback_avx+0x471>
+  DB  196,98,125,24,21,100,6,0,0          ; vbroadcastss  0x664(%rip),%ymm10        # 59c0 <_sk_callback_avx+0x485>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,70,6,0,0           ; vbroadcastss  0x646(%rip),%ymm11        # 5860 <_sk_callback_avx+0x475>
+  DB  196,98,125,24,29,90,6,0,0           ; vbroadcastss  0x65a(%rip),%ymm11        # 59c4 <_sk_callback_avx+0x489>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,45,6,0,0           ; vbroadcastss  0x62d(%rip),%ymm9        # 5864 <_sk_callback_avx+0x479>
+  DB  196,98,125,24,13,65,6,0,0           ; vbroadcastss  0x641(%rip),%ymm9        # 59c8 <_sk_callback_avx+0x48d>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9245,13 +9361,13 @@ _sk_bicubic_p1x_avx LABEL PROC
 PUBLIC _sk_bicubic_p3x_avx
 _sk_bicubic_p3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,21,6,0,0           ; vbroadcastss  0x615(%rip),%ymm0        # 5868 <_sk_callback_avx+0x47d>
+  DB  196,226,125,24,5,41,6,0,0           ; vbroadcastss  0x629(%rip),%ymm0        # 59cc <_sk_callback_avx+0x491>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,2,6,0,0            ; vbroadcastss  0x602(%rip),%ymm10        # 586c <_sk_callback_avx+0x481>
+  DB  196,98,125,24,21,22,6,0,0           ; vbroadcastss  0x616(%rip),%ymm10        # 59d0 <_sk_callback_avx+0x495>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,248,5,0,0          ; vbroadcastss  0x5f8(%rip),%ymm10        # 5870 <_sk_callback_avx+0x485>
+  DB  196,98,125,24,21,12,6,0,0           ; vbroadcastss  0x60c(%rip),%ymm10        # 59d4 <_sk_callback_avx+0x499>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -9261,14 +9377,14 @@ _sk_bicubic_p3x_avx LABEL PROC
 PUBLIC _sk_bicubic_n3y_avx
 _sk_bicubic_n3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,219,5,0,0         ; vbroadcastss  0x5db(%rip),%ymm1        # 5874 <_sk_callback_avx+0x489>
+  DB  196,226,125,24,13,239,5,0,0         ; vbroadcastss  0x5ef(%rip),%ymm1        # 59d8 <_sk_callback_avx+0x49d>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,209,5,0,0           ; vbroadcastss  0x5d1(%rip),%ymm8        # 5878 <_sk_callback_avx+0x48d>
+  DB  196,98,125,24,5,229,5,0,0           ; vbroadcastss  0x5e5(%rip),%ymm8        # 59dc <_sk_callback_avx+0x4a1>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,194,5,0,0          ; vbroadcastss  0x5c2(%rip),%ymm10        # 587c <_sk_callback_avx+0x491>
+  DB  196,98,125,24,21,214,5,0,0          ; vbroadcastss  0x5d6(%rip),%ymm10        # 59e0 <_sk_callback_avx+0x4a5>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,184,5,0,0          ; vbroadcastss  0x5b8(%rip),%ymm10        # 5880 <_sk_callback_avx+0x495>
+  DB  196,98,125,24,21,204,5,0,0          ; vbroadcastss  0x5cc(%rip),%ymm10        # 59e4 <_sk_callback_avx+0x4a9>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -9278,19 +9394,19 @@ _sk_bicubic_n3y_avx LABEL PROC
 PUBLIC _sk_bicubic_n1y_avx
 _sk_bicubic_n1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,155,5,0,0         ; vbroadcastss  0x59b(%rip),%ymm1        # 5884 <_sk_callback_avx+0x499>
+  DB  196,226,125,24,13,175,5,0,0         ; vbroadcastss  0x5af(%rip),%ymm1        # 59e8 <_sk_callback_avx+0x4ad>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,145,5,0,0           ; vbroadcastss  0x591(%rip),%ymm8        # 5888 <_sk_callback_avx+0x49d>
+  DB  196,98,125,24,5,165,5,0,0           ; vbroadcastss  0x5a5(%rip),%ymm8        # 59ec <_sk_callback_avx+0x4b1>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,135,5,0,0          ; vbroadcastss  0x587(%rip),%ymm9        # 588c <_sk_callback_avx+0x4a1>
+  DB  196,98,125,24,13,155,5,0,0          ; vbroadcastss  0x59b(%rip),%ymm9        # 59f0 <_sk_callback_avx+0x4b5>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,125,5,0,0          ; vbroadcastss  0x57d(%rip),%ymm10        # 5890 <_sk_callback_avx+0x4a5>
+  DB  196,98,125,24,21,145,5,0,0          ; vbroadcastss  0x591(%rip),%ymm10        # 59f4 <_sk_callback_avx+0x4b9>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,110,5,0,0          ; vbroadcastss  0x56e(%rip),%ymm10        # 5894 <_sk_callback_avx+0x4a9>
+  DB  196,98,125,24,21,130,5,0,0          ; vbroadcastss  0x582(%rip),%ymm10        # 59f8 <_sk_callback_avx+0x4bd>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,95,5,0,0           ; vbroadcastss  0x55f(%rip),%ymm9        # 5898 <_sk_callback_avx+0x4ad>
+  DB  196,98,125,24,13,115,5,0,0          ; vbroadcastss  0x573(%rip),%ymm9        # 59fc <_sk_callback_avx+0x4c1>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9299,17 +9415,17 @@ _sk_bicubic_n1y_avx LABEL PROC
 PUBLIC _sk_bicubic_p1y_avx
 _sk_bicubic_p1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,71,5,0,0            ; vbroadcastss  0x547(%rip),%ymm8        # 589c <_sk_callback_avx+0x4b1>
+  DB  196,98,125,24,5,91,5,0,0            ; vbroadcastss  0x55b(%rip),%ymm8        # 5a00 <_sk_callback_avx+0x4c5>
   DB  197,188,88,72,32                    ; vaddps        0x20(%rax),%ymm8,%ymm1
   DB  197,124,16,72,96                    ; vmovups       0x60(%rax),%ymm9
-  DB  196,98,125,24,21,56,5,0,0           ; vbroadcastss  0x538(%rip),%ymm10        # 58a0 <_sk_callback_avx+0x4b5>
+  DB  196,98,125,24,21,76,5,0,0           ; vbroadcastss  0x54c(%rip),%ymm10        # 5a04 <_sk_callback_avx+0x4c9>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,46,5,0,0           ; vbroadcastss  0x52e(%rip),%ymm11        # 58a4 <_sk_callback_avx+0x4b9>
+  DB  196,98,125,24,29,66,5,0,0           ; vbroadcastss  0x542(%rip),%ymm11        # 5a08 <_sk_callback_avx+0x4cd>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,21,5,0,0           ; vbroadcastss  0x515(%rip),%ymm9        # 58a8 <_sk_callback_avx+0x4bd>
+  DB  196,98,125,24,13,41,5,0,0           ; vbroadcastss  0x529(%rip),%ymm9        # 5a0c <_sk_callback_avx+0x4d1>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9318,13 +9434,13 @@ _sk_bicubic_p1y_avx LABEL PROC
 PUBLIC _sk_bicubic_p3y_avx
 _sk_bicubic_p3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,253,4,0,0         ; vbroadcastss  0x4fd(%rip),%ymm1        # 58ac <_sk_callback_avx+0x4c1>
+  DB  196,226,125,24,13,17,5,0,0          ; vbroadcastss  0x511(%rip),%ymm1        # 5a10 <_sk_callback_avx+0x4d5>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,233,4,0,0          ; vbroadcastss  0x4e9(%rip),%ymm10        # 58b0 <_sk_callback_avx+0x4c5>
+  DB  196,98,125,24,21,253,4,0,0          ; vbroadcastss  0x4fd(%rip),%ymm10        # 5a14 <_sk_callback_avx+0x4d9>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,223,4,0,0          ; vbroadcastss  0x4df(%rip),%ymm10        # 58b4 <_sk_callback_avx+0x4c9>
+  DB  196,98,125,24,21,243,4,0,0          ; vbroadcastss  0x4f3(%rip),%ymm10        # 5a18 <_sk_callback_avx+0x4dd>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -9401,9 +9517,17 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  128,63,0                            ; cmpb          $0x0,(%rdi)
-  DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
-  DB  63                                  ; (bad)
+  DB  128,63,1                            ; cmpb          $0x1,(%rdi)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,4,0                               ; add           %al,(%rax,%rax,1)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  33,8                                ; and           %ecx,(%rax)
+  DB  130                                 ; (bad)
+  DB  60,0                                ; cmp           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  191,0,0,128,63                      ; mov           $0x3f800000,%edi
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -9425,11 +9549,13 @@ ALIGN 4
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  128,191,0,0,224,64,0                ; cmpb          $0x0,0x40e00000(%rdi)
+  DB  128,63,0                            ; cmpb          $0x0,(%rdi)
+  DB  0,128,191,0,0,224                   ; add           %al,-0x1fffff41(%rax)
+  DB  64,0,0                              ; add           %al,(%rax)
+  DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  128,63,145                          ; cmpb          $0x91,(%rdi)
+  DB  145                                 ; xchg          %eax,%ecx
   DB  131,158,61,154,153,153,62           ; sbbl          $0x3e,-0x666665c3(%rsi)
   DB  92                                  ; pop           %rsp
   DB  143                                 ; (bad)
@@ -9473,7 +9599,7 @@ ALIGN 4
   DB  190,129,128,128,59                  ; mov           $0x3b808081,%esi
   DB  129,128,128,59,0,248,0,0,8,33       ; addl          $0x21080000,-0x7ffc480(%rax)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        55dd <.literal4+0xd9>
+  DB  224,7                               ; loopne        5741 <.literal4+0xed>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -9487,10 +9613,10 @@ ALIGN 4
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
   DB  0,52,255                            ; add           %dh,(%rdi,%rdi,8)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5608 <.literal4+0x104>
+  DB  127,0                               ; jg            576c <.literal4+0x118>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            5681 <.literal4+0x17d>
+  DB  119,115                             ; ja            57e5 <.literal4+0x191>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -9504,10 +9630,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            563c <.literal4+0x138>
+  DB  127,0                               ; jg            57a0 <.literal4+0x14c>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            56b5 <.literal4+0x1b1>
+  DB  119,115                             ; ja            5819 <.literal4+0x1c5>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -9521,10 +9647,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5670 <.literal4+0x16c>
+  DB  127,0                               ; jg            57d4 <.literal4+0x180>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            56e9 <.literal4+0x1e5>
+  DB  119,115                             ; ja            584d <.literal4+0x1f9>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -9538,10 +9664,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            56a4 <.literal4+0x1a0>
+  DB  127,0                               ; jg            5808 <.literal4+0x1b4>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            571d <.literal4+0x219>
+  DB  119,115                             ; ja            5881 <.literal4+0x22d>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -9554,7 +9680,7 @@ ALIGN 4
   DB  0,75,0                              ; add           %cl,0x0(%rbx)
   DB  0,128,63,0,0,200                    ; add           %al,-0x37ffffc1(%rax)
   DB  66,0,0                              ; rex.X         add %al,(%rax)
-  DB  127,67                              ; jg            571b <.literal4+0x217>
+  DB  127,67                              ; jg            587f <.literal4+0x22b>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -9566,10 +9692,10 @@ ALIGN 4
   DB  190,80,128,3,62                     ; mov           $0x3e038050,%esi
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           573b <.literal4+0x237>
+  DB  118,63                              ; jbe           589f <.literal4+0x24b>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            574f <.literal4+0x24b>
+  DB  127,67                              ; jg            58b3 <.literal4+0x25f>
   DB  129,128,128,59,0,0,128,63,129,128   ; addl          $0x80813f80,0x3b80(%rax)
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,128,63,129,128,128                ; add           %al,-0x7f7f7ec1(%rax)
@@ -9578,7 +9704,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        5731 <.literal4+0x22d>
+  DB  224,7                               ; loopne        5895 <.literal4+0x241>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -9590,7 +9716,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        574d <.literal4+0x249>
+  DB  224,7                               ; loopne        58b1 <.literal4+0x25d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -9601,7 +9727,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            57a2 <.literal4+0x29e>
+  DB  124,66                              ; jl            5906 <.literal4+0x2b2>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,55,0,15                 ; mov           %ecx,0xf003788(%rax)
@@ -9619,9 +9745,9 @@ ALIGN 4
   DB  137,136,136,59,15,0                 ; mov           %ecx,0xf3b88(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,61,0,0                  ; mov           %ecx,0x3d88(%rax)
-  DB  112,65                              ; jo            57e5 <.literal4+0x2e1>
+  DB  112,65                              ; jo            5949 <.literal4+0x2f5>
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            57f3 <.literal4+0x2ef>
+  DB  127,67                              ; jg            5957 <.literal4+0x303>
   DB  0,128,0,0,0,0                       ; add           %al,0x0(%rax)
   DB  0,128,0,4,0,128                     ; add           %al,-0x7ffffc00(%rax)
   DB  0,0                                 ; add           %al,(%rax)
@@ -9637,7 +9763,7 @@ ALIGN 4
   DB  0,128,55,0,0,128                    ; add           %al,-0x7fffffc9(%rax)
   DB  63                                  ; (bad)
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            5833 <.literal4+0x32f>
+  DB  127,71                              ; jg            5997 <.literal4+0x343>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,89                               ; ds            pop %rcx
@@ -9885,7 +10011,7 @@ _sk_seed_shader_sse41 LABEL PROC
   DB  102,15,110,199                      ; movd          %edi,%xmm0
   DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
   DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
-  DB  15,40,21,81,57,0,0                  ; movaps        0x3951(%rip),%xmm2        # 3a60 <_sk_callback_sse41+0xb2>
+  DB  15,40,21,49,58,0,0                  ; movaps        0x3a31(%rip),%xmm2        # 3b40 <_sk_callback_sse41+0xab>
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  15,16,2                             ; movups        (%rdx),%xmm0
   DB  15,88,193                           ; addps         %xmm1,%xmm0
@@ -9894,7 +10020,7 @@ _sk_seed_shader_sse41 LABEL PROC
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,21,64,57,0,0                  ; movaps        0x3940(%rip),%xmm2        # 3a70 <_sk_callback_sse41+0xc2>
+  DB  15,40,21,32,58,0,0                  ; movaps        0x3a20(%rip),%xmm2        # 3b50 <_sk_callback_sse41+0xbb>
   DB  15,87,219                           ; xorps         %xmm3,%xmm3
   DB  15,87,228                           ; xorps         %xmm4,%xmm4
   DB  15,87,237                           ; xorps         %xmm5,%xmm5
@@ -9902,6 +10028,54 @@ _sk_seed_shader_sse41 LABEL PROC
   DB  15,87,255                           ; xorps         %xmm7,%xmm7
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_dither_sse41
+_sk_dither_sse41 LABEL PROC
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  102,68,15,110,199                   ; movd          %edi,%xmm8
+  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
+  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
+  DB  68,15,16,10                         ; movups        (%rdx),%xmm9
+  DB  69,15,88,200                        ; addps         %xmm8,%xmm9
+  DB  243,69,15,91,201                    ; cvttps2dq     %xmm9,%xmm9
+  DB  72,139,8                            ; mov           (%rax),%rcx
+  DB  102,68,15,110,1                     ; movd          (%rcx),%xmm8
+  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
+  DB  102,69,15,239,193                   ; pxor          %xmm9,%xmm8
+  DB  102,68,15,111,21,229,57,0,0         ; movdqa        0x39e5(%rip),%xmm10        # 3b60 <_sk_callback_sse41+0xcb>
+  DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
+  DB  102,69,15,219,218                   ; pand          %xmm10,%xmm11
+  DB  102,65,15,114,243,5                 ; pslld         $0x5,%xmm11
+  DB  102,69,15,219,209                   ; pand          %xmm9,%xmm10
+  DB  102,65,15,114,242,4                 ; pslld         $0x4,%xmm10
+  DB  102,68,15,111,37,209,57,0,0         ; movdqa        0x39d1(%rip),%xmm12        # 3b70 <_sk_callback_sse41+0xdb>
+  DB  102,68,15,111,45,216,57,0,0         ; movdqa        0x39d8(%rip),%xmm13        # 3b80 <_sk_callback_sse41+0xeb>
+  DB  102,69,15,111,240                   ; movdqa        %xmm8,%xmm14
+  DB  102,69,15,219,245                   ; pand          %xmm13,%xmm14
+  DB  102,65,15,114,246,2                 ; pslld         $0x2,%xmm14
+  DB  102,69,15,219,233                   ; pand          %xmm9,%xmm13
+  DB  102,69,15,254,237                   ; paddd         %xmm13,%xmm13
+  DB  102,69,15,219,196                   ; pand          %xmm12,%xmm8
+  DB  102,65,15,114,208,1                 ; psrld         $0x1,%xmm8
+  DB  102,69,15,219,204                   ; pand          %xmm12,%xmm9
+  DB  102,65,15,114,209,2                 ; psrld         $0x2,%xmm9
+  DB  102,69,15,235,234                   ; por           %xmm10,%xmm13
+  DB  102,69,15,235,233                   ; por           %xmm9,%xmm13
+  DB  102,69,15,235,243                   ; por           %xmm11,%xmm14
+  DB  102,69,15,235,245                   ; por           %xmm13,%xmm14
+  DB  102,69,15,235,240                   ; por           %xmm8,%xmm14
+  DB  69,15,91,198                        ; cvtdq2ps      %xmm14,%xmm8
+  DB  68,15,89,5,147,57,0,0               ; mulps         0x3993(%rip),%xmm8        # 3b90 <_sk_callback_sse41+0xfb>
+  DB  68,15,88,5,155,57,0,0               ; addps         0x399b(%rip),%xmm8        # 3ba0 <_sk_callback_sse41+0x10b>
+  DB  243,68,15,16,72,8                   ; movss         0x8(%rax),%xmm9
+  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
+  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
+  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
+  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
+  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
+  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_constant_color_sse41
 _sk_constant_color_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -9928,7 +10102,7 @@ _sk_clear_sse41 LABEL PROC
 PUBLIC _sk_srcatop_sse41
 _sk_srcatop_sse41 LABEL PROC
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  68,15,40,5,251,56,0,0               ; movaps        0x38fb(%rip),%xmm8        # 3a80 <_sk_callback_sse41+0xd2>
+  DB  68,15,40,5,68,57,0,0                ; movaps        0x3944(%rip),%xmm8        # 3bb0 <_sk_callback_sse41+0x11b>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -9951,7 +10125,7 @@ PUBLIC _sk_dstatop_sse41
 _sk_dstatop_sse41 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
   DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
-  DB  68,15,40,13,190,56,0,0              ; movaps        0x38be(%rip),%xmm9        # 3a90 <_sk_callback_sse41+0xe2>
+  DB  68,15,40,13,7,57,0,0                ; movaps        0x3907(%rip),%xmm9        # 3bc0 <_sk_callback_sse41+0x12b>
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
@@ -9992,7 +10166,7 @@ _sk_dstin_sse41 LABEL PROC
 
 PUBLIC _sk_srcout_sse41
 _sk_srcout_sse41 LABEL PROC
-  DB  68,15,40,5,98,56,0,0                ; movaps        0x3862(%rip),%xmm8        # 3aa0 <_sk_callback_sse41+0xf2>
+  DB  68,15,40,5,171,56,0,0               ; movaps        0x38ab(%rip),%xmm8        # 3bd0 <_sk_callback_sse41+0x13b>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
@@ -10003,7 +10177,7 @@ _sk_srcout_sse41 LABEL PROC
 
 PUBLIC _sk_dstout_sse41
 _sk_dstout_sse41 LABEL PROC
-  DB  68,15,40,5,82,56,0,0                ; movaps        0x3852(%rip),%xmm8        # 3ab0 <_sk_callback_sse41+0x102>
+  DB  68,15,40,5,155,56,0,0               ; movaps        0x389b(%rip),%xmm8        # 3be0 <_sk_callback_sse41+0x14b>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  15,89,196                           ; mulps         %xmm4,%xmm0
@@ -10018,7 +10192,7 @@ _sk_dstout_sse41 LABEL PROC
 
 PUBLIC _sk_srcover_sse41
 _sk_srcover_sse41 LABEL PROC
-  DB  68,15,40,5,53,56,0,0                ; movaps        0x3835(%rip),%xmm8        # 3ac0 <_sk_callback_sse41+0x112>
+  DB  68,15,40,5,126,56,0,0               ; movaps        0x387e(%rip),%xmm8        # 3bf0 <_sk_callback_sse41+0x15b>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -10036,7 +10210,7 @@ _sk_srcover_sse41 LABEL PROC
 
 PUBLIC _sk_dstover_sse41
 _sk_dstover_sse41 LABEL PROC
-  DB  68,15,40,5,9,56,0,0                 ; movaps        0x3809(%rip),%xmm8        # 3ad0 <_sk_callback_sse41+0x122>
+  DB  68,15,40,5,82,56,0,0                ; movaps        0x3852(%rip),%xmm8        # 3c00 <_sk_callback_sse41+0x16b>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -10060,7 +10234,7 @@ _sk_modulate_sse41 LABEL PROC
 
 PUBLIC _sk_multiply_sse41
 _sk_multiply_sse41 LABEL PROC
-  DB  68,15,40,5,221,55,0,0               ; movaps        0x37dd(%rip),%xmm8        # 3ae0 <_sk_callback_sse41+0x132>
+  DB  68,15,40,5,38,56,0,0                ; movaps        0x3826(%rip),%xmm8        # 3c10 <_sk_callback_sse41+0x17b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
@@ -10130,7 +10304,7 @@ _sk_screen_sse41 LABEL PROC
 PUBLIC _sk_xor__sse41
 _sk_xor__sse41 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
-  DB  15,40,29,14,55,0,0                  ; movaps        0x370e(%rip),%xmm3        # 3af0 <_sk_callback_sse41+0x142>
+  DB  15,40,29,87,55,0,0                  ; movaps        0x3757(%rip),%xmm3        # 3c20 <_sk_callback_sse41+0x18b>
   DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
@@ -10176,7 +10350,7 @@ _sk_darken_sse41 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,95,209                        ; maxps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,121,54,0,0                 ; movaps        0x3679(%rip),%xmm2        # 3b00 <_sk_callback_sse41+0x152>
+  DB  15,40,21,194,54,0,0                 ; movaps        0x36c2(%rip),%xmm2        # 3c30 <_sk_callback_sse41+0x19b>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -10208,7 +10382,7 @@ _sk_lighten_sse41 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,30,54,0,0                  ; movaps        0x361e(%rip),%xmm2        # 3b10 <_sk_callback_sse41+0x162>
+  DB  15,40,21,103,54,0,0                 ; movaps        0x3667(%rip),%xmm2        # 3c40 <_sk_callback_sse41+0x1ab>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -10243,7 +10417,7 @@ _sk_difference_sse41 LABEL PROC
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,184,53,0,0                 ; movaps        0x35b8(%rip),%xmm2        # 3b20 <_sk_callback_sse41+0x172>
+  DB  15,40,21,1,54,0,0                   ; movaps        0x3601(%rip),%xmm2        # 3c50 <_sk_callback_sse41+0x1bb>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -10268,7 +10442,7 @@ _sk_exclusion_sse41 LABEL PROC
   DB  15,89,214                           ; mulps         %xmm6,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,202                        ; subps         %xmm2,%xmm9
-  DB  15,40,13,121,53,0,0                 ; movaps        0x3579(%rip),%xmm1        # 3b30 <_sk_callback_sse41+0x182>
+  DB  15,40,13,194,53,0,0                 ; movaps        0x35c2(%rip),%xmm1        # 3c60 <_sk_callback_sse41+0x1cb>
   DB  15,92,203                           ; subps         %xmm3,%xmm1
   DB  15,89,207                           ; mulps         %xmm7,%xmm1
   DB  15,88,217                           ; addps         %xmm1,%xmm3
@@ -10280,7 +10454,7 @@ _sk_exclusion_sse41 LABEL PROC
 PUBLIC _sk_colorburn_sse41
 _sk_colorburn_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,40,21,104,53,0,0              ; movaps        0x3568(%rip),%xmm10        # 3b40 <_sk_callback_sse41+0x192>
+  DB  68,15,40,21,177,53,0,0              ; movaps        0x35b1(%rip),%xmm10        # 3c70 <_sk_callback_sse41+0x1db>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
@@ -10360,7 +10534,7 @@ _sk_colorburn_sse41 LABEL PROC
 PUBLIC _sk_colordodge_sse41
 _sk_colordodge_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,40,21,70,52,0,0               ; movaps        0x3446(%rip),%xmm10        # 3b50 <_sk_callback_sse41+0x1a2>
+  DB  68,15,40,21,143,52,0,0              ; movaps        0x348f(%rip),%xmm10        # 3c80 <_sk_callback_sse41+0x1eb>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
@@ -10441,7 +10615,7 @@ _sk_hardlight_sse41 LABEL PROC
   DB  15,40,244                           ; movaps        %xmm4,%xmm6
   DB  15,40,227                           ; movaps        %xmm3,%xmm4
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
-  DB  68,15,40,21,28,51,0,0               ; movaps        0x331c(%rip),%xmm10        # 3b60 <_sk_callback_sse41+0x1b2>
+  DB  68,15,40,21,101,51,0,0              ; movaps        0x3365(%rip),%xmm10        # 3c90 <_sk_callback_sse41+0x1fb>
   DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
   DB  15,92,239                           ; subps         %xmm7,%xmm5
   DB  15,40,197                           ; movaps        %xmm5,%xmm0
@@ -10523,7 +10697,7 @@ PUBLIC _sk_overlay_sse41
 _sk_overlay_sse41 LABEL PROC
   DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
-  DB  68,15,40,21,254,49,0,0              ; movaps        0x31fe(%rip),%xmm10        # 3b70 <_sk_callback_sse41+0x1c2>
+  DB  68,15,40,21,71,50,0,0               ; movaps        0x3247(%rip),%xmm10        # 3ca0 <_sk_callback_sse41+0x20b>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
@@ -10607,7 +10781,7 @@ _sk_softlight_sse41 LABEL PROC
   DB  15,40,198                           ; movaps        %xmm6,%xmm0
   DB  15,94,199                           ; divps         %xmm7,%xmm0
   DB  65,15,84,193                        ; andps         %xmm9,%xmm0
-  DB  15,40,13,209,48,0,0                 ; movaps        0x30d1(%rip),%xmm1        # 3b80 <_sk_callback_sse41+0x1d2>
+  DB  15,40,13,26,49,0,0                  ; movaps        0x311a(%rip),%xmm1        # 3cb0 <_sk_callback_sse41+0x21b>
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
   DB  68,15,92,208                        ; subps         %xmm0,%xmm10
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
@@ -10620,10 +10794,10 @@ _sk_softlight_sse41 LABEL PROC
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  15,89,210                           ; mulps         %xmm2,%xmm2
   DB  15,88,208                           ; addps         %xmm0,%xmm2
-  DB  68,15,40,45,175,48,0,0              ; movaps        0x30af(%rip),%xmm13        # 3b90 <_sk_callback_sse41+0x1e2>
+  DB  68,15,40,45,248,48,0,0              ; movaps        0x30f8(%rip),%xmm13        # 3cc0 <_sk_callback_sse41+0x22b>
   DB  69,15,88,245                        ; addps         %xmm13,%xmm14
   DB  68,15,89,242                        ; mulps         %xmm2,%xmm14
-  DB  68,15,40,37,175,48,0,0              ; movaps        0x30af(%rip),%xmm12        # 3ba0 <_sk_callback_sse41+0x1f2>
+  DB  68,15,40,37,248,48,0,0              ; movaps        0x30f8(%rip),%xmm12        # 3cd0 <_sk_callback_sse41+0x23b>
   DB  69,15,89,252                        ; mulps         %xmm12,%xmm15
   DB  69,15,88,254                        ; addps         %xmm14,%xmm15
   DB  15,40,198                           ; movaps        %xmm6,%xmm0
@@ -10768,7 +10942,7 @@ _sk_clamp_0_sse41 LABEL PROC
 
 PUBLIC _sk_clamp_1_sse41
 _sk_clamp_1_sse41 LABEL PROC
-  DB  68,15,40,5,191,46,0,0               ; movaps        0x2ebf(%rip),%xmm8        # 3bb0 <_sk_callback_sse41+0x202>
+  DB  68,15,40,5,8,47,0,0                 ; movaps        0x2f08(%rip),%xmm8        # 3ce0 <_sk_callback_sse41+0x24b>
   DB  65,15,93,192                        ; minps         %xmm8,%xmm0
   DB  65,15,93,200                        ; minps         %xmm8,%xmm1
   DB  65,15,93,208                        ; minps         %xmm8,%xmm2
@@ -10778,7 +10952,7 @@ _sk_clamp_1_sse41 LABEL PROC
 
 PUBLIC _sk_clamp_a_sse41
 _sk_clamp_a_sse41 LABEL PROC
-  DB  15,93,29,180,46,0,0                 ; minps         0x2eb4(%rip),%xmm3        # 3bc0 <_sk_callback_sse41+0x212>
+  DB  15,93,29,253,46,0,0                 ; minps         0x2efd(%rip),%xmm3        # 3cf0 <_sk_callback_sse41+0x25b>
   DB  15,93,195                           ; minps         %xmm3,%xmm0
   DB  15,93,203                           ; minps         %xmm3,%xmm1
   DB  15,93,211                           ; minps         %xmm3,%xmm2
@@ -10851,7 +11025,7 @@ _sk_premul_sse41 LABEL PROC
 PUBLIC _sk_unpremul_sse41
 _sk_unpremul_sse41 LABEL PROC
   DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
-  DB  68,15,40,13,31,46,0,0               ; movaps        0x2e1f(%rip),%xmm9        # 3bd0 <_sk_callback_sse41+0x222>
+  DB  68,15,40,13,104,46,0,0              ; movaps        0x2e68(%rip),%xmm9        # 3d00 <_sk_callback_sse41+0x26b>
   DB  68,15,94,203                        ; divps         %xmm3,%xmm9
   DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
   DB  69,15,84,193                        ; andps         %xmm9,%xmm8
@@ -10863,20 +11037,20 @@ _sk_unpremul_sse41 LABEL PROC
 
 PUBLIC _sk_from_srgb_sse41
 _sk_from_srgb_sse41 LABEL PROC
-  DB  68,15,40,29,10,46,0,0               ; movaps        0x2e0a(%rip),%xmm11        # 3be0 <_sk_callback_sse41+0x232>
+  DB  68,15,40,29,83,46,0,0               ; movaps        0x2e53(%rip),%xmm11        # 3d10 <_sk_callback_sse41+0x27b>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
   DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
   DB  69,15,89,210                        ; mulps         %xmm10,%xmm10
-  DB  68,15,40,37,2,46,0,0                ; movaps        0x2e02(%rip),%xmm12        # 3bf0 <_sk_callback_sse41+0x242>
+  DB  68,15,40,37,75,46,0,0               ; movaps        0x2e4b(%rip),%xmm12        # 3d20 <_sk_callback_sse41+0x28b>
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
   DB  69,15,89,196                        ; mulps         %xmm12,%xmm8
-  DB  68,15,40,45,2,46,0,0                ; movaps        0x2e02(%rip),%xmm13        # 3c00 <_sk_callback_sse41+0x252>
+  DB  68,15,40,45,75,46,0,0               ; movaps        0x2e4b(%rip),%xmm13        # 3d30 <_sk_callback_sse41+0x29b>
   DB  69,15,88,197                        ; addps         %xmm13,%xmm8
   DB  69,15,89,194                        ; mulps         %xmm10,%xmm8
-  DB  68,15,40,53,2,46,0,0                ; movaps        0x2e02(%rip),%xmm14        # 3c10 <_sk_callback_sse41+0x262>
+  DB  68,15,40,53,75,46,0,0               ; movaps        0x2e4b(%rip),%xmm14        # 3d40 <_sk_callback_sse41+0x2ab>
   DB  69,15,88,198                        ; addps         %xmm14,%xmm8
-  DB  68,15,40,61,6,46,0,0                ; movaps        0x2e06(%rip),%xmm15        # 3c20 <_sk_callback_sse41+0x272>
+  DB  68,15,40,61,79,46,0,0               ; movaps        0x2e4f(%rip),%xmm15        # 3d50 <_sk_callback_sse41+0x2bb>
   DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
   DB  102,69,15,56,20,193                 ; blendvps      %xmm0,%xmm9,%xmm8
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
@@ -10920,20 +11094,20 @@ _sk_to_srgb_sse41 LABEL PROC
   DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
   DB  69,15,83,200                        ; rcpps         %xmm8,%xmm9
   DB  69,15,82,208                        ; rsqrtps       %xmm8,%xmm10
-  DB  68,15,40,29,115,45,0,0              ; movaps        0x2d73(%rip),%xmm11        # 3c30 <_sk_callback_sse41+0x282>
+  DB  68,15,40,29,188,45,0,0              ; movaps        0x2dbc(%rip),%xmm11        # 3d60 <_sk_callback_sse41+0x2cb>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
-  DB  68,15,40,37,116,45,0,0              ; movaps        0x2d74(%rip),%xmm12        # 3c40 <_sk_callback_sse41+0x292>
+  DB  68,15,40,37,189,45,0,0              ; movaps        0x2dbd(%rip),%xmm12        # 3d70 <_sk_callback_sse41+0x2db>
   DB  69,15,89,204                        ; mulps         %xmm12,%xmm9
-  DB  68,15,40,45,120,45,0,0              ; movaps        0x2d78(%rip),%xmm13        # 3c50 <_sk_callback_sse41+0x2a2>
+  DB  68,15,40,45,193,45,0,0              ; movaps        0x2dc1(%rip),%xmm13        # 3d80 <_sk_callback_sse41+0x2eb>
   DB  69,15,88,205                        ; addps         %xmm13,%xmm9
-  DB  68,15,40,53,124,45,0,0              ; movaps        0x2d7c(%rip),%xmm14        # 3c60 <_sk_callback_sse41+0x2b2>
+  DB  68,15,40,53,197,45,0,0              ; movaps        0x2dc5(%rip),%xmm14        # 3d90 <_sk_callback_sse41+0x2fb>
   DB  69,15,89,214                        ; mulps         %xmm14,%xmm10
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
-  DB  68,15,40,5,124,45,0,0               ; movaps        0x2d7c(%rip),%xmm8        # 3c70 <_sk_callback_sse41+0x2c2>
+  DB  68,15,40,5,197,45,0,0               ; movaps        0x2dc5(%rip),%xmm8        # 3da0 <_sk_callback_sse41+0x30b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,93,202                        ; minps         %xmm10,%xmm9
-  DB  68,15,40,61,124,45,0,0              ; movaps        0x2d7c(%rip),%xmm15        # 3c80 <_sk_callback_sse41+0x2d2>
+  DB  68,15,40,61,197,45,0,0              ; movaps        0x2dc5(%rip),%xmm15        # 3db0 <_sk_callback_sse41+0x31b>
   DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
   DB  102,68,15,56,20,201                 ; blendvps      %xmm0,%xmm1,%xmm9
   DB  15,82,194                           ; rsqrtps       %xmm2,%xmm0
@@ -10986,7 +11160,7 @@ _sk_rgb_to_hsl_sse41 LABEL PROC
   DB  68,15,93,226                        ; minps         %xmm2,%xmm12
   DB  65,15,40,203                        ; movaps        %xmm11,%xmm1
   DB  65,15,92,204                        ; subps         %xmm12,%xmm1
-  DB  68,15,40,53,202,44,0,0              ; movaps        0x2cca(%rip),%xmm14        # 3c90 <_sk_callback_sse41+0x2e2>
+  DB  68,15,40,53,19,45,0,0               ; movaps        0x2d13(%rip),%xmm14        # 3dc0 <_sk_callback_sse41+0x32b>
   DB  68,15,94,241                        ; divps         %xmm1,%xmm14
   DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
   DB  69,15,194,208,0                     ; cmpeqps       %xmm8,%xmm10
@@ -10995,27 +11169,27 @@ _sk_rgb_to_hsl_sse41 LABEL PROC
   DB  65,15,89,198                        ; mulps         %xmm14,%xmm0
   DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
   DB  68,15,194,250,1                     ; cmpltps       %xmm2,%xmm15
-  DB  68,15,84,61,177,44,0,0              ; andps         0x2cb1(%rip),%xmm15        # 3ca0 <_sk_callback_sse41+0x2f2>
+  DB  68,15,84,61,250,44,0,0              ; andps         0x2cfa(%rip),%xmm15        # 3dd0 <_sk_callback_sse41+0x33b>
   DB  68,15,88,248                        ; addps         %xmm0,%xmm15
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
   DB  65,15,194,193,0                     ; cmpeqps       %xmm9,%xmm0
   DB  65,15,92,208                        ; subps         %xmm8,%xmm2
   DB  65,15,89,214                        ; mulps         %xmm14,%xmm2
-  DB  68,15,40,45,164,44,0,0              ; movaps        0x2ca4(%rip),%xmm13        # 3cb0 <_sk_callback_sse41+0x302>
+  DB  68,15,40,45,237,44,0,0              ; movaps        0x2ced(%rip),%xmm13        # 3de0 <_sk_callback_sse41+0x34b>
   DB  65,15,88,213                        ; addps         %xmm13,%xmm2
   DB  69,15,92,193                        ; subps         %xmm9,%xmm8
   DB  69,15,89,198                        ; mulps         %xmm14,%xmm8
-  DB  68,15,88,5,160,44,0,0               ; addps         0x2ca0(%rip),%xmm8        # 3cc0 <_sk_callback_sse41+0x312>
+  DB  68,15,88,5,233,44,0,0               ; addps         0x2ce9(%rip),%xmm8        # 3df0 <_sk_callback_sse41+0x35b>
   DB  102,68,15,56,20,194                 ; blendvps      %xmm0,%xmm2,%xmm8
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  102,69,15,56,20,199                 ; blendvps      %xmm0,%xmm15,%xmm8
-  DB  68,15,89,5,152,44,0,0               ; mulps         0x2c98(%rip),%xmm8        # 3cd0 <_sk_callback_sse41+0x322>
+  DB  68,15,89,5,225,44,0,0               ; mulps         0x2ce1(%rip),%xmm8        # 3e00 <_sk_callback_sse41+0x36b>
   DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
   DB  69,15,194,204,4                     ; cmpneqps      %xmm12,%xmm9
   DB  69,15,84,193                        ; andps         %xmm9,%xmm8
   DB  69,15,92,235                        ; subps         %xmm11,%xmm13
   DB  69,15,88,220                        ; addps         %xmm12,%xmm11
-  DB  15,40,5,140,44,0,0                  ; movaps        0x2c8c(%rip),%xmm0        # 3ce0 <_sk_callback_sse41+0x332>
+  DB  15,40,5,213,44,0,0                  ; movaps        0x2cd5(%rip),%xmm0        # 3e10 <_sk_callback_sse41+0x37b>
   DB  65,15,40,211                        ; movaps        %xmm11,%xmm2
   DB  15,89,208                           ; mulps         %xmm0,%xmm2
   DB  15,194,194,1                        ; cmpltps       %xmm2,%xmm0
@@ -11036,7 +11210,7 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  15,41,100,36,32                     ; movaps        %xmm4,0x20(%rsp)
   DB  15,41,92,36,16                      ; movaps        %xmm3,0x10(%rsp)
   DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
-  DB  68,15,40,13,78,44,0,0               ; movaps        0x2c4e(%rip),%xmm9        # 3cf0 <_sk_callback_sse41+0x342>
+  DB  68,15,40,13,151,44,0,0              ; movaps        0x2c97(%rip),%xmm9        # 3e20 <_sk_callback_sse41+0x38b>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,194,194,2                        ; cmpleps       %xmm2,%xmm0
   DB  15,40,217                           ; movaps        %xmm1,%xmm3
@@ -11049,19 +11223,19 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  15,41,20,36                         ; movaps        %xmm2,(%rsp)
   DB  69,15,88,192                        ; addps         %xmm8,%xmm8
   DB  68,15,92,197                        ; subps         %xmm5,%xmm8
-  DB  68,15,40,53,42,44,0,0               ; movaps        0x2c2a(%rip),%xmm14        # 3d00 <_sk_callback_sse41+0x352>
+  DB  68,15,40,53,115,44,0,0              ; movaps        0x2c73(%rip),%xmm14        # 3e30 <_sk_callback_sse41+0x39b>
   DB  69,15,88,242                        ; addps         %xmm10,%xmm14
   DB  102,65,15,58,8,198,1                ; roundps       $0x1,%xmm14,%xmm0
   DB  68,15,92,240                        ; subps         %xmm0,%xmm14
-  DB  68,15,40,29,35,44,0,0               ; movaps        0x2c23(%rip),%xmm11        # 3d10 <_sk_callback_sse41+0x362>
+  DB  68,15,40,29,108,44,0,0              ; movaps        0x2c6c(%rip),%xmm11        # 3e40 <_sk_callback_sse41+0x3ab>
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
   DB  65,15,194,198,2                     ; cmpleps       %xmm14,%xmm0
   DB  15,40,245                           ; movaps        %xmm5,%xmm6
   DB  65,15,92,240                        ; subps         %xmm8,%xmm6
-  DB  15,40,61,28,44,0,0                  ; movaps        0x2c1c(%rip),%xmm7        # 3d20 <_sk_callback_sse41+0x372>
+  DB  15,40,61,101,44,0,0                 ; movaps        0x2c65(%rip),%xmm7        # 3e50 <_sk_callback_sse41+0x3bb>
   DB  69,15,40,238                        ; movaps        %xmm14,%xmm13
   DB  68,15,89,239                        ; mulps         %xmm7,%xmm13
-  DB  15,40,29,29,44,0,0                  ; movaps        0x2c1d(%rip),%xmm3        # 3d30 <_sk_callback_sse41+0x382>
+  DB  15,40,29,102,44,0,0                 ; movaps        0x2c66(%rip),%xmm3        # 3e60 <_sk_callback_sse41+0x3cb>
   DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  68,15,89,230                        ; mulps         %xmm6,%xmm12
@@ -11071,7 +11245,7 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  65,15,194,198,2                     ; cmpleps       %xmm14,%xmm0
   DB  68,15,40,253                        ; movaps        %xmm5,%xmm15
   DB  102,69,15,56,20,252                 ; blendvps      %xmm0,%xmm12,%xmm15
-  DB  68,15,40,37,252,43,0,0              ; movaps        0x2bfc(%rip),%xmm12        # 3d40 <_sk_callback_sse41+0x392>
+  DB  68,15,40,37,69,44,0,0               ; movaps        0x2c45(%rip),%xmm12        # 3e70 <_sk_callback_sse41+0x3db>
   DB  65,15,40,196                        ; movaps        %xmm12,%xmm0
   DB  65,15,194,198,2                     ; cmpleps       %xmm14,%xmm0
   DB  68,15,89,238                        ; mulps         %xmm6,%xmm13
@@ -11105,7 +11279,7 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  65,15,40,198                        ; movaps        %xmm14,%xmm0
   DB  15,40,20,36                         ; movaps        (%rsp),%xmm2
   DB  102,15,56,20,202                    ; blendvps      %xmm0,%xmm2,%xmm1
-  DB  68,15,88,21,117,43,0,0              ; addps         0x2b75(%rip),%xmm10        # 3d50 <_sk_callback_sse41+0x3a2>
+  DB  68,15,88,21,190,43,0,0              ; addps         0x2bbe(%rip),%xmm10        # 3e80 <_sk_callback_sse41+0x3eb>
   DB  102,65,15,58,8,194,1                ; roundps       $0x1,%xmm10,%xmm0
   DB  68,15,92,208                        ; subps         %xmm0,%xmm10
   DB  69,15,194,218,2                     ; cmpleps       %xmm10,%xmm11
@@ -11154,7 +11328,7 @@ _sk_scale_u8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,206,42,0,0               ; mulps         0x2ace(%rip),%xmm8        # 3d60 <_sk_callback_sse41+0x3b2>
+  DB  68,15,89,5,23,43,0,0                ; mulps         0x2b17(%rip),%xmm8        # 3e90 <_sk_callback_sse41+0x3fb>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
@@ -11188,7 +11362,7 @@ _sk_lerp_u8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,122,42,0,0               ; mulps         0x2a7a(%rip),%xmm8        # 3d70 <_sk_callback_sse41+0x3c2>
+  DB  68,15,89,5,195,42,0,0               ; mulps         0x2ac3(%rip),%xmm8        # 3ea0 <_sk_callback_sse41+0x40b>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -11209,17 +11383,17 @@ _sk_lerp_565_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,68,15,56,51,4,120               ; pmovzxwd      (%rax,%rdi,2),%xmm8
-  DB  102,15,111,29,74,42,0,0             ; movdqa        0x2a4a(%rip),%xmm3        # 3d80 <_sk_callback_sse41+0x3d2>
+  DB  102,15,111,29,147,42,0,0            ; movdqa        0x2a93(%rip),%xmm3        # 3eb0 <_sk_callback_sse41+0x41b>
   DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
   DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
-  DB  68,15,89,13,73,42,0,0               ; mulps         0x2a49(%rip),%xmm9        # 3d90 <_sk_callback_sse41+0x3e2>
-  DB  102,15,111,29,81,42,0,0             ; movdqa        0x2a51(%rip),%xmm3        # 3da0 <_sk_callback_sse41+0x3f2>
+  DB  68,15,89,13,146,42,0,0              ; mulps         0x2a92(%rip),%xmm9        # 3ec0 <_sk_callback_sse41+0x42b>
+  DB  102,15,111,29,154,42,0,0            ; movdqa        0x2a9a(%rip),%xmm3        # 3ed0 <_sk_callback_sse41+0x43b>
   DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,82,42,0,0                  ; mulps         0x2a52(%rip),%xmm3        # 3db0 <_sk_callback_sse41+0x402>
-  DB  102,68,15,219,5,89,42,0,0           ; pand          0x2a59(%rip),%xmm8        # 3dc0 <_sk_callback_sse41+0x412>
+  DB  15,89,29,155,42,0,0                 ; mulps         0x2a9b(%rip),%xmm3        # 3ee0 <_sk_callback_sse41+0x44b>
+  DB  102,68,15,219,5,162,42,0,0          ; pand          0x2aa2(%rip),%xmm8        # 3ef0 <_sk_callback_sse41+0x45b>
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,93,42,0,0                ; mulps         0x2a5d(%rip),%xmm8        # 3dd0 <_sk_callback_sse41+0x422>
+  DB  68,15,89,5,166,42,0,0               ; mulps         0x2aa6(%rip),%xmm8        # 3f00 <_sk_callback_sse41+0x46b>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -11230,7 +11404,7 @@ _sk_lerp_565_sse41 LABEL PROC
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  15,88,214                           ; addps         %xmm6,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,71,42,0,0                  ; movaps        0x2a47(%rip),%xmm3        # 3de0 <_sk_callback_sse41+0x432>
+  DB  15,40,29,144,42,0,0                 ; movaps        0x2a90(%rip),%xmm3        # 3f10 <_sk_callback_sse41+0x47b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_load_tables_sse41
@@ -11239,7 +11413,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  243,69,15,111,4,184                 ; movdqu        (%r8,%rdi,4),%xmm8
-  DB  102,15,111,5,62,42,0,0              ; movdqa        0x2a3e(%rip),%xmm0        # 3df0 <_sk_callback_sse41+0x442>
+  DB  102,15,111,5,135,42,0,0             ; movdqa        0x2a87(%rip),%xmm0        # 3f20 <_sk_callback_sse41+0x48b>
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,73,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%r8
   DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
@@ -11254,7 +11428,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  102,15,58,33,193,48                 ; insertps      $0x30,%xmm1,%xmm0
   DB  76,139,64,16                        ; mov           0x10(%rax),%r8
   DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
-  DB  102,15,56,0,13,249,41,0,0           ; pshufb        0x29f9(%rip),%xmm1        # 3e00 <_sk_callback_sse41+0x452>
+  DB  102,15,56,0,13,66,42,0,0            ; pshufb        0x2a42(%rip),%xmm1        # 3f30 <_sk_callback_sse41+0x49b>
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
   DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
   DB  68,15,182,209                       ; movzbl        %cl,%r10d
@@ -11269,7 +11443,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  102,15,58,33,202,48                 ; insertps      $0x30,%xmm2,%xmm1
   DB  76,139,64,24                        ; mov           0x18(%rax),%r8
   DB  102,65,15,111,208                   ; movdqa        %xmm8,%xmm2
-  DB  102,15,56,0,21,181,41,0,0           ; pshufb        0x29b5(%rip),%xmm2        # 3e10 <_sk_callback_sse41+0x462>
+  DB  102,15,56,0,21,254,41,0,0           ; pshufb        0x29fe(%rip),%xmm2        # 3f40 <_sk_callback_sse41+0x4ab>
   DB  102,72,15,58,22,209,1               ; pextrq        $0x1,%xmm2,%rcx
   DB  102,72,15,126,208                   ; movq          %xmm2,%rax
   DB  68,15,182,200                       ; movzbl        %al,%r9d
@@ -11284,7 +11458,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
   DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
   DB  65,15,91,216                        ; cvtdq2ps      %xmm8,%xmm3
-  DB  15,89,29,114,41,0,0                 ; mulps         0x2972(%rip),%xmm3        # 3e20 <_sk_callback_sse41+0x472>
+  DB  15,89,29,187,41,0,0                 ; mulps         0x29bb(%rip),%xmm3        # 3f50 <_sk_callback_sse41+0x4bb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -11301,7 +11475,7 @@ _sk_load_tables_u16_be_sse41 LABEL PROC
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
   DB  102,68,15,105,200                   ; punpckhwd     %xmm0,%xmm9
-  DB  102,68,15,111,5,69,41,0,0           ; movdqa        0x2945(%rip),%xmm8        # 3e30 <_sk_callback_sse41+0x482>
+  DB  102,68,15,111,5,142,41,0,0          ; movdqa        0x298e(%rip),%xmm8        # 3f60 <_sk_callback_sse41+0x4cb>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
@@ -11318,7 +11492,7 @@ _sk_load_tables_u16_be_sse41 LABEL PROC
   DB  243,67,15,16,20,8                   ; movss         (%r8,%r9,1),%xmm2
   DB  102,15,58,33,194,48                 ; insertps      $0x30,%xmm2,%xmm0
   DB  76,139,64,16                        ; mov           0x10(%rax),%r8
-  DB  102,15,56,0,13,248,40,0,0           ; pshufb        0x28f8(%rip),%xmm1        # 3e40 <_sk_callback_sse41+0x492>
+  DB  102,15,56,0,13,65,41,0,0            ; pshufb        0x2941(%rip),%xmm1        # 3f70 <_sk_callback_sse41+0x4db>
   DB  102,15,56,51,201                    ; pmovzxwd      %xmm1,%xmm1
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
   DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
@@ -11354,7 +11528,7 @@ _sk_load_tables_u16_be_sse41 LABEL PROC
   DB  102,65,15,235,216                   ; por           %xmm8,%xmm3
   DB  102,15,56,51,219                    ; pmovzxwd      %xmm3,%xmm3
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,70,40,0,0                  ; mulps         0x2846(%rip),%xmm3        # 3e50 <_sk_callback_sse41+0x4a2>
+  DB  15,89,29,143,40,0,0                 ; mulps         0x288f(%rip),%xmm3        # 3f80 <_sk_callback_sse41+0x4eb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -11374,7 +11548,7 @@ _sk_load_tables_rgb_u16_be_sse41 LABEL PROC
   DB  102,68,15,97,200                    ; punpcklwd     %xmm0,%xmm9
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
   DB  102,65,15,97,201                    ; punpcklwd     %xmm9,%xmm1
-  DB  102,68,15,111,5,8,40,0,0            ; movdqa        0x2808(%rip),%xmm8        # 3e60 <_sk_callback_sse41+0x4b2>
+  DB  102,68,15,111,5,81,40,0,0           ; movdqa        0x2851(%rip),%xmm8        # 3f90 <_sk_callback_sse41+0x4fb>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
@@ -11391,7 +11565,7 @@ _sk_load_tables_rgb_u16_be_sse41 LABEL PROC
   DB  243,67,15,16,28,8                   ; movss         (%r8,%r9,1),%xmm3
   DB  102,15,58,33,195,48                 ; insertps      $0x30,%xmm3,%xmm0
   DB  76,139,64,16                        ; mov           0x10(%rax),%r8
-  DB  102,15,56,0,13,187,39,0,0           ; pshufb        0x27bb(%rip),%xmm1        # 3e70 <_sk_callback_sse41+0x4c2>
+  DB  102,15,56,0,13,4,40,0,0             ; pshufb        0x2804(%rip),%xmm1        # 3fa0 <_sk_callback_sse41+0x50b>
   DB  102,15,56,51,201                    ; pmovzxwd      %xmm1,%xmm1
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
   DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
@@ -11422,7 +11596,7 @@ _sk_load_tables_rgb_u16_be_sse41 LABEL PROC
   DB  243,65,15,16,28,8                   ; movss         (%r8,%rcx,1),%xmm3
   DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,38,39,0,0                  ; movaps        0x2726(%rip),%xmm3        # 3e80 <_sk_callback_sse41+0x4d2>
+  DB  15,40,29,111,39,0,0                 ; movaps        0x276f(%rip),%xmm3        # 3fb0 <_sk_callback_sse41+0x51b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_byte_tables_sse41
@@ -11430,7 +11604,7 @@ _sk_byte_tables_sse41 LABEL PROC
   DB  65,86                               ; push          %r14
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,39,39,0,0                ; movaps        0x2727(%rip),%xmm8        # 3e90 <_sk_callback_sse41+0x4e2>
+  DB  68,15,40,5,112,39,0,0               ; movaps        0x2770(%rip),%xmm8        # 3fc0 <_sk_callback_sse41+0x52b>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,91,192                       ; cvtps2dq      %xmm0,%xmm0
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
@@ -11449,7 +11623,7 @@ _sk_byte_tables_sse41 LABEL PROC
   DB  102,15,58,32,193,3                  ; pinsrb        $0x3,%ecx,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,13,216,38,0,0              ; movaps        0x26d8(%rip),%xmm9        # 3ea0 <_sk_callback_sse41+0x4f2>
+  DB  68,15,40,13,33,39,0,0               ; movaps        0x2721(%rip),%xmm9        # 3fd0 <_sk_callback_sse41+0x53b>
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -11538,7 +11712,7 @@ _sk_byte_tables_rgb_sse41 LABEL PROC
   DB  102,15,58,32,193,3                  ; pinsrb        $0x3,%ecx,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,13,96,37,0,0               ; movaps        0x2560(%rip),%xmm9        # 3eb0 <_sk_callback_sse41+0x502>
+  DB  68,15,40,13,169,37,0,0              ; movaps        0x25a9(%rip),%xmm9        # 3fe0 <_sk_callback_sse41+0x54b>
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -11705,31 +11879,31 @@ _sk_parametric_r_sse41 LABEL PROC
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,194                        ; cvtdq2ps      %xmm10,%xmm8
-  DB  68,15,89,5,183,34,0,0               ; mulps         0x22b7(%rip),%xmm8        # 3ec0 <_sk_callback_sse41+0x512>
-  DB  68,15,84,21,191,34,0,0              ; andps         0x22bf(%rip),%xmm10        # 3ed0 <_sk_callback_sse41+0x522>
-  DB  68,15,86,21,199,34,0,0              ; orps          0x22c7(%rip),%xmm10        # 3ee0 <_sk_callback_sse41+0x532>
-  DB  68,15,88,5,207,34,0,0               ; addps         0x22cf(%rip),%xmm8        # 3ef0 <_sk_callback_sse41+0x542>
-  DB  68,15,40,37,215,34,0,0              ; movaps        0x22d7(%rip),%xmm12        # 3f00 <_sk_callback_sse41+0x552>
+  DB  68,15,89,5,0,35,0,0                 ; mulps         0x2300(%rip),%xmm8        # 3ff0 <_sk_callback_sse41+0x55b>
+  DB  68,15,84,21,8,35,0,0                ; andps         0x2308(%rip),%xmm10        # 4000 <_sk_callback_sse41+0x56b>
+  DB  68,15,86,21,16,35,0,0               ; orps          0x2310(%rip),%xmm10        # 4010 <_sk_callback_sse41+0x57b>
+  DB  68,15,88,5,24,35,0,0                ; addps         0x2318(%rip),%xmm8        # 4020 <_sk_callback_sse41+0x58b>
+  DB  68,15,40,37,32,35,0,0               ; movaps        0x2320(%rip),%xmm12        # 4030 <_sk_callback_sse41+0x59b>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,196                        ; subps         %xmm12,%xmm8
-  DB  68,15,88,21,215,34,0,0              ; addps         0x22d7(%rip),%xmm10        # 3f10 <_sk_callback_sse41+0x562>
-  DB  68,15,40,37,223,34,0,0              ; movaps        0x22df(%rip),%xmm12        # 3f20 <_sk_callback_sse41+0x572>
+  DB  68,15,88,21,32,35,0,0               ; addps         0x2320(%rip),%xmm10        # 4040 <_sk_callback_sse41+0x5ab>
+  DB  68,15,40,37,40,35,0,0               ; movaps        0x2328(%rip),%xmm12        # 4050 <_sk_callback_sse41+0x5bb>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,196                        ; subps         %xmm12,%xmm8
   DB  69,15,89,195                        ; mulps         %xmm11,%xmm8
   DB  102,69,15,58,8,208,1                ; roundps       $0x1,%xmm8,%xmm10
   DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,5,204,34,0,0               ; addps         0x22cc(%rip),%xmm8        # 3f30 <_sk_callback_sse41+0x582>
-  DB  68,15,40,21,212,34,0,0              ; movaps        0x22d4(%rip),%xmm10        # 3f40 <_sk_callback_sse41+0x592>
+  DB  68,15,88,5,21,35,0,0                ; addps         0x2315(%rip),%xmm8        # 4060 <_sk_callback_sse41+0x5cb>
+  DB  68,15,40,21,29,35,0,0               ; movaps        0x231d(%rip),%xmm10        # 4070 <_sk_callback_sse41+0x5db>
   DB  69,15,89,211                        ; mulps         %xmm11,%xmm10
   DB  69,15,92,194                        ; subps         %xmm10,%xmm8
-  DB  68,15,40,21,212,34,0,0              ; movaps        0x22d4(%rip),%xmm10        # 3f50 <_sk_callback_sse41+0x5a2>
+  DB  68,15,40,21,29,35,0,0               ; movaps        0x231d(%rip),%xmm10        # 4080 <_sk_callback_sse41+0x5eb>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  68,15,40,29,216,34,0,0              ; movaps        0x22d8(%rip),%xmm11        # 3f60 <_sk_callback_sse41+0x5b2>
+  DB  68,15,40,29,33,35,0,0               ; movaps        0x2321(%rip),%xmm11        # 4090 <_sk_callback_sse41+0x5fb>
   DB  69,15,94,218                        ; divps         %xmm10,%xmm11
   DB  69,15,88,216                        ; addps         %xmm8,%xmm11
-  DB  68,15,89,29,216,34,0,0              ; mulps         0x22d8(%rip),%xmm11        # 3f70 <_sk_callback_sse41+0x5c2>
+  DB  68,15,89,29,33,35,0,0               ; mulps         0x2321(%rip),%xmm11        # 40a0 <_sk_callback_sse41+0x60b>
   DB  102,69,15,91,211                    ; cvtps2dq      %xmm11,%xmm10
   DB  243,68,15,16,64,20                  ; movss         0x14(%rax),%xmm8
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
@@ -11737,7 +11911,7 @@ _sk_parametric_r_sse41 LABEL PROC
   DB  102,69,15,56,20,193                 ; blendvps      %xmm0,%xmm9,%xmm8
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
-  DB  68,15,93,5,191,34,0,0               ; minps         0x22bf(%rip),%xmm8        # 3f80 <_sk_callback_sse41+0x5d2>
+  DB  68,15,93,5,8,35,0,0                 ; minps         0x2308(%rip),%xmm8        # 40b0 <_sk_callback_sse41+0x61b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -11765,31 +11939,31 @@ _sk_parametric_g_sse41 LABEL PROC
   DB  68,15,88,217                        ; addps         %xmm1,%xmm11
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,91,227                        ; cvtdq2ps      %xmm11,%xmm12
-  DB  68,15,89,37,96,34,0,0               ; mulps         0x2260(%rip),%xmm12        # 3f90 <_sk_callback_sse41+0x5e2>
-  DB  68,15,84,29,104,34,0,0              ; andps         0x2268(%rip),%xmm11        # 3fa0 <_sk_callback_sse41+0x5f2>
-  DB  68,15,86,29,112,34,0,0              ; orps          0x2270(%rip),%xmm11        # 3fb0 <_sk_callback_sse41+0x602>
-  DB  68,15,88,37,120,34,0,0              ; addps         0x2278(%rip),%xmm12        # 3fc0 <_sk_callback_sse41+0x612>
-  DB  15,40,13,129,34,0,0                 ; movaps        0x2281(%rip),%xmm1        # 3fd0 <_sk_callback_sse41+0x622>
+  DB  68,15,89,37,169,34,0,0              ; mulps         0x22a9(%rip),%xmm12        # 40c0 <_sk_callback_sse41+0x62b>
+  DB  68,15,84,29,177,34,0,0              ; andps         0x22b1(%rip),%xmm11        # 40d0 <_sk_callback_sse41+0x63b>
+  DB  68,15,86,29,185,34,0,0              ; orps          0x22b9(%rip),%xmm11        # 40e0 <_sk_callback_sse41+0x64b>
+  DB  68,15,88,37,193,34,0,0              ; addps         0x22c1(%rip),%xmm12        # 40f0 <_sk_callback_sse41+0x65b>
+  DB  15,40,13,202,34,0,0                 ; movaps        0x22ca(%rip),%xmm1        # 4100 <_sk_callback_sse41+0x66b>
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
   DB  68,15,92,225                        ; subps         %xmm1,%xmm12
-  DB  68,15,88,29,129,34,0,0              ; addps         0x2281(%rip),%xmm11        # 3fe0 <_sk_callback_sse41+0x632>
-  DB  15,40,13,138,34,0,0                 ; movaps        0x228a(%rip),%xmm1        # 3ff0 <_sk_callback_sse41+0x642>
+  DB  68,15,88,29,202,34,0,0              ; addps         0x22ca(%rip),%xmm11        # 4110 <_sk_callback_sse41+0x67b>
+  DB  15,40,13,211,34,0,0                 ; movaps        0x22d3(%rip),%xmm1        # 4120 <_sk_callback_sse41+0x68b>
   DB  65,15,94,203                        ; divps         %xmm11,%xmm1
   DB  68,15,92,225                        ; subps         %xmm1,%xmm12
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  102,69,15,58,8,212,1                ; roundps       $0x1,%xmm12,%xmm10
   DB  69,15,40,220                        ; movaps        %xmm12,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,37,119,34,0,0              ; addps         0x2277(%rip),%xmm12        # 4000 <_sk_callback_sse41+0x652>
-  DB  15,40,13,128,34,0,0                 ; movaps        0x2280(%rip),%xmm1        # 4010 <_sk_callback_sse41+0x662>
+  DB  68,15,88,37,192,34,0,0              ; addps         0x22c0(%rip),%xmm12        # 4130 <_sk_callback_sse41+0x69b>
+  DB  15,40,13,201,34,0,0                 ; movaps        0x22c9(%rip),%xmm1        # 4140 <_sk_callback_sse41+0x6ab>
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
   DB  68,15,92,225                        ; subps         %xmm1,%xmm12
-  DB  68,15,40,21,128,34,0,0              ; movaps        0x2280(%rip),%xmm10        # 4020 <_sk_callback_sse41+0x672>
+  DB  68,15,40,21,201,34,0,0              ; movaps        0x22c9(%rip),%xmm10        # 4150 <_sk_callback_sse41+0x6bb>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  15,40,13,133,34,0,0                 ; movaps        0x2285(%rip),%xmm1        # 4030 <_sk_callback_sse41+0x682>
+  DB  15,40,13,206,34,0,0                 ; movaps        0x22ce(%rip),%xmm1        # 4160 <_sk_callback_sse41+0x6cb>
   DB  65,15,94,202                        ; divps         %xmm10,%xmm1
   DB  65,15,88,204                        ; addps         %xmm12,%xmm1
-  DB  15,89,13,134,34,0,0                 ; mulps         0x2286(%rip),%xmm1        # 4040 <_sk_callback_sse41+0x692>
+  DB  15,89,13,207,34,0,0                 ; mulps         0x22cf(%rip),%xmm1        # 4170 <_sk_callback_sse41+0x6db>
   DB  102,68,15,91,209                    ; cvtps2dq      %xmm1,%xmm10
   DB  243,15,16,72,20                     ; movss         0x14(%rax),%xmm1
   DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
@@ -11797,7 +11971,7 @@ _sk_parametric_g_sse41 LABEL PROC
   DB  102,65,15,56,20,201                 ; blendvps      %xmm0,%xmm9,%xmm1
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,95,200                           ; maxps         %xmm0,%xmm1
-  DB  15,93,13,113,34,0,0                 ; minps         0x2271(%rip),%xmm1        # 4050 <_sk_callback_sse41+0x6a2>
+  DB  15,93,13,186,34,0,0                 ; minps         0x22ba(%rip),%xmm1        # 4180 <_sk_callback_sse41+0x6eb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -11825,31 +11999,31 @@ _sk_parametric_b_sse41 LABEL PROC
   DB  68,15,88,218                        ; addps         %xmm2,%xmm11
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,91,227                        ; cvtdq2ps      %xmm11,%xmm12
-  DB  68,15,89,37,18,34,0,0               ; mulps         0x2212(%rip),%xmm12        # 4060 <_sk_callback_sse41+0x6b2>
-  DB  68,15,84,29,26,34,0,0               ; andps         0x221a(%rip),%xmm11        # 4070 <_sk_callback_sse41+0x6c2>
-  DB  68,15,86,29,34,34,0,0               ; orps          0x2222(%rip),%xmm11        # 4080 <_sk_callback_sse41+0x6d2>
-  DB  68,15,88,37,42,34,0,0               ; addps         0x222a(%rip),%xmm12        # 4090 <_sk_callback_sse41+0x6e2>
-  DB  15,40,21,51,34,0,0                  ; movaps        0x2233(%rip),%xmm2        # 40a0 <_sk_callback_sse41+0x6f2>
+  DB  68,15,89,37,91,34,0,0               ; mulps         0x225b(%rip),%xmm12        # 4190 <_sk_callback_sse41+0x6fb>
+  DB  68,15,84,29,99,34,0,0               ; andps         0x2263(%rip),%xmm11        # 41a0 <_sk_callback_sse41+0x70b>
+  DB  68,15,86,29,107,34,0,0              ; orps          0x226b(%rip),%xmm11        # 41b0 <_sk_callback_sse41+0x71b>
+  DB  68,15,88,37,115,34,0,0              ; addps         0x2273(%rip),%xmm12        # 41c0 <_sk_callback_sse41+0x72b>
+  DB  15,40,21,124,34,0,0                 ; movaps        0x227c(%rip),%xmm2        # 41d0 <_sk_callback_sse41+0x73b>
   DB  65,15,89,211                        ; mulps         %xmm11,%xmm2
   DB  68,15,92,226                        ; subps         %xmm2,%xmm12
-  DB  68,15,88,29,51,34,0,0               ; addps         0x2233(%rip),%xmm11        # 40b0 <_sk_callback_sse41+0x702>
-  DB  15,40,21,60,34,0,0                  ; movaps        0x223c(%rip),%xmm2        # 40c0 <_sk_callback_sse41+0x712>
+  DB  68,15,88,29,124,34,0,0              ; addps         0x227c(%rip),%xmm11        # 41e0 <_sk_callback_sse41+0x74b>
+  DB  15,40,21,133,34,0,0                 ; movaps        0x2285(%rip),%xmm2        # 41f0 <_sk_callback_sse41+0x75b>
   DB  65,15,94,211                        ; divps         %xmm11,%xmm2
   DB  68,15,92,226                        ; subps         %xmm2,%xmm12
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  102,69,15,58,8,212,1                ; roundps       $0x1,%xmm12,%xmm10
   DB  69,15,40,220                        ; movaps        %xmm12,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,37,41,34,0,0               ; addps         0x2229(%rip),%xmm12        # 40d0 <_sk_callback_sse41+0x722>
-  DB  15,40,21,50,34,0,0                  ; movaps        0x2232(%rip),%xmm2        # 40e0 <_sk_callback_sse41+0x732>
+  DB  68,15,88,37,114,34,0,0              ; addps         0x2272(%rip),%xmm12        # 4200 <_sk_callback_sse41+0x76b>
+  DB  15,40,21,123,34,0,0                 ; movaps        0x227b(%rip),%xmm2        # 4210 <_sk_callback_sse41+0x77b>
   DB  65,15,89,211                        ; mulps         %xmm11,%xmm2
   DB  68,15,92,226                        ; subps         %xmm2,%xmm12
-  DB  68,15,40,21,50,34,0,0               ; movaps        0x2232(%rip),%xmm10        # 40f0 <_sk_callback_sse41+0x742>
+  DB  68,15,40,21,123,34,0,0              ; movaps        0x227b(%rip),%xmm10        # 4220 <_sk_callback_sse41+0x78b>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  15,40,21,55,34,0,0                  ; movaps        0x2237(%rip),%xmm2        # 4100 <_sk_callback_sse41+0x752>
+  DB  15,40,21,128,34,0,0                 ; movaps        0x2280(%rip),%xmm2        # 4230 <_sk_callback_sse41+0x79b>
   DB  65,15,94,210                        ; divps         %xmm10,%xmm2
   DB  65,15,88,212                        ; addps         %xmm12,%xmm2
-  DB  15,89,21,56,34,0,0                  ; mulps         0x2238(%rip),%xmm2        # 4110 <_sk_callback_sse41+0x762>
+  DB  15,89,21,129,34,0,0                 ; mulps         0x2281(%rip),%xmm2        # 4240 <_sk_callback_sse41+0x7ab>
   DB  102,68,15,91,210                    ; cvtps2dq      %xmm2,%xmm10
   DB  243,15,16,80,20                     ; movss         0x14(%rax),%xmm2
   DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
@@ -11857,7 +12031,7 @@ _sk_parametric_b_sse41 LABEL PROC
   DB  102,65,15,56,20,209                 ; blendvps      %xmm0,%xmm9,%xmm2
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,95,208                           ; maxps         %xmm0,%xmm2
-  DB  15,93,21,35,34,0,0                  ; minps         0x2223(%rip),%xmm2        # 4120 <_sk_callback_sse41+0x772>
+  DB  15,93,21,108,34,0,0                 ; minps         0x226c(%rip),%xmm2        # 4250 <_sk_callback_sse41+0x7bb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -11885,31 +12059,31 @@ _sk_parametric_a_sse41 LABEL PROC
   DB  68,15,88,219                        ; addps         %xmm3,%xmm11
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,91,227                        ; cvtdq2ps      %xmm11,%xmm12
-  DB  68,15,89,37,196,33,0,0              ; mulps         0x21c4(%rip),%xmm12        # 4130 <_sk_callback_sse41+0x782>
-  DB  68,15,84,29,204,33,0,0              ; andps         0x21cc(%rip),%xmm11        # 4140 <_sk_callback_sse41+0x792>
-  DB  68,15,86,29,212,33,0,0              ; orps          0x21d4(%rip),%xmm11        # 4150 <_sk_callback_sse41+0x7a2>
-  DB  68,15,88,37,220,33,0,0              ; addps         0x21dc(%rip),%xmm12        # 4160 <_sk_callback_sse41+0x7b2>
-  DB  15,40,29,229,33,0,0                 ; movaps        0x21e5(%rip),%xmm3        # 4170 <_sk_callback_sse41+0x7c2>
+  DB  68,15,89,37,13,34,0,0               ; mulps         0x220d(%rip),%xmm12        # 4260 <_sk_callback_sse41+0x7cb>
+  DB  68,15,84,29,21,34,0,0               ; andps         0x2215(%rip),%xmm11        # 4270 <_sk_callback_sse41+0x7db>
+  DB  68,15,86,29,29,34,0,0               ; orps          0x221d(%rip),%xmm11        # 4280 <_sk_callback_sse41+0x7eb>
+  DB  68,15,88,37,37,34,0,0               ; addps         0x2225(%rip),%xmm12        # 4290 <_sk_callback_sse41+0x7fb>
+  DB  15,40,29,46,34,0,0                  ; movaps        0x222e(%rip),%xmm3        # 42a0 <_sk_callback_sse41+0x80b>
   DB  65,15,89,219                        ; mulps         %xmm11,%xmm3
   DB  68,15,92,227                        ; subps         %xmm3,%xmm12
-  DB  68,15,88,29,229,33,0,0              ; addps         0x21e5(%rip),%xmm11        # 4180 <_sk_callback_sse41+0x7d2>
-  DB  15,40,29,238,33,0,0                 ; movaps        0x21ee(%rip),%xmm3        # 4190 <_sk_callback_sse41+0x7e2>
+  DB  68,15,88,29,46,34,0,0               ; addps         0x222e(%rip),%xmm11        # 42b0 <_sk_callback_sse41+0x81b>
+  DB  15,40,29,55,34,0,0                  ; movaps        0x2237(%rip),%xmm3        # 42c0 <_sk_callback_sse41+0x82b>
   DB  65,15,94,219                        ; divps         %xmm11,%xmm3
   DB  68,15,92,227                        ; subps         %xmm3,%xmm12
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  102,69,15,58,8,212,1                ; roundps       $0x1,%xmm12,%xmm10
   DB  69,15,40,220                        ; movaps        %xmm12,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,37,219,33,0,0              ; addps         0x21db(%rip),%xmm12        # 41a0 <_sk_callback_sse41+0x7f2>
-  DB  15,40,29,228,33,0,0                 ; movaps        0x21e4(%rip),%xmm3        # 41b0 <_sk_callback_sse41+0x802>
+  DB  68,15,88,37,36,34,0,0               ; addps         0x2224(%rip),%xmm12        # 42d0 <_sk_callback_sse41+0x83b>
+  DB  15,40,29,45,34,0,0                  ; movaps        0x222d(%rip),%xmm3        # 42e0 <_sk_callback_sse41+0x84b>
   DB  65,15,89,219                        ; mulps         %xmm11,%xmm3
   DB  68,15,92,227                        ; subps         %xmm3,%xmm12
-  DB  68,15,40,21,228,33,0,0              ; movaps        0x21e4(%rip),%xmm10        # 41c0 <_sk_callback_sse41+0x812>
+  DB  68,15,40,21,45,34,0,0               ; movaps        0x222d(%rip),%xmm10        # 42f0 <_sk_callback_sse41+0x85b>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  15,40,29,233,33,0,0                 ; movaps        0x21e9(%rip),%xmm3        # 41d0 <_sk_callback_sse41+0x822>
+  DB  15,40,29,50,34,0,0                  ; movaps        0x2232(%rip),%xmm3        # 4300 <_sk_callback_sse41+0x86b>
   DB  65,15,94,218                        ; divps         %xmm10,%xmm3
   DB  65,15,88,220                        ; addps         %xmm12,%xmm3
-  DB  15,89,29,234,33,0,0                 ; mulps         0x21ea(%rip),%xmm3        # 41e0 <_sk_callback_sse41+0x832>
+  DB  15,89,29,51,34,0,0                  ; mulps         0x2233(%rip),%xmm3        # 4310 <_sk_callback_sse41+0x87b>
   DB  102,68,15,91,211                    ; cvtps2dq      %xmm3,%xmm10
   DB  243,15,16,88,20                     ; movss         0x14(%rax),%xmm3
   DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
@@ -11917,7 +12091,7 @@ _sk_parametric_a_sse41 LABEL PROC
   DB  102,65,15,56,20,217                 ; blendvps      %xmm0,%xmm9,%xmm3
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,95,216                           ; maxps         %xmm0,%xmm3
-  DB  15,93,29,213,33,0,0                 ; minps         0x21d5(%rip),%xmm3        # 41f0 <_sk_callback_sse41+0x842>
+  DB  15,93,29,30,34,0,0                  ; minps         0x221e(%rip),%xmm3        # 4320 <_sk_callback_sse41+0x88b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -11925,29 +12099,29 @@ _sk_parametric_a_sse41 LABEL PROC
 PUBLIC _sk_lab_to_xyz_sse41
 _sk_lab_to_xyz_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,89,5,209,33,0,0               ; mulps         0x21d1(%rip),%xmm8        # 4200 <_sk_callback_sse41+0x852>
-  DB  68,15,40,13,217,33,0,0              ; movaps        0x21d9(%rip),%xmm9        # 4210 <_sk_callback_sse41+0x862>
+  DB  68,15,89,5,26,34,0,0                ; mulps         0x221a(%rip),%xmm8        # 4330 <_sk_callback_sse41+0x89b>
+  DB  68,15,40,13,34,34,0,0               ; movaps        0x2222(%rip),%xmm9        # 4340 <_sk_callback_sse41+0x8ab>
   DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
-  DB  15,40,5,222,33,0,0                  ; movaps        0x21de(%rip),%xmm0        # 4220 <_sk_callback_sse41+0x872>
+  DB  15,40,5,39,34,0,0                   ; movaps        0x2227(%rip),%xmm0        # 4350 <_sk_callback_sse41+0x8bb>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
   DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
   DB  15,88,208                           ; addps         %xmm0,%xmm2
-  DB  68,15,88,5,220,33,0,0               ; addps         0x21dc(%rip),%xmm8        # 4230 <_sk_callback_sse41+0x882>
-  DB  68,15,89,5,228,33,0,0               ; mulps         0x21e4(%rip),%xmm8        # 4240 <_sk_callback_sse41+0x892>
-  DB  15,89,13,237,33,0,0                 ; mulps         0x21ed(%rip),%xmm1        # 4250 <_sk_callback_sse41+0x8a2>
+  DB  68,15,88,5,37,34,0,0                ; addps         0x2225(%rip),%xmm8        # 4360 <_sk_callback_sse41+0x8cb>
+  DB  68,15,89,5,45,34,0,0                ; mulps         0x222d(%rip),%xmm8        # 4370 <_sk_callback_sse41+0x8db>
+  DB  15,89,13,54,34,0,0                  ; mulps         0x2236(%rip),%xmm1        # 4380 <_sk_callback_sse41+0x8eb>
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  15,89,21,242,33,0,0                 ; mulps         0x21f2(%rip),%xmm2        # 4260 <_sk_callback_sse41+0x8b2>
+  DB  15,89,21,59,34,0,0                  ; mulps         0x223b(%rip),%xmm2        # 4390 <_sk_callback_sse41+0x8fb>
   DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
   DB  68,15,92,210                        ; subps         %xmm2,%xmm10
   DB  68,15,40,217                        ; movaps        %xmm1,%xmm11
   DB  69,15,89,219                        ; mulps         %xmm11,%xmm11
   DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
-  DB  68,15,40,13,230,33,0,0              ; movaps        0x21e6(%rip),%xmm9        # 4270 <_sk_callback_sse41+0x8c2>
+  DB  68,15,40,13,47,34,0,0               ; movaps        0x222f(%rip),%xmm9        # 43a0 <_sk_callback_sse41+0x90b>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  65,15,194,195,1                     ; cmpltps       %xmm11,%xmm0
-  DB  15,40,21,230,33,0,0                 ; movaps        0x21e6(%rip),%xmm2        # 4280 <_sk_callback_sse41+0x8d2>
+  DB  15,40,21,47,34,0,0                  ; movaps        0x222f(%rip),%xmm2        # 43b0 <_sk_callback_sse41+0x91b>
   DB  15,88,202                           ; addps         %xmm2,%xmm1
-  DB  68,15,40,37,235,33,0,0              ; movaps        0x21eb(%rip),%xmm12        # 4290 <_sk_callback_sse41+0x8e2>
+  DB  68,15,40,37,52,34,0,0               ; movaps        0x2234(%rip),%xmm12        # 43c0 <_sk_callback_sse41+0x92b>
   DB  65,15,89,204                        ; mulps         %xmm12,%xmm1
   DB  102,65,15,56,20,203                 ; blendvps      %xmm0,%xmm11,%xmm1
   DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
@@ -11966,8 +12140,8 @@ _sk_lab_to_xyz_sse41 LABEL PROC
   DB  65,15,89,212                        ; mulps         %xmm12,%xmm2
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  102,65,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm2
-  DB  15,89,13,164,33,0,0                 ; mulps         0x21a4(%rip),%xmm1        # 42a0 <_sk_callback_sse41+0x8f2>
-  DB  15,89,21,173,33,0,0                 ; mulps         0x21ad(%rip),%xmm2        # 42b0 <_sk_callback_sse41+0x902>
+  DB  15,89,13,237,33,0,0                 ; mulps         0x21ed(%rip),%xmm1        # 43d0 <_sk_callback_sse41+0x93b>
+  DB  15,89,21,246,33,0,0                 ; mulps         0x21f6(%rip),%xmm2        # 43e0 <_sk_callback_sse41+0x94b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
   DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
@@ -11979,7 +12153,7 @@ _sk_load_a8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,157,33,0,0                 ; mulps         0x219d(%rip),%xmm3        # 42c0 <_sk_callback_sse41+0x912>
+  DB  15,89,29,230,33,0,0                 ; mulps         0x21e6(%rip),%xmm3        # 43f0 <_sk_callback_sse41+0x95b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,87,201                           ; xorps         %xmm1,%xmm1
@@ -12010,7 +12184,7 @@ _sk_gather_a8_sse41 LABEL PROC
   DB  102,15,58,32,192,3                  ; pinsrb        $0x3,%eax,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,49,33,0,0                  ; mulps         0x2131(%rip),%xmm3        # 42d0 <_sk_callback_sse41+0x922>
+  DB  15,89,29,122,33,0,0                 ; mulps         0x217a(%rip),%xmm3        # 4400 <_sk_callback_sse41+0x96b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
@@ -12021,7 +12195,7 @@ PUBLIC _sk_store_a8_sse41
 _sk_store_a8_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,37,33,0,0                ; movaps        0x2125(%rip),%xmm8        # 42e0 <_sk_callback_sse41+0x932>
+  DB  68,15,40,5,110,33,0,0               ; movaps        0x216e(%rip),%xmm8        # 4410 <_sk_callback_sse41+0x97b>
   DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
   DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
@@ -12036,9 +12210,9 @@ _sk_load_g8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,2,33,0,0                    ; mulps         0x2102(%rip),%xmm0        # 42f0 <_sk_callback_sse41+0x942>
+  DB  15,89,5,75,33,0,0                   ; mulps         0x214b(%rip),%xmm0        # 4420 <_sk_callback_sse41+0x98b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,9,33,0,0                   ; movaps        0x2109(%rip),%xmm3        # 4300 <_sk_callback_sse41+0x952>
+  DB  15,40,29,82,33,0,0                  ; movaps        0x2152(%rip),%xmm3        # 4430 <_sk_callback_sse41+0x99b>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -12067,9 +12241,9 @@ _sk_gather_g8_sse41 LABEL PROC
   DB  102,15,58,32,192,3                  ; pinsrb        $0x3,%eax,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,162,32,0,0                  ; mulps         0x20a2(%rip),%xmm0        # 4310 <_sk_callback_sse41+0x962>
+  DB  15,89,5,235,32,0,0                  ; mulps         0x20eb(%rip),%xmm0        # 4440 <_sk_callback_sse41+0x9ab>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,169,32,0,0                 ; movaps        0x20a9(%rip),%xmm3        # 4320 <_sk_callback_sse41+0x972>
+  DB  15,40,29,242,32,0,0                 ; movaps        0x20f2(%rip),%xmm3        # 4450 <_sk_callback_sse41+0x9bb>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -12079,9 +12253,9 @@ _sk_gather_i8_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  73,137,192                          ; mov           %rax,%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  116,5                               ; je            228e <_sk_gather_i8_sse41+0xf>
+  DB  116,5                               ; je            2375 <_sk_gather_i8_sse41+0xf>
   DB  76,137,192                          ; mov           %r8,%rax
-  DB  235,2                               ; jmp           2290 <_sk_gather_i8_sse41+0x11>
+  DB  235,2                               ; jmp           2377 <_sk_gather_i8_sse41+0x11>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
@@ -12112,17 +12286,17 @@ _sk_gather_i8_sse41 LABEL PROC
   DB  102,15,58,34,28,8,1                 ; pinsrd        $0x1,(%rax,%rcx,1),%xmm3
   DB  102,66,15,58,34,28,144,2            ; pinsrd        $0x2,(%rax,%r10,4),%xmm3
   DB  102,66,15,58,34,28,8,3              ; pinsrd        $0x3,(%rax,%r9,1),%xmm3
-  DB  102,15,111,5,0,32,0,0               ; movdqa        0x2000(%rip),%xmm0        # 4330 <_sk_callback_sse41+0x982>
+  DB  102,15,111,5,73,32,0,0              ; movdqa        0x2049(%rip),%xmm0        # 4460 <_sk_callback_sse41+0x9cb>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,1,32,0,0                 ; movaps        0x2001(%rip),%xmm8        # 4340 <_sk_callback_sse41+0x992>
+  DB  68,15,40,5,74,32,0,0                ; movaps        0x204a(%rip),%xmm8        # 4470 <_sk_callback_sse41+0x9db>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
-  DB  102,15,56,0,13,0,32,0,0             ; pshufb        0x2000(%rip),%xmm1        # 4350 <_sk_callback_sse41+0x9a2>
+  DB  102,15,56,0,13,73,32,0,0            ; pshufb        0x2049(%rip),%xmm1        # 4480 <_sk_callback_sse41+0x9eb>
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
-  DB  102,15,56,0,21,252,31,0,0           ; pshufb        0x1ffc(%rip),%xmm2        # 4360 <_sk_callback_sse41+0x9b2>
+  DB  102,15,56,0,21,69,32,0,0            ; pshufb        0x2045(%rip),%xmm2        # 4490 <_sk_callback_sse41+0x9fb>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
@@ -12136,19 +12310,19 @@ _sk_load_565_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,51,20,120                 ; pmovzxwd      (%rax,%rdi,2),%xmm2
-  DB  102,15,111,5,226,31,0,0             ; movdqa        0x1fe2(%rip),%xmm0        # 4370 <_sk_callback_sse41+0x9c2>
+  DB  102,15,111,5,43,32,0,0              ; movdqa        0x202b(%rip),%xmm0        # 44a0 <_sk_callback_sse41+0xa0b>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,228,31,0,0                  ; mulps         0x1fe4(%rip),%xmm0        # 4380 <_sk_callback_sse41+0x9d2>
-  DB  102,15,111,13,236,31,0,0            ; movdqa        0x1fec(%rip),%xmm1        # 4390 <_sk_callback_sse41+0x9e2>
+  DB  15,89,5,45,32,0,0                   ; mulps         0x202d(%rip),%xmm0        # 44b0 <_sk_callback_sse41+0xa1b>
+  DB  102,15,111,13,53,32,0,0             ; movdqa        0x2035(%rip),%xmm1        # 44c0 <_sk_callback_sse41+0xa2b>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,238,31,0,0                 ; mulps         0x1fee(%rip),%xmm1        # 43a0 <_sk_callback_sse41+0x9f2>
-  DB  102,15,219,21,246,31,0,0            ; pand          0x1ff6(%rip),%xmm2        # 43b0 <_sk_callback_sse41+0xa02>
+  DB  15,89,13,55,32,0,0                  ; mulps         0x2037(%rip),%xmm1        # 44d0 <_sk_callback_sse41+0xa3b>
+  DB  102,15,219,21,63,32,0,0             ; pand          0x203f(%rip),%xmm2        # 44e0 <_sk_callback_sse41+0xa4b>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,252,31,0,0                 ; mulps         0x1ffc(%rip),%xmm2        # 43c0 <_sk_callback_sse41+0xa12>
+  DB  15,89,21,69,32,0,0                  ; mulps         0x2045(%rip),%xmm2        # 44f0 <_sk_callback_sse41+0xa5b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,3,32,0,0                   ; movaps        0x2003(%rip),%xmm3        # 43d0 <_sk_callback_sse41+0xa22>
+  DB  15,40,29,76,32,0,0                  ; movaps        0x204c(%rip),%xmm3        # 4500 <_sk_callback_sse41+0xa6b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_gather_565_sse41
@@ -12174,31 +12348,31 @@ _sk_gather_565_sse41 LABEL PROC
   DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
   DB  102,15,196,192,3                    ; pinsrw        $0x3,%eax,%xmm0
   DB  102,15,56,51,208                    ; pmovzxwd      %xmm0,%xmm2
-  DB  102,15,111,5,168,31,0,0             ; movdqa        0x1fa8(%rip),%xmm0        # 43e0 <_sk_callback_sse41+0xa32>
+  DB  102,15,111,5,241,31,0,0             ; movdqa        0x1ff1(%rip),%xmm0        # 4510 <_sk_callback_sse41+0xa7b>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,170,31,0,0                  ; mulps         0x1faa(%rip),%xmm0        # 43f0 <_sk_callback_sse41+0xa42>
-  DB  102,15,111,13,178,31,0,0            ; movdqa        0x1fb2(%rip),%xmm1        # 4400 <_sk_callback_sse41+0xa52>
+  DB  15,89,5,243,31,0,0                  ; mulps         0x1ff3(%rip),%xmm0        # 4520 <_sk_callback_sse41+0xa8b>
+  DB  102,15,111,13,251,31,0,0            ; movdqa        0x1ffb(%rip),%xmm1        # 4530 <_sk_callback_sse41+0xa9b>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,180,31,0,0                 ; mulps         0x1fb4(%rip),%xmm1        # 4410 <_sk_callback_sse41+0xa62>
-  DB  102,15,219,21,188,31,0,0            ; pand          0x1fbc(%rip),%xmm2        # 4420 <_sk_callback_sse41+0xa72>
+  DB  15,89,13,253,31,0,0                 ; mulps         0x1ffd(%rip),%xmm1        # 4540 <_sk_callback_sse41+0xaab>
+  DB  102,15,219,21,5,32,0,0              ; pand          0x2005(%rip),%xmm2        # 4550 <_sk_callback_sse41+0xabb>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,194,31,0,0                 ; mulps         0x1fc2(%rip),%xmm2        # 4430 <_sk_callback_sse41+0xa82>
+  DB  15,89,21,11,32,0,0                  ; mulps         0x200b(%rip),%xmm2        # 4560 <_sk_callback_sse41+0xacb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,201,31,0,0                 ; movaps        0x1fc9(%rip),%xmm3        # 4440 <_sk_callback_sse41+0xa92>
+  DB  15,40,29,18,32,0,0                  ; movaps        0x2012(%rip),%xmm3        # 4570 <_sk_callback_sse41+0xadb>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_565_sse41
 _sk_store_565_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,202,31,0,0               ; movaps        0x1fca(%rip),%xmm8        # 4450 <_sk_callback_sse41+0xaa2>
+  DB  68,15,40,5,19,32,0,0                ; movaps        0x2013(%rip),%xmm8        # 4580 <_sk_callback_sse41+0xaeb>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
   DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
-  DB  68,15,40,21,191,31,0,0              ; movaps        0x1fbf(%rip),%xmm10        # 4460 <_sk_callback_sse41+0xab2>
+  DB  68,15,40,21,8,32,0,0                ; movaps        0x2008(%rip),%xmm10        # 4590 <_sk_callback_sse41+0xafb>
   DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
   DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
   DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
@@ -12216,21 +12390,21 @@ _sk_load_4444_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,51,28,120                 ; pmovzxwd      (%rax,%rdi,2),%xmm3
-  DB  102,15,111,5,138,31,0,0             ; movdqa        0x1f8a(%rip),%xmm0        # 4470 <_sk_callback_sse41+0xac2>
+  DB  102,15,111,5,211,31,0,0             ; movdqa        0x1fd3(%rip),%xmm0        # 45a0 <_sk_callback_sse41+0xb0b>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,140,31,0,0                  ; mulps         0x1f8c(%rip),%xmm0        # 4480 <_sk_callback_sse41+0xad2>
-  DB  102,15,111,13,148,31,0,0            ; movdqa        0x1f94(%rip),%xmm1        # 4490 <_sk_callback_sse41+0xae2>
+  DB  15,89,5,213,31,0,0                  ; mulps         0x1fd5(%rip),%xmm0        # 45b0 <_sk_callback_sse41+0xb1b>
+  DB  102,15,111,13,221,31,0,0            ; movdqa        0x1fdd(%rip),%xmm1        # 45c0 <_sk_callback_sse41+0xb2b>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,150,31,0,0                 ; mulps         0x1f96(%rip),%xmm1        # 44a0 <_sk_callback_sse41+0xaf2>
-  DB  102,15,111,21,158,31,0,0            ; movdqa        0x1f9e(%rip),%xmm2        # 44b0 <_sk_callback_sse41+0xb02>
+  DB  15,89,13,223,31,0,0                 ; mulps         0x1fdf(%rip),%xmm1        # 45d0 <_sk_callback_sse41+0xb3b>
+  DB  102,15,111,21,231,31,0,0            ; movdqa        0x1fe7(%rip),%xmm2        # 45e0 <_sk_callback_sse41+0xb4b>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,160,31,0,0                 ; mulps         0x1fa0(%rip),%xmm2        # 44c0 <_sk_callback_sse41+0xb12>
-  DB  102,15,219,29,168,31,0,0            ; pand          0x1fa8(%rip),%xmm3        # 44d0 <_sk_callback_sse41+0xb22>
+  DB  15,89,21,233,31,0,0                 ; mulps         0x1fe9(%rip),%xmm2        # 45f0 <_sk_callback_sse41+0xb5b>
+  DB  102,15,219,29,241,31,0,0            ; pand          0x1ff1(%rip),%xmm3        # 4600 <_sk_callback_sse41+0xb6b>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,174,31,0,0                 ; mulps         0x1fae(%rip),%xmm3        # 44e0 <_sk_callback_sse41+0xb32>
+  DB  15,89,29,247,31,0,0                 ; mulps         0x1ff7(%rip),%xmm3        # 4610 <_sk_callback_sse41+0xb7b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -12257,21 +12431,21 @@ _sk_gather_4444_sse41 LABEL PROC
   DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
   DB  102,15,196,192,3                    ; pinsrw        $0x3,%eax,%xmm0
   DB  102,15,56,51,216                    ; pmovzxwd      %xmm0,%xmm3
-  DB  102,15,111,5,81,31,0,0              ; movdqa        0x1f51(%rip),%xmm0        # 44f0 <_sk_callback_sse41+0xb42>
+  DB  102,15,111,5,154,31,0,0             ; movdqa        0x1f9a(%rip),%xmm0        # 4620 <_sk_callback_sse41+0xb8b>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,83,31,0,0                   ; mulps         0x1f53(%rip),%xmm0        # 4500 <_sk_callback_sse41+0xb52>
-  DB  102,15,111,13,91,31,0,0             ; movdqa        0x1f5b(%rip),%xmm1        # 4510 <_sk_callback_sse41+0xb62>
+  DB  15,89,5,156,31,0,0                  ; mulps         0x1f9c(%rip),%xmm0        # 4630 <_sk_callback_sse41+0xb9b>
+  DB  102,15,111,13,164,31,0,0            ; movdqa        0x1fa4(%rip),%xmm1        # 4640 <_sk_callback_sse41+0xbab>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,93,31,0,0                  ; mulps         0x1f5d(%rip),%xmm1        # 4520 <_sk_callback_sse41+0xb72>
-  DB  102,15,111,21,101,31,0,0            ; movdqa        0x1f65(%rip),%xmm2        # 4530 <_sk_callback_sse41+0xb82>
+  DB  15,89,13,166,31,0,0                 ; mulps         0x1fa6(%rip),%xmm1        # 4650 <_sk_callback_sse41+0xbbb>
+  DB  102,15,111,21,174,31,0,0            ; movdqa        0x1fae(%rip),%xmm2        # 4660 <_sk_callback_sse41+0xbcb>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,103,31,0,0                 ; mulps         0x1f67(%rip),%xmm2        # 4540 <_sk_callback_sse41+0xb92>
-  DB  102,15,219,29,111,31,0,0            ; pand          0x1f6f(%rip),%xmm3        # 4550 <_sk_callback_sse41+0xba2>
+  DB  15,89,21,176,31,0,0                 ; mulps         0x1fb0(%rip),%xmm2        # 4670 <_sk_callback_sse41+0xbdb>
+  DB  102,15,219,29,184,31,0,0            ; pand          0x1fb8(%rip),%xmm3        # 4680 <_sk_callback_sse41+0xbeb>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,117,31,0,0                 ; mulps         0x1f75(%rip),%xmm3        # 4560 <_sk_callback_sse41+0xbb2>
+  DB  15,89,29,190,31,0,0                 ; mulps         0x1fbe(%rip),%xmm3        # 4690 <_sk_callback_sse41+0xbfb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -12279,7 +12453,7 @@ PUBLIC _sk_store_4444_sse41
 _sk_store_4444_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,116,31,0,0               ; movaps        0x1f74(%rip),%xmm8        # 4570 <_sk_callback_sse41+0xbc2>
+  DB  68,15,40,5,189,31,0,0               ; movaps        0x1fbd(%rip),%xmm8        # 46a0 <_sk_callback_sse41+0xc0b>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -12307,17 +12481,17 @@ _sk_load_8888_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  15,16,28,184                        ; movups        (%rax,%rdi,4),%xmm3
-  DB  15,40,5,19,31,0,0                   ; movaps        0x1f13(%rip),%xmm0        # 4580 <_sk_callback_sse41+0xbd2>
+  DB  15,40,5,92,31,0,0                   ; movaps        0x1f5c(%rip),%xmm0        # 46b0 <_sk_callback_sse41+0xc1b>
   DB  15,84,195                           ; andps         %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,21,31,0,0                ; movaps        0x1f15(%rip),%xmm8        # 4590 <_sk_callback_sse41+0xbe2>
+  DB  68,15,40,5,94,31,0,0                ; movaps        0x1f5e(%rip),%xmm8        # 46c0 <_sk_callback_sse41+0xc2b>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,40,203                           ; movaps        %xmm3,%xmm1
-  DB  102,15,56,0,13,21,31,0,0            ; pshufb        0x1f15(%rip),%xmm1        # 45a0 <_sk_callback_sse41+0xbf2>
+  DB  102,15,56,0,13,94,31,0,0            ; pshufb        0x1f5e(%rip),%xmm1        # 46d0 <_sk_callback_sse41+0xc3b>
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  15,40,211                           ; movaps        %xmm3,%xmm2
-  DB  102,15,56,0,21,18,31,0,0            ; pshufb        0x1f12(%rip),%xmm2        # 45b0 <_sk_callback_sse41+0xc02>
+  DB  102,15,56,0,21,91,31,0,0            ; pshufb        0x1f5b(%rip),%xmm2        # 46e0 <_sk_callback_sse41+0xc4b>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
@@ -12346,17 +12520,17 @@ _sk_gather_8888_sse41 LABEL PROC
   DB  102,65,15,58,34,28,129,1            ; pinsrd        $0x1,(%r9,%rax,4),%xmm3
   DB  102,67,15,58,34,28,145,2            ; pinsrd        $0x2,(%r9,%r10,4),%xmm3
   DB  102,65,15,58,34,28,137,3            ; pinsrd        $0x3,(%r9,%rcx,4),%xmm3
-  DB  102,15,111,5,171,30,0,0             ; movdqa        0x1eab(%rip),%xmm0        # 45c0 <_sk_callback_sse41+0xc12>
+  DB  102,15,111,5,244,30,0,0             ; movdqa        0x1ef4(%rip),%xmm0        # 46f0 <_sk_callback_sse41+0xc5b>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,172,30,0,0               ; movaps        0x1eac(%rip),%xmm8        # 45d0 <_sk_callback_sse41+0xc22>
+  DB  68,15,40,5,245,30,0,0               ; movaps        0x1ef5(%rip),%xmm8        # 4700 <_sk_callback_sse41+0xc6b>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
-  DB  102,15,56,0,13,171,30,0,0           ; pshufb        0x1eab(%rip),%xmm1        # 45e0 <_sk_callback_sse41+0xc32>
+  DB  102,15,56,0,13,244,30,0,0           ; pshufb        0x1ef4(%rip),%xmm1        # 4710 <_sk_callback_sse41+0xc7b>
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
-  DB  102,15,56,0,21,167,30,0,0           ; pshufb        0x1ea7(%rip),%xmm2        # 45f0 <_sk_callback_sse41+0xc42>
+  DB  102,15,56,0,21,240,30,0,0           ; pshufb        0x1ef0(%rip),%xmm2        # 4720 <_sk_callback_sse41+0xc8b>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
@@ -12369,7 +12543,7 @@ PUBLIC _sk_store_8888_sse41
 _sk_store_8888_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,147,30,0,0               ; movaps        0x1e93(%rip),%xmm8        # 4600 <_sk_callback_sse41+0xc52>
+  DB  68,15,40,5,220,30,0,0               ; movaps        0x1edc(%rip),%xmm8        # 4730 <_sk_callback_sse41+0xc9b>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -12404,18 +12578,18 @@ _sk_load_f16_sse41 LABEL PROC
   DB  102,68,15,97,216                    ; punpcklwd     %xmm0,%xmm11
   DB  102,68,15,105,200                   ; punpckhwd     %xmm0,%xmm9
   DB  102,65,15,56,51,203                 ; pmovzxwd      %xmm11,%xmm1
-  DB  102,68,15,111,5,12,30,0,0           ; movdqa        0x1e0c(%rip),%xmm8        # 4610 <_sk_callback_sse41+0xc62>
+  DB  102,68,15,111,5,85,30,0,0           ; movdqa        0x1e55(%rip),%xmm8        # 4740 <_sk_callback_sse41+0xcab>
   DB  102,15,111,209                      ; movdqa        %xmm1,%xmm2
   DB  102,65,15,219,208                   ; pand          %xmm8,%xmm2
   DB  102,15,239,202                      ; pxor          %xmm2,%xmm1
-  DB  102,15,111,29,7,30,0,0              ; movdqa        0x1e07(%rip),%xmm3        # 4620 <_sk_callback_sse41+0xc72>
+  DB  102,15,111,29,80,30,0,0             ; movdqa        0x1e50(%rip),%xmm3        # 4750 <_sk_callback_sse41+0xcbb>
   DB  102,15,114,242,16                   ; pslld         $0x10,%xmm2
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,15,56,63,195                    ; pmaxud        %xmm3,%xmm0
   DB  102,15,118,193                      ; pcmpeqd       %xmm1,%xmm0
   DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
   DB  102,15,235,202                      ; por           %xmm2,%xmm1
-  DB  102,68,15,111,21,243,29,0,0         ; movdqa        0x1df3(%rip),%xmm10        # 4630 <_sk_callback_sse41+0xc82>
+  DB  102,68,15,111,21,60,30,0,0          ; movdqa        0x1e3c(%rip),%xmm10        # 4760 <_sk_callback_sse41+0xccb>
   DB  102,65,15,254,202                   ; paddd         %xmm10,%xmm1
   DB  102,15,219,193                      ; pand          %xmm1,%xmm0
   DB  102,65,15,115,219,8                 ; psrldq        $0x8,%xmm11
@@ -12486,18 +12660,18 @@ _sk_gather_f16_sse41 LABEL PROC
   DB  102,68,15,97,218                    ; punpcklwd     %xmm2,%xmm11
   DB  102,68,15,105,202                   ; punpckhwd     %xmm2,%xmm9
   DB  102,65,15,56,51,203                 ; pmovzxwd      %xmm11,%xmm1
-  DB  102,68,15,111,5,177,28,0,0          ; movdqa        0x1cb1(%rip),%xmm8        # 4640 <_sk_callback_sse41+0xc92>
+  DB  102,68,15,111,5,250,28,0,0          ; movdqa        0x1cfa(%rip),%xmm8        # 4770 <_sk_callback_sse41+0xcdb>
   DB  102,15,111,209                      ; movdqa        %xmm1,%xmm2
   DB  102,65,15,219,208                   ; pand          %xmm8,%xmm2
   DB  102,15,239,202                      ; pxor          %xmm2,%xmm1
-  DB  102,15,111,29,172,28,0,0            ; movdqa        0x1cac(%rip),%xmm3        # 4650 <_sk_callback_sse41+0xca2>
+  DB  102,15,111,29,245,28,0,0            ; movdqa        0x1cf5(%rip),%xmm3        # 4780 <_sk_callback_sse41+0xceb>
   DB  102,15,114,242,16                   ; pslld         $0x10,%xmm2
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,15,56,63,195                    ; pmaxud        %xmm3,%xmm0
   DB  102,15,118,193                      ; pcmpeqd       %xmm1,%xmm0
   DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
   DB  102,15,235,202                      ; por           %xmm2,%xmm1
-  DB  102,68,15,111,21,152,28,0,0         ; movdqa        0x1c98(%rip),%xmm10        # 4660 <_sk_callback_sse41+0xcb2>
+  DB  102,68,15,111,21,225,28,0,0         ; movdqa        0x1ce1(%rip),%xmm10        # 4790 <_sk_callback_sse41+0xcfb>
   DB  102,65,15,254,202                   ; paddd         %xmm10,%xmm1
   DB  102,15,219,193                      ; pand          %xmm1,%xmm0
   DB  102,65,15,115,219,8                 ; psrldq        $0x8,%xmm11
@@ -12543,17 +12717,17 @@ PUBLIC _sk_store_f16_sse41
 _sk_store_f16_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  102,68,15,111,21,206,27,0,0         ; movdqa        0x1bce(%rip),%xmm10        # 4670 <_sk_callback_sse41+0xcc2>
+  DB  102,68,15,111,21,23,28,0,0          ; movdqa        0x1c17(%rip),%xmm10        # 47a0 <_sk_callback_sse41+0xd0b>
   DB  102,68,15,111,224                   ; movdqa        %xmm0,%xmm12
   DB  102,68,15,111,232                   ; movdqa        %xmm0,%xmm13
   DB  102,69,15,219,234                   ; pand          %xmm10,%xmm13
   DB  102,69,15,239,229                   ; pxor          %xmm13,%xmm12
-  DB  102,68,15,111,13,193,27,0,0         ; movdqa        0x1bc1(%rip),%xmm9        # 4680 <_sk_callback_sse41+0xcd2>
+  DB  102,68,15,111,13,10,28,0,0          ; movdqa        0x1c0a(%rip),%xmm9        # 47b0 <_sk_callback_sse41+0xd1b>
   DB  102,65,15,114,213,16                ; psrld         $0x10,%xmm13
   DB  102,69,15,111,193                   ; movdqa        %xmm9,%xmm8
   DB  102,69,15,102,196                   ; pcmpgtd       %xmm12,%xmm8
   DB  102,65,15,114,212,13                ; psrld         $0xd,%xmm12
-  DB  102,68,15,111,29,178,27,0,0         ; movdqa        0x1bb2(%rip),%xmm11        # 4690 <_sk_callback_sse41+0xce2>
+  DB  102,68,15,111,29,251,27,0,0         ; movdqa        0x1bfb(%rip),%xmm11        # 47c0 <_sk_callback_sse41+0xd2b>
   DB  102,69,15,235,235                   ; por           %xmm11,%xmm13
   DB  102,69,15,254,236                   ; paddd         %xmm12,%xmm13
   DB  102,69,15,223,197                   ; pandn         %xmm13,%xmm8
@@ -12621,7 +12795,7 @@ _sk_load_u16_be_sse41 LABEL PROC
   DB  102,15,235,200                      ; por           %xmm0,%xmm1
   DB  102,15,56,51,193                    ; pmovzxwd      %xmm1,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,129,26,0,0               ; movaps        0x1a81(%rip),%xmm8        # 46a0 <_sk_callback_sse41+0xcf2>
+  DB  68,15,40,5,202,26,0,0               ; movaps        0x1aca(%rip),%xmm8        # 47d0 <_sk_callback_sse41+0xd3b>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -12671,7 +12845,7 @@ _sk_load_rgb_u16_be_sse41 LABEL PROC
   DB  102,15,235,193                      ; por           %xmm1,%xmm0
   DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,194,25,0,0               ; movaps        0x19c2(%rip),%xmm8        # 46b0 <_sk_callback_sse41+0xd02>
+  DB  68,15,40,5,11,26,0,0                ; movaps        0x1a0b(%rip),%xmm8        # 47e0 <_sk_callback_sse41+0xd4b>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -12688,14 +12862,14 @@ _sk_load_rgb_u16_be_sse41 LABEL PROC
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,137,25,0,0                 ; movaps        0x1989(%rip),%xmm3        # 46c0 <_sk_callback_sse41+0xd12>
+  DB  15,40,29,210,25,0,0                 ; movaps        0x19d2(%rip),%xmm3        # 47f0 <_sk_callback_sse41+0xd5b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_u16_be_sse41
 _sk_store_u16_be_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,13,138,25,0,0              ; movaps        0x198a(%rip),%xmm9        # 46d0 <_sk_callback_sse41+0xd22>
+  DB  68,15,40,13,211,25,0,0              ; movaps        0x19d3(%rip),%xmm9        # 4800 <_sk_callback_sse41+0xd6b>
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
@@ -12900,10 +13074,10 @@ _sk_mirror_y_sse41 LABEL PROC
 PUBLIC _sk_luminance_to_alpha_sse41
 _sk_luminance_to_alpha_sse41 LABEL PROC
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
-  DB  15,89,5,168,22,0,0                  ; mulps         0x16a8(%rip),%xmm0        # 46e0 <_sk_callback_sse41+0xd32>
-  DB  15,89,13,177,22,0,0                 ; mulps         0x16b1(%rip),%xmm1        # 46f0 <_sk_callback_sse41+0xd42>
+  DB  15,89,5,241,22,0,0                  ; mulps         0x16f1(%rip),%xmm0        # 4810 <_sk_callback_sse41+0xd7b>
+  DB  15,89,13,250,22,0,0                 ; mulps         0x16fa(%rip),%xmm1        # 4820 <_sk_callback_sse41+0xd8b>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
-  DB  15,89,29,183,22,0,0                 ; mulps         0x16b7(%rip),%xmm3        # 4700 <_sk_callback_sse41+0xd52>
+  DB  15,89,29,0,23,0,0                   ; mulps         0x1700(%rip),%xmm3        # 4830 <_sk_callback_sse41+0xd9b>
   DB  15,88,217                           ; addps         %xmm1,%xmm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
@@ -13126,7 +13300,7 @@ _sk_linear_gradient_sse41 LABEL PROC
   DB  69,15,198,237,0                     ; shufps        $0x0,%xmm13,%xmm13
   DB  72,139,8                            ; mov           (%rax),%rcx
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,132,4,1,0,0                      ; je            34f1 <_sk_linear_gradient_sse41+0x13e>
+  DB  15,132,4,1,0,0                      ; je            35d8 <_sk_linear_gradient_sse41+0x13e>
   DB  72,131,236,88                       ; sub           $0x58,%rsp
   DB  15,41,36,36                         ; movaps        %xmm4,(%rsp)
   DB  15,41,108,36,16                     ; movaps        %xmm5,0x10(%rsp)
@@ -13177,13 +13351,13 @@ _sk_linear_gradient_sse41 LABEL PROC
   DB  15,40,196                           ; movaps        %xmm4,%xmm0
   DB  72,131,192,36                       ; add           $0x24,%rax
   DB  72,255,201                          ; dec           %rcx
-  DB  15,133,65,255,255,255               ; jne           3419 <_sk_linear_gradient_sse41+0x66>
+  DB  15,133,65,255,255,255               ; jne           3500 <_sk_linear_gradient_sse41+0x66>
   DB  15,40,124,36,48                     ; movaps        0x30(%rsp),%xmm7
   DB  15,40,116,36,32                     ; movaps        0x20(%rsp),%xmm6
   DB  15,40,108,36,16                     ; movaps        0x10(%rsp),%xmm5
   DB  15,40,36,36                         ; movaps        (%rsp),%xmm4
   DB  72,131,196,88                       ; add           $0x58,%rsp
-  DB  235,13                              ; jmp           34fe <_sk_linear_gradient_sse41+0x14b>
+  DB  235,13                              ; jmp           35e5 <_sk_linear_gradient_sse41+0x14b>
   DB  15,87,201                           ; xorps         %xmm1,%xmm1
   DB  15,87,210                           ; xorps         %xmm2,%xmm2
   DB  15,87,219                           ; xorps         %xmm3,%xmm3
@@ -13248,26 +13422,26 @@ _sk_xy_to_polar_unit_sse41 LABEL PROC
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,40,236                        ; movaps        %xmm12,%xmm13
   DB  69,15,89,237                        ; mulps         %xmm13,%xmm13
-  DB  68,15,40,21,58,17,0,0               ; movaps        0x113a(%rip),%xmm10        # 4710 <_sk_callback_sse41+0xd62>
+  DB  68,15,40,21,131,17,0,0              ; movaps        0x1183(%rip),%xmm10        # 4840 <_sk_callback_sse41+0xdab>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,62,17,0,0               ; addps         0x113e(%rip),%xmm10        # 4720 <_sk_callback_sse41+0xd72>
+  DB  68,15,88,21,135,17,0,0              ; addps         0x1187(%rip),%xmm10        # 4850 <_sk_callback_sse41+0xdbb>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,66,17,0,0               ; addps         0x1142(%rip),%xmm10        # 4730 <_sk_callback_sse41+0xd82>
+  DB  68,15,88,21,139,17,0,0              ; addps         0x118b(%rip),%xmm10        # 4860 <_sk_callback_sse41+0xdcb>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,70,17,0,0               ; addps         0x1146(%rip),%xmm10        # 4740 <_sk_callback_sse41+0xd92>
+  DB  68,15,88,21,143,17,0,0              ; addps         0x118f(%rip),%xmm10        # 4870 <_sk_callback_sse41+0xddb>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,194,195,1                     ; cmpltps       %xmm11,%xmm0
-  DB  68,15,40,29,69,17,0,0               ; movaps        0x1145(%rip),%xmm11        # 4750 <_sk_callback_sse41+0xda2>
+  DB  68,15,40,29,142,17,0,0              ; movaps        0x118e(%rip),%xmm11        # 4880 <_sk_callback_sse41+0xdeb>
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  102,69,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm10
   DB  69,15,194,200,1                     ; cmpltps       %xmm8,%xmm9
-  DB  68,15,40,29,62,17,0,0               ; movaps        0x113e(%rip),%xmm11        # 4760 <_sk_callback_sse41+0xdb2>
+  DB  68,15,40,29,135,17,0,0              ; movaps        0x1187(%rip),%xmm11        # 4890 <_sk_callback_sse41+0xdfb>
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  102,69,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm10
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
   DB  65,15,194,192,1                     ; cmpltps       %xmm8,%xmm0
-  DB  68,15,40,13,48,17,0,0               ; movaps        0x1130(%rip),%xmm9        # 4770 <_sk_callback_sse41+0xdc2>
+  DB  68,15,40,13,121,17,0,0              ; movaps        0x1179(%rip),%xmm9        # 48a0 <_sk_callback_sse41+0xe0b>
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  102,69,15,56,20,209                 ; blendvps      %xmm0,%xmm9,%xmm10
   DB  69,15,194,194,7                     ; cmpordps      %xmm10,%xmm8
@@ -13279,7 +13453,7 @@ _sk_xy_to_polar_unit_sse41 LABEL PROC
 PUBLIC _sk_save_xy_sse41
 _sk_save_xy_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,27,17,0,0                ; movaps        0x111b(%rip),%xmm8        # 4780 <_sk_callback_sse41+0xdd2>
+  DB  68,15,40,5,100,17,0,0               ; movaps        0x1164(%rip),%xmm8        # 48b0 <_sk_callback_sse41+0xe1b>
   DB  15,17,0                             ; movups        %xmm0,(%rax)
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,88,200                        ; addps         %xmm8,%xmm9
@@ -13319,8 +13493,8 @@ _sk_bilinear_nx_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,157,16,0,0                  ; addps         0x109d(%rip),%xmm0        # 4790 <_sk_callback_sse41+0xde2>
-  DB  68,15,40,13,165,16,0,0              ; movaps        0x10a5(%rip),%xmm9        # 47a0 <_sk_callback_sse41+0xdf2>
+  DB  15,88,5,230,16,0,0                  ; addps         0x10e6(%rip),%xmm0        # 48c0 <_sk_callback_sse41+0xe2b>
+  DB  68,15,40,13,238,16,0,0              ; movaps        0x10ee(%rip),%xmm9        # 48d0 <_sk_callback_sse41+0xe3b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -13331,7 +13505,7 @@ _sk_bilinear_px_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,148,16,0,0                  ; addps         0x1094(%rip),%xmm0        # 47b0 <_sk_callback_sse41+0xe02>
+  DB  15,88,5,221,16,0,0                  ; addps         0x10dd(%rip),%xmm0        # 48e0 <_sk_callback_sse41+0xe4b>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -13341,8 +13515,8 @@ _sk_bilinear_ny_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,134,16,0,0                 ; addps         0x1086(%rip),%xmm1        # 47c0 <_sk_callback_sse41+0xe12>
-  DB  68,15,40,13,142,16,0,0              ; movaps        0x108e(%rip),%xmm9        # 47d0 <_sk_callback_sse41+0xe22>
+  DB  15,88,13,207,16,0,0                 ; addps         0x10cf(%rip),%xmm1        # 48f0 <_sk_callback_sse41+0xe5b>
+  DB  68,15,40,13,215,16,0,0              ; movaps        0x10d7(%rip),%xmm9        # 4900 <_sk_callback_sse41+0xe6b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -13353,7 +13527,7 @@ _sk_bilinear_py_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,124,16,0,0                 ; addps         0x107c(%rip),%xmm1        # 47e0 <_sk_callback_sse41+0xe32>
+  DB  15,88,13,197,16,0,0                 ; addps         0x10c5(%rip),%xmm1        # 4910 <_sk_callback_sse41+0xe7b>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -13363,13 +13537,13 @@ _sk_bicubic_n3x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,111,16,0,0                  ; addps         0x106f(%rip),%xmm0        # 47f0 <_sk_callback_sse41+0xe42>
-  DB  68,15,40,13,119,16,0,0              ; movaps        0x1077(%rip),%xmm9        # 4800 <_sk_callback_sse41+0xe52>
+  DB  15,88,5,184,16,0,0                  ; addps         0x10b8(%rip),%xmm0        # 4920 <_sk_callback_sse41+0xe8b>
+  DB  68,15,40,13,192,16,0,0              ; movaps        0x10c0(%rip),%xmm9        # 4930 <_sk_callback_sse41+0xe9b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,115,16,0,0              ; mulps         0x1073(%rip),%xmm9        # 4810 <_sk_callback_sse41+0xe62>
-  DB  68,15,88,13,123,16,0,0              ; addps         0x107b(%rip),%xmm9        # 4820 <_sk_callback_sse41+0xe72>
+  DB  68,15,89,13,188,16,0,0              ; mulps         0x10bc(%rip),%xmm9        # 4940 <_sk_callback_sse41+0xeab>
+  DB  68,15,88,13,196,16,0,0              ; addps         0x10c4(%rip),%xmm9        # 4950 <_sk_callback_sse41+0xebb>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -13380,16 +13554,16 @@ _sk_bicubic_n1x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,106,16,0,0                  ; addps         0x106a(%rip),%xmm0        # 4830 <_sk_callback_sse41+0xe82>
-  DB  68,15,40,13,114,16,0,0              ; movaps        0x1072(%rip),%xmm9        # 4840 <_sk_callback_sse41+0xe92>
+  DB  15,88,5,179,16,0,0                  ; addps         0x10b3(%rip),%xmm0        # 4960 <_sk_callback_sse41+0xecb>
+  DB  68,15,40,13,187,16,0,0              ; movaps        0x10bb(%rip),%xmm9        # 4970 <_sk_callback_sse41+0xedb>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,118,16,0,0               ; movaps        0x1076(%rip),%xmm8        # 4850 <_sk_callback_sse41+0xea2>
+  DB  68,15,40,5,191,16,0,0               ; movaps        0x10bf(%rip),%xmm8        # 4980 <_sk_callback_sse41+0xeeb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,122,16,0,0               ; addps         0x107a(%rip),%xmm8        # 4860 <_sk_callback_sse41+0xeb2>
+  DB  68,15,88,5,195,16,0,0               ; addps         0x10c3(%rip),%xmm8        # 4990 <_sk_callback_sse41+0xefb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,126,16,0,0               ; addps         0x107e(%rip),%xmm8        # 4870 <_sk_callback_sse41+0xec2>
+  DB  68,15,88,5,199,16,0,0               ; addps         0x10c7(%rip),%xmm8        # 49a0 <_sk_callback_sse41+0xf0b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,130,16,0,0               ; addps         0x1082(%rip),%xmm8        # 4880 <_sk_callback_sse41+0xed2>
+  DB  68,15,88,5,203,16,0,0               ; addps         0x10cb(%rip),%xmm8        # 49b0 <_sk_callback_sse41+0xf1b>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -13397,17 +13571,17 @@ _sk_bicubic_n1x_sse41 LABEL PROC
 PUBLIC _sk_bicubic_p1x_sse41
 _sk_bicubic_p1x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,124,16,0,0               ; movaps        0x107c(%rip),%xmm8        # 4890 <_sk_callback_sse41+0xee2>
+  DB  68,15,40,5,197,16,0,0               ; movaps        0x10c5(%rip),%xmm8        # 49c0 <_sk_callback_sse41+0xf2b>
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,72,64                      ; movups        0x40(%rax),%xmm9
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,21,120,16,0,0              ; movaps        0x1078(%rip),%xmm10        # 48a0 <_sk_callback_sse41+0xef2>
+  DB  68,15,40,21,193,16,0,0              ; movaps        0x10c1(%rip),%xmm10        # 49d0 <_sk_callback_sse41+0xf3b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,124,16,0,0              ; addps         0x107c(%rip),%xmm10        # 48b0 <_sk_callback_sse41+0xf02>
+  DB  68,15,88,21,197,16,0,0              ; addps         0x10c5(%rip),%xmm10        # 49e0 <_sk_callback_sse41+0xf4b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,120,16,0,0              ; addps         0x1078(%rip),%xmm10        # 48c0 <_sk_callback_sse41+0xf12>
+  DB  68,15,88,21,193,16,0,0              ; addps         0x10c1(%rip),%xmm10        # 49f0 <_sk_callback_sse41+0xf5b>
   DB  68,15,17,144,128,0,0,0              ; movups        %xmm10,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -13417,11 +13591,11 @@ _sk_bicubic_p3x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,107,16,0,0                  ; addps         0x106b(%rip),%xmm0        # 48d0 <_sk_callback_sse41+0xf22>
+  DB  15,88,5,180,16,0,0                  ; addps         0x10b4(%rip),%xmm0        # 4a00 <_sk_callback_sse41+0xf6b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,107,16,0,0               ; mulps         0x106b(%rip),%xmm8        # 48e0 <_sk_callback_sse41+0xf32>
-  DB  68,15,88,5,115,16,0,0               ; addps         0x1073(%rip),%xmm8        # 48f0 <_sk_callback_sse41+0xf42>
+  DB  68,15,89,5,180,16,0,0               ; mulps         0x10b4(%rip),%xmm8        # 4a10 <_sk_callback_sse41+0xf7b>
+  DB  68,15,88,5,188,16,0,0               ; addps         0x10bc(%rip),%xmm8        # 4a20 <_sk_callback_sse41+0xf8b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -13432,13 +13606,13 @@ _sk_bicubic_n3y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,97,16,0,0                  ; addps         0x1061(%rip),%xmm1        # 4900 <_sk_callback_sse41+0xf52>
-  DB  68,15,40,13,105,16,0,0              ; movaps        0x1069(%rip),%xmm9        # 4910 <_sk_callback_sse41+0xf62>
+  DB  15,88,13,170,16,0,0                 ; addps         0x10aa(%rip),%xmm1        # 4a30 <_sk_callback_sse41+0xf9b>
+  DB  68,15,40,13,178,16,0,0              ; movaps        0x10b2(%rip),%xmm9        # 4a40 <_sk_callback_sse41+0xfab>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,101,16,0,0              ; mulps         0x1065(%rip),%xmm9        # 4920 <_sk_callback_sse41+0xf72>
-  DB  68,15,88,13,109,16,0,0              ; addps         0x106d(%rip),%xmm9        # 4930 <_sk_callback_sse41+0xf82>
+  DB  68,15,89,13,174,16,0,0              ; mulps         0x10ae(%rip),%xmm9        # 4a50 <_sk_callback_sse41+0xfbb>
+  DB  68,15,88,13,182,16,0,0              ; addps         0x10b6(%rip),%xmm9        # 4a60 <_sk_callback_sse41+0xfcb>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -13449,16 +13623,16 @@ _sk_bicubic_n1y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,91,16,0,0                  ; addps         0x105b(%rip),%xmm1        # 4940 <_sk_callback_sse41+0xf92>
-  DB  68,15,40,13,99,16,0,0               ; movaps        0x1063(%rip),%xmm9        # 4950 <_sk_callback_sse41+0xfa2>
+  DB  15,88,13,164,16,0,0                 ; addps         0x10a4(%rip),%xmm1        # 4a70 <_sk_callback_sse41+0xfdb>
+  DB  68,15,40,13,172,16,0,0              ; movaps        0x10ac(%rip),%xmm9        # 4a80 <_sk_callback_sse41+0xfeb>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,103,16,0,0               ; movaps        0x1067(%rip),%xmm8        # 4960 <_sk_callback_sse41+0xfb2>
+  DB  68,15,40,5,176,16,0,0               ; movaps        0x10b0(%rip),%xmm8        # 4a90 <_sk_callback_sse41+0xffb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,107,16,0,0               ; addps         0x106b(%rip),%xmm8        # 4970 <_sk_callback_sse41+0xfc2>
+  DB  68,15,88,5,180,16,0,0               ; addps         0x10b4(%rip),%xmm8        # 4aa0 <_sk_callback_sse41+0x100b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,111,16,0,0               ; addps         0x106f(%rip),%xmm8        # 4980 <_sk_callback_sse41+0xfd2>
+  DB  68,15,88,5,184,16,0,0               ; addps         0x10b8(%rip),%xmm8        # 4ab0 <_sk_callback_sse41+0x101b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,115,16,0,0               ; addps         0x1073(%rip),%xmm8        # 4990 <_sk_callback_sse41+0xfe2>
+  DB  68,15,88,5,188,16,0,0               ; addps         0x10bc(%rip),%xmm8        # 4ac0 <_sk_callback_sse41+0x102b>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -13466,17 +13640,17 @@ _sk_bicubic_n1y_sse41 LABEL PROC
 PUBLIC _sk_bicubic_p1y_sse41
 _sk_bicubic_p1y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,109,16,0,0               ; movaps        0x106d(%rip),%xmm8        # 49a0 <_sk_callback_sse41+0xff2>
+  DB  68,15,40,5,182,16,0,0               ; movaps        0x10b6(%rip),%xmm8        # 4ad0 <_sk_callback_sse41+0x103b>
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,72,96                      ; movups        0x60(%rax),%xmm9
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  68,15,40,21,104,16,0,0              ; movaps        0x1068(%rip),%xmm10        # 49b0 <_sk_callback_sse41+0x1002>
+  DB  68,15,40,21,177,16,0,0              ; movaps        0x10b1(%rip),%xmm10        # 4ae0 <_sk_callback_sse41+0x104b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,108,16,0,0              ; addps         0x106c(%rip),%xmm10        # 49c0 <_sk_callback_sse41+0x1012>
+  DB  68,15,88,21,181,16,0,0              ; addps         0x10b5(%rip),%xmm10        # 4af0 <_sk_callback_sse41+0x105b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,104,16,0,0              ; addps         0x1068(%rip),%xmm10        # 49d0 <_sk_callback_sse41+0x1022>
+  DB  68,15,88,21,177,16,0,0              ; addps         0x10b1(%rip),%xmm10        # 4b00 <_sk_callback_sse41+0x106b>
   DB  68,15,17,144,160,0,0,0              ; movups        %xmm10,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -13486,11 +13660,11 @@ _sk_bicubic_p3y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,90,16,0,0                  ; addps         0x105a(%rip),%xmm1        # 49e0 <_sk_callback_sse41+0x1032>
+  DB  15,88,13,163,16,0,0                 ; addps         0x10a3(%rip),%xmm1        # 4b10 <_sk_callback_sse41+0x107b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,90,16,0,0                ; mulps         0x105a(%rip),%xmm8        # 49f0 <_sk_callback_sse41+0x1042>
-  DB  68,15,88,5,98,16,0,0                ; addps         0x1062(%rip),%xmm8        # 4a00 <_sk_callback_sse41+0x1052>
+  DB  68,15,89,5,163,16,0,0               ; mulps         0x10a3(%rip),%xmm8        # 4b20 <_sk_callback_sse41+0x108b>
+  DB  68,15,88,5,171,16,0,0               ; addps         0x10ab(%rip),%xmm8        # 4b30 <_sk_callback_sse41+0x109b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -13565,6 +13739,40 @@ ALIGN 16
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
   DB  0,0                                 ; add           %al,(%rax)
+  DB  128,63,1                            ; cmpb          $0x1,(%rdi)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,1                                 ; add           %al,(%rcx)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,1                                 ; add           %al,(%rcx)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,1                                 ; add           %al,(%rcx)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,4,0                               ; add           %al,(%rax,%rax,1)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  4,0                                 ; add           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  4,0                                 ; add           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  4,0                                 ; add           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  33,8                                ; and           %ecx,(%rax)
+  DB  130                                 ; (bad)
+  DB  60,33                               ; cmp           $0x21,%al
+  DB  8,130,60,33,8,130                   ; or            %al,-0x7df7dec4(%rdx)
+  DB  60,33                               ; cmp           $0x21,%al
+  DB  8,130,60,0,0,0                      ; or            %al,0x3c(%rdx)
+  DB  191,0,0,0,191                       ; mov           $0xbf000000,%edi
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,191,0,0,0,191                     ; add           %bh,-0x41000000(%rdi)
+  DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
@@ -13655,17 +13863,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
-  DB  63                                  ; (bad)
+  DB  191,0,0,128,191                     ; mov           $0xbf800000,%edi
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,191,0,0,128,191,0               ; cmpb          $0x0,-0x40800000(%rdi)
-  DB  0,128,191,0,0,128                   ; add           %al,-0x7fffff41(%rax)
-  DB  191,0,0,224,64                      ; mov           $0x40e00000,%edi
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        3be8 <.literal16+0x188>
+  DB  0,224                               ; add           %ah,%al
+  DB  64,0,0                              ; add           %al,(%rax)
+  DB  224,64                              ; loopne        3d18 <.literal16+0x1d8>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        3bec <.literal16+0x18c>
+  DB  224,64                              ; loopne        3d1c <.literal16+0x1dc>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        3bf0 <.literal16+0x190>
+  DB  224,64                              ; loopne        3d20 <.literal16+0x1e0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -13884,13 +14091,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        3da9 <.literal16+0x349>
+  DB  224,7                               ; loopne        3ed9 <.literal16+0x399>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        3dad <.literal16+0x34d>
+  DB  224,7                               ; loopne        3edd <.literal16+0x39d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        3db1 <.literal16+0x351>
+  DB  224,7                               ; loopne        3ee1 <.literal16+0x3a1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        3db5 <.literal16+0x355>
+  DB  224,7                               ; loopne        3ee5 <.literal16+0x3a5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -13930,10 +14137,10 @@ ALIGN 16
   DB  0,1                                 ; add           %al,(%rcx)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a003e08 <_sk_callback_sse41+0xa00045a>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a003f38 <_sk_callback_sse41+0xa0004a3>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3003e10 <_sk_callback_sse41+0x3000462>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3003f40 <_sk_callback_sse41+0x30004ab>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -13988,11 +14195,11 @@ ALIGN 16
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            3edb <.literal16+0x47b>
+  DB  127,67                              ; jg            400b <.literal16+0x4cb>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            3edf <.literal16+0x47f>
+  DB  127,67                              ; jg            400f <.literal16+0x4cf>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            3ee3 <.literal16+0x483>
+  DB  127,67                              ; jg            4013 <.literal16+0x4d3>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,129,128,128,59           ; addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -14007,16 +14214,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3ed4 <.literal16+0x474>
+  DB  127,0                               ; jg            4004 <.literal16+0x4c4>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3ed8 <.literal16+0x478>
+  DB  127,0                               ; jg            4008 <.literal16+0x4c8>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3edc <.literal16+0x47c>
+  DB  127,0                               ; jg            400c <.literal16+0x4cc>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3ee0 <.literal16+0x480>
+  DB  127,0                               ; jg            4010 <.literal16+0x4d0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -14025,7 +14232,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            3f65 <.literal16+0x505>
+  DB  119,115                             ; ja            4095 <.literal16+0x555>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -14036,7 +14243,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           3ec9 <.literal16+0x469>
+  DB  117,191                             ; jne           3ff9 <.literal16+0x4b9>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -14048,7 +14255,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a37f0a <_sk_callback_sse41+0xffffffffe9a3455c>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3803a <_sk_callback_sse41+0xffffffffe9a345a5>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -14103,16 +14310,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3fa4 <.literal16+0x544>
+  DB  127,0                               ; jg            40d4 <.literal16+0x594>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3fa8 <.literal16+0x548>
+  DB  127,0                               ; jg            40d8 <.literal16+0x598>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3fac <.literal16+0x54c>
+  DB  127,0                               ; jg            40dc <.literal16+0x59c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            3fb0 <.literal16+0x550>
+  DB  127,0                               ; jg            40e0 <.literal16+0x5a0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -14121,7 +14328,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4035 <.literal16+0x5d5>
+  DB  119,115                             ; ja            4165 <.literal16+0x625>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -14132,7 +14339,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           3f99 <.literal16+0x539>
+  DB  117,191                             ; jne           40c9 <.literal16+0x589>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -14144,7 +14351,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a37fda <_sk_callback_sse41+0xffffffffe9a3462c>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3810a <_sk_callback_sse41+0xffffffffe9a34675>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -14199,16 +14406,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4074 <.literal16+0x614>
+  DB  127,0                               ; jg            41a4 <.literal16+0x664>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4078 <.literal16+0x618>
+  DB  127,0                               ; jg            41a8 <.literal16+0x668>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            407c <.literal16+0x61c>
+  DB  127,0                               ; jg            41ac <.literal16+0x66c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4080 <.literal16+0x620>
+  DB  127,0                               ; jg            41b0 <.literal16+0x670>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -14217,7 +14424,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4105 <.literal16+0x6a5>
+  DB  119,115                             ; ja            4235 <.literal16+0x6f5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -14228,7 +14435,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4069 <.literal16+0x609>
+  DB  117,191                             ; jne           4199 <.literal16+0x659>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -14240,7 +14447,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a380aa <_sk_callback_sse41+0xffffffffe9a346fc>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a381da <_sk_callback_sse41+0xffffffffe9a34745>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -14295,16 +14502,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4144 <.literal16+0x6e4>
+  DB  127,0                               ; jg            4274 <.literal16+0x734>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4148 <.literal16+0x6e8>
+  DB  127,0                               ; jg            4278 <.literal16+0x738>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            414c <.literal16+0x6ec>
+  DB  127,0                               ; jg            427c <.literal16+0x73c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4150 <.literal16+0x6f0>
+  DB  127,0                               ; jg            4280 <.literal16+0x740>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -14313,7 +14520,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            41d5 <.literal16+0x775>
+  DB  119,115                             ; ja            4305 <.literal16+0x7c5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -14324,7 +14531,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4139 <.literal16+0x6d9>
+  DB  117,191                             ; jne           4269 <.literal16+0x729>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -14336,7 +14543,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3817a <_sk_callback_sse41+0xffffffffe9a347cc>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a382aa <_sk_callback_sse41+0xffffffffe9a34815>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -14387,13 +14594,13 @@ ALIGN 16
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
-  DB  127,67                              ; jg            4257 <.literal16+0x7f7>
+  DB  127,67                              ; jg            4387 <.literal16+0x847>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            425b <.literal16+0x7fb>
+  DB  127,67                              ; jg            438b <.literal16+0x84b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            425f <.literal16+0x7ff>
+  DB  127,67                              ; jg            438f <.literal16+0x84f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4263 <.literal16+0x803>
+  DB  127,67                              ; jg            4393 <.literal16+0x853>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -14440,16 +14647,16 @@ ALIGN 16
   DB  128,3,62                            ; addb          $0x3e,(%rbx)
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           42e3 <.literal16+0x883>
+  DB  118,63                              ; jbe           4413 <.literal16+0x8d3>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           42e7 <.literal16+0x887>
+  DB  118,63                              ; jbe           4417 <.literal16+0x8d7>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           42eb <.literal16+0x88b>
+  DB  118,63                              ; jbe           441b <.literal16+0x8db>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           42ef <.literal16+0x88f>
+  DB  118,63                              ; jbe           441f <.literal16+0x8df>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
@@ -14461,11 +14668,11 @@ ALIGN 16
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            432b <.literal16+0x8cb>
+  DB  127,67                              ; jg            445b <.literal16+0x91b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            432f <.literal16+0x8cf>
+  DB  127,67                              ; jg            445f <.literal16+0x91f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4333 <.literal16+0x8d3>
+  DB  127,67                              ; jg            4463 <.literal16+0x923>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,0,0,128,63               ; addb          $0x3f,-0x7fffffc5(%rax)
@@ -14494,7 +14701,7 @@ ALIGN 16
   DB  5,255,255,255,9                     ; add           $0x9ffffff,%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3004360 <_sk_callback_sse41+0x30009b2>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3004490 <_sk_callback_sse41+0x30009fb>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -14523,13 +14730,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4399 <.literal16+0x939>
+  DB  224,7                               ; loopne        44c9 <.literal16+0x989>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        439d <.literal16+0x93d>
+  DB  224,7                               ; loopne        44cd <.literal16+0x98d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        43a1 <.literal16+0x941>
+  DB  224,7                               ; loopne        44d1 <.literal16+0x991>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        43a5 <.literal16+0x945>
+  DB  224,7                               ; loopne        44d5 <.literal16+0x995>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -14575,13 +14782,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4409 <.literal16+0x9a9>
+  DB  224,7                               ; loopne        4539 <.literal16+0x9f9>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        440d <.literal16+0x9ad>
+  DB  224,7                               ; loopne        453d <.literal16+0x9fd>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4411 <.literal16+0x9b1>
+  DB  224,7                               ; loopne        4541 <.literal16+0xa01>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4415 <.literal16+0x9b5>
+  DB  224,7                               ; loopne        4545 <.literal16+0xa05>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -14619,13 +14826,13 @@ ALIGN 16
   DB  65,0,0                              ; add           %al,(%r8)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            44a6 <.literal16+0xa46>
+  DB  124,66                              ; jl            45d6 <.literal16+0xa96>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            44aa <.literal16+0xa4a>
+  DB  124,66                              ; jl            45da <.literal16+0xa9a>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            44ae <.literal16+0xa4e>
+  DB  124,66                              ; jl            45de <.literal16+0xa9e>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            44b2 <.literal16+0xa52>
+  DB  124,66                              ; jl            45e2 <.literal16+0xaa2>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,240                               ; add           %dh,%al
@@ -14715,13 +14922,13 @@ ALIGN 16
   DB  136,136,61,137,136,136              ; mov           %cl,-0x777776c3(%rax)
   DB  61,137,136,136,61                   ; cmp           $0x3d888889,%eax
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            45b5 <.literal16+0xb55>
+  DB  112,65                              ; jo            46e5 <.literal16+0xba5>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            45b9 <.literal16+0xb59>
+  DB  112,65                              ; jo            46e9 <.literal16+0xba9>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            45bd <.literal16+0xb5d>
+  DB  112,65                              ; jo            46ed <.literal16+0xbad>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            45c1 <.literal16+0xb61>
+  DB  112,65                              ; jo            46f1 <.literal16+0xbb1>
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  255,0                               ; incl          (%rax)
@@ -14736,7 +14943,7 @@ ALIGN 16
   DB  5,255,255,255,9                     ; add           $0x9ffffff,%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 30045b0 <_sk_callback_sse41+0x3000c02>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 30046e0 <_sk_callback_sse41+0x3000c4b>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -14763,7 +14970,7 @@ ALIGN 16
   DB  5,255,255,255,9                     ; add           $0x9ffffff,%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 30045f0 <_sk_callback_sse41+0x3000c42>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3004720 <_sk_callback_sse41+0x3000c8b>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -14778,11 +14985,11 @@ ALIGN 16
   DB  255,0                               ; incl          (%rax)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            464b <.literal16+0xbeb>
+  DB  127,67                              ; jg            477b <.literal16+0xc3b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            464f <.literal16+0xbef>
+  DB  127,67                              ; jg            477f <.literal16+0xc3f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4653 <.literal16+0xbf3>
+  DB  127,67                              ; jg            4783 <.literal16+0xc43>
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
@@ -14858,13 +15065,13 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  255                                 ; (bad)
-  DB  127,71                              ; jg            471b <.literal16+0xcbb>
+  DB  127,71                              ; jg            484b <.literal16+0xd0b>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            471f <.literal16+0xcbf>
+  DB  127,71                              ; jg            484f <.literal16+0xd0f>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            4723 <.literal16+0xcc3>
+  DB  127,71                              ; jg            4853 <.literal16+0xd13>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            4727 <.literal16+0xcc7>
+  DB  127,71                              ; jg            4857 <.literal16+0xd17>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,208                              ; ds            (bad)
@@ -14990,11 +15197,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         4832 <.literal16+0xdd2>
+  DB  62,114,28                           ; jb,pt         4962 <.literal16+0xe22>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4836 <.literal16+0xdd6>
+  DB  62,114,28                           ; jb,pt         4966 <.literal16+0xe26>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         483a <.literal16+0xdda>
+  DB  62,114,28                           ; jb,pt         496a <.literal16+0xe2a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -15038,7 +15245,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d6c5 <_sk_callback_sse41+0x3d639d17>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d7f5 <_sk_callback_sse41+0x3d639d60>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -15064,7 +15271,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d705 <_sk_callback_sse41+0x3d639d57>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d835 <_sk_callback_sse41+0x3d639da0>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -15073,13 +15280,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            48fe <.literal16+0xe9e>
+  DB  114,28                              ; jb            4a2e <.literal16+0xeee>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4902 <.literal16+0xea2>
+  DB  62,114,28                           ; jb,pt         4a32 <.literal16+0xef2>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4906 <.literal16+0xea6>
+  DB  62,114,28                           ; jb,pt         4a36 <.literal16+0xef6>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         490a <.literal16+0xeaa>
+  DB  62,114,28                           ; jb,pt         4a3a <.literal16+0xefa>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -15100,11 +15307,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         4942 <.literal16+0xee2>
+  DB  62,114,28                           ; jb,pt         4a72 <.literal16+0xf32>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4946 <.literal16+0xee6>
+  DB  62,114,28                           ; jb,pt         4a76 <.literal16+0xf36>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         494a <.literal16+0xeea>
+  DB  62,114,28                           ; jb,pt         4a7a <.literal16+0xf3a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -15148,7 +15355,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d7d5 <_sk_callback_sse41+0x3d639e27>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d905 <_sk_callback_sse41+0x3d639e70>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -15174,7 +15381,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d815 <_sk_callback_sse41+0x3d639e67>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63d945 <_sk_callback_sse41+0x3d639eb0>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -15183,13 +15390,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            4a0e <.literal16+0xfae>
+  DB  114,28                              ; jb            4b3e <.literal16+0xffe>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4a12 <_sk_callback_sse41+0x1064>
+  DB  62,114,28                           ; jb,pt         4b42 <_sk_callback_sse41+0x10ad>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4a16 <_sk_callback_sse41+0x1068>
+  DB  62,114,28                           ; jb,pt         4b46 <_sk_callback_sse41+0x10b1>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4a1a <_sk_callback_sse41+0x106c>
+  DB  62,114,28                           ; jb,pt         4b4a <_sk_callback_sse41+0x10b5>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -15280,7 +15487,7 @@ _sk_seed_shader_sse2 LABEL PROC
   DB  102,15,110,199                      ; movd          %edi,%xmm0
   DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
   DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
-  DB  15,40,21,193,61,0,0                 ; movaps        0x3dc1(%rip),%xmm2        # 3ed0 <_sk_callback_sse2+0xb2>
+  DB  15,40,21,161,62,0,0                 ; movaps        0x3ea1(%rip),%xmm2        # 3fb0 <_sk_callback_sse2+0xab>
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  15,16,2                             ; movups        (%rdx),%xmm0
   DB  15,88,193                           ; addps         %xmm1,%xmm0
@@ -15289,7 +15496,7 @@ _sk_seed_shader_sse2 LABEL PROC
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,21,176,61,0,0                 ; movaps        0x3db0(%rip),%xmm2        # 3ee0 <_sk_callback_sse2+0xc2>
+  DB  15,40,21,144,62,0,0                 ; movaps        0x3e90(%rip),%xmm2        # 3fc0 <_sk_callback_sse2+0xbb>
   DB  15,87,219                           ; xorps         %xmm3,%xmm3
   DB  15,87,228                           ; xorps         %xmm4,%xmm4
   DB  15,87,237                           ; xorps         %xmm5,%xmm5
@@ -15297,6 +15504,54 @@ _sk_seed_shader_sse2 LABEL PROC
   DB  15,87,255                           ; xorps         %xmm7,%xmm7
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_dither_sse2
+_sk_dither_sse2 LABEL PROC
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  102,68,15,110,199                   ; movd          %edi,%xmm8
+  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
+  DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
+  DB  68,15,16,10                         ; movups        (%rdx),%xmm9
+  DB  69,15,88,200                        ; addps         %xmm8,%xmm9
+  DB  243,69,15,91,201                    ; cvttps2dq     %xmm9,%xmm9
+  DB  72,139,8                            ; mov           (%rax),%rcx
+  DB  102,68,15,110,1                     ; movd          (%rcx),%xmm8
+  DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
+  DB  102,69,15,239,193                   ; pxor          %xmm9,%xmm8
+  DB  102,68,15,111,21,85,62,0,0          ; movdqa        0x3e55(%rip),%xmm10        # 3fd0 <_sk_callback_sse2+0xcb>
+  DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
+  DB  102,69,15,219,218                   ; pand          %xmm10,%xmm11
+  DB  102,65,15,114,243,5                 ; pslld         $0x5,%xmm11
+  DB  102,69,15,219,209                   ; pand          %xmm9,%xmm10
+  DB  102,65,15,114,242,4                 ; pslld         $0x4,%xmm10
+  DB  102,68,15,111,37,65,62,0,0          ; movdqa        0x3e41(%rip),%xmm12        # 3fe0 <_sk_callback_sse2+0xdb>
+  DB  102,68,15,111,45,72,62,0,0          ; movdqa        0x3e48(%rip),%xmm13        # 3ff0 <_sk_callback_sse2+0xeb>
+  DB  102,69,15,111,240                   ; movdqa        %xmm8,%xmm14
+  DB  102,69,15,219,245                   ; pand          %xmm13,%xmm14
+  DB  102,65,15,114,246,2                 ; pslld         $0x2,%xmm14
+  DB  102,69,15,219,233                   ; pand          %xmm9,%xmm13
+  DB  102,69,15,254,237                   ; paddd         %xmm13,%xmm13
+  DB  102,69,15,219,196                   ; pand          %xmm12,%xmm8
+  DB  102,65,15,114,208,1                 ; psrld         $0x1,%xmm8
+  DB  102,69,15,219,204                   ; pand          %xmm12,%xmm9
+  DB  102,65,15,114,209,2                 ; psrld         $0x2,%xmm9
+  DB  102,69,15,235,234                   ; por           %xmm10,%xmm13
+  DB  102,69,15,235,233                   ; por           %xmm9,%xmm13
+  DB  102,69,15,235,243                   ; por           %xmm11,%xmm14
+  DB  102,69,15,235,245                   ; por           %xmm13,%xmm14
+  DB  102,69,15,235,240                   ; por           %xmm8,%xmm14
+  DB  69,15,91,198                        ; cvtdq2ps      %xmm14,%xmm8
+  DB  68,15,89,5,3,62,0,0                 ; mulps         0x3e03(%rip),%xmm8        # 4000 <_sk_callback_sse2+0xfb>
+  DB  68,15,88,5,11,62,0,0                ; addps         0x3e0b(%rip),%xmm8        # 4010 <_sk_callback_sse2+0x10b>
+  DB  243,68,15,16,72,8                   ; movss         0x8(%rax),%xmm9
+  DB  69,15,198,201,0                     ; shufps        $0x0,%xmm9,%xmm9
+  DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
+  DB  68,15,89,203                        ; mulps         %xmm3,%xmm9
+  DB  65,15,88,193                        ; addps         %xmm9,%xmm0
+  DB  65,15,88,201                        ; addps         %xmm9,%xmm1
+  DB  65,15,88,209                        ; addps         %xmm9,%xmm2
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_constant_color_sse2
 _sk_constant_color_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15323,7 +15578,7 @@ _sk_clear_sse2 LABEL PROC
 PUBLIC _sk_srcatop_sse2
 _sk_srcatop_sse2 LABEL PROC
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  68,15,40,5,107,61,0,0               ; movaps        0x3d6b(%rip),%xmm8        # 3ef0 <_sk_callback_sse2+0xd2>
+  DB  68,15,40,5,180,61,0,0               ; movaps        0x3db4(%rip),%xmm8        # 4020 <_sk_callback_sse2+0x11b>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -15346,7 +15601,7 @@ PUBLIC _sk_dstatop_sse2
 _sk_dstatop_sse2 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
   DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
-  DB  68,15,40,13,46,61,0,0               ; movaps        0x3d2e(%rip),%xmm9        # 3f00 <_sk_callback_sse2+0xe2>
+  DB  68,15,40,13,119,61,0,0              ; movaps        0x3d77(%rip),%xmm9        # 4030 <_sk_callback_sse2+0x12b>
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
@@ -15387,7 +15642,7 @@ _sk_dstin_sse2 LABEL PROC
 
 PUBLIC _sk_srcout_sse2
 _sk_srcout_sse2 LABEL PROC
-  DB  68,15,40,5,210,60,0,0               ; movaps        0x3cd2(%rip),%xmm8        # 3f10 <_sk_callback_sse2+0xf2>
+  DB  68,15,40,5,27,61,0,0                ; movaps        0x3d1b(%rip),%xmm8        # 4040 <_sk_callback_sse2+0x13b>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
@@ -15398,7 +15653,7 @@ _sk_srcout_sse2 LABEL PROC
 
 PUBLIC _sk_dstout_sse2
 _sk_dstout_sse2 LABEL PROC
-  DB  68,15,40,5,194,60,0,0               ; movaps        0x3cc2(%rip),%xmm8        # 3f20 <_sk_callback_sse2+0x102>
+  DB  68,15,40,5,11,61,0,0                ; movaps        0x3d0b(%rip),%xmm8        # 4050 <_sk_callback_sse2+0x14b>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  15,89,196                           ; mulps         %xmm4,%xmm0
@@ -15413,7 +15668,7 @@ _sk_dstout_sse2 LABEL PROC
 
 PUBLIC _sk_srcover_sse2
 _sk_srcover_sse2 LABEL PROC
-  DB  68,15,40,5,165,60,0,0               ; movaps        0x3ca5(%rip),%xmm8        # 3f30 <_sk_callback_sse2+0x112>
+  DB  68,15,40,5,238,60,0,0               ; movaps        0x3cee(%rip),%xmm8        # 4060 <_sk_callback_sse2+0x15b>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -15431,7 +15686,7 @@ _sk_srcover_sse2 LABEL PROC
 
 PUBLIC _sk_dstover_sse2
 _sk_dstover_sse2 LABEL PROC
-  DB  68,15,40,5,121,60,0,0               ; movaps        0x3c79(%rip),%xmm8        # 3f40 <_sk_callback_sse2+0x122>
+  DB  68,15,40,5,194,60,0,0               ; movaps        0x3cc2(%rip),%xmm8        # 4070 <_sk_callback_sse2+0x16b>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -15455,7 +15710,7 @@ _sk_modulate_sse2 LABEL PROC
 
 PUBLIC _sk_multiply_sse2
 _sk_multiply_sse2 LABEL PROC
-  DB  68,15,40,5,77,60,0,0                ; movaps        0x3c4d(%rip),%xmm8        # 3f50 <_sk_callback_sse2+0x132>
+  DB  68,15,40,5,150,60,0,0               ; movaps        0x3c96(%rip),%xmm8        # 4080 <_sk_callback_sse2+0x17b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
@@ -15525,7 +15780,7 @@ _sk_screen_sse2 LABEL PROC
 PUBLIC _sk_xor__sse2
 _sk_xor__sse2 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
-  DB  15,40,29,126,59,0,0                 ; movaps        0x3b7e(%rip),%xmm3        # 3f60 <_sk_callback_sse2+0x142>
+  DB  15,40,29,199,59,0,0                 ; movaps        0x3bc7(%rip),%xmm3        # 4090 <_sk_callback_sse2+0x18b>
   DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
@@ -15571,7 +15826,7 @@ _sk_darken_sse2 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,95,209                        ; maxps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,233,58,0,0                 ; movaps        0x3ae9(%rip),%xmm2        # 3f70 <_sk_callback_sse2+0x152>
+  DB  15,40,21,50,59,0,0                  ; movaps        0x3b32(%rip),%xmm2        # 40a0 <_sk_callback_sse2+0x19b>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -15603,7 +15858,7 @@ _sk_lighten_sse2 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,142,58,0,0                 ; movaps        0x3a8e(%rip),%xmm2        # 3f80 <_sk_callback_sse2+0x162>
+  DB  15,40,21,215,58,0,0                 ; movaps        0x3ad7(%rip),%xmm2        # 40b0 <_sk_callback_sse2+0x1ab>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -15638,7 +15893,7 @@ _sk_difference_sse2 LABEL PROC
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,40,58,0,0                  ; movaps        0x3a28(%rip),%xmm2        # 3f90 <_sk_callback_sse2+0x172>
+  DB  15,40,21,113,58,0,0                 ; movaps        0x3a71(%rip),%xmm2        # 40c0 <_sk_callback_sse2+0x1bb>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -15663,7 +15918,7 @@ _sk_exclusion_sse2 LABEL PROC
   DB  15,89,214                           ; mulps         %xmm6,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,202                        ; subps         %xmm2,%xmm9
-  DB  15,40,13,233,57,0,0                 ; movaps        0x39e9(%rip),%xmm1        # 3fa0 <_sk_callback_sse2+0x182>
+  DB  15,40,13,50,58,0,0                  ; movaps        0x3a32(%rip),%xmm1        # 40d0 <_sk_callback_sse2+0x1cb>
   DB  15,92,203                           ; subps         %xmm3,%xmm1
   DB  15,89,207                           ; mulps         %xmm7,%xmm1
   DB  15,88,217                           ; addps         %xmm1,%xmm3
@@ -15675,7 +15930,7 @@ _sk_exclusion_sse2 LABEL PROC
 PUBLIC _sk_colorburn_sse2
 _sk_colorburn_sse2 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,40,21,216,57,0,0              ; movaps        0x39d8(%rip),%xmm10        # 3fb0 <_sk_callback_sse2+0x192>
+  DB  68,15,40,21,33,58,0,0               ; movaps        0x3a21(%rip),%xmm10        # 40e0 <_sk_callback_sse2+0x1db>
   DB  69,15,40,202                        ; movaps        %xmm10,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
@@ -15767,7 +16022,7 @@ _sk_colorburn_sse2 LABEL PROC
 PUBLIC _sk_colordodge_sse2
 _sk_colordodge_sse2 LABEL PROC
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
-  DB  68,15,40,21,142,56,0,0              ; movaps        0x388e(%rip),%xmm10        # 3fc0 <_sk_callback_sse2+0x1a2>
+  DB  68,15,40,21,215,56,0,0              ; movaps        0x38d7(%rip),%xmm10        # 40f0 <_sk_callback_sse2+0x1eb>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
@@ -15860,7 +16115,7 @@ _sk_hardlight_sse2 LABEL PROC
   DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
   DB  15,40,245                           ; movaps        %xmm5,%xmm6
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
-  DB  68,15,40,29,64,55,0,0               ; movaps        0x3740(%rip),%xmm11        # 3fd0 <_sk_callback_sse2+0x1b2>
+  DB  68,15,40,29,137,55,0,0              ; movaps        0x3789(%rip),%xmm11        # 4100 <_sk_callback_sse2+0x1fb>
   DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
   DB  68,15,92,215                        ; subps         %xmm7,%xmm10
   DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
@@ -15947,7 +16202,7 @@ PUBLIC _sk_overlay_sse2
 _sk_overlay_sse2 LABEL PROC
   DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
   DB  68,15,40,232                        ; movaps        %xmm0,%xmm13
-  DB  68,15,40,13,11,54,0,0               ; movaps        0x360b(%rip),%xmm9        # 3fe0 <_sk_callback_sse2+0x1c2>
+  DB  68,15,40,13,84,54,0,0               ; movaps        0x3654(%rip),%xmm9        # 4110 <_sk_callback_sse2+0x20b>
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
   DB  68,15,92,215                        ; subps         %xmm7,%xmm10
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
@@ -16037,7 +16292,7 @@ _sk_softlight_sse2 LABEL PROC
   DB  68,15,40,213                        ; movaps        %xmm5,%xmm10
   DB  68,15,94,215                        ; divps         %xmm7,%xmm10
   DB  69,15,84,212                        ; andps         %xmm12,%xmm10
-  DB  68,15,40,13,197,52,0,0              ; movaps        0x34c5(%rip),%xmm9        # 3ff0 <_sk_callback_sse2+0x1d2>
+  DB  68,15,40,13,14,53,0,0               ; movaps        0x350e(%rip),%xmm9        # 4120 <_sk_callback_sse2+0x21b>
   DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
   DB  69,15,92,250                        ; subps         %xmm10,%xmm15
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
@@ -16050,10 +16305,10 @@ _sk_softlight_sse2 LABEL PROC
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  65,15,88,194                        ; addps         %xmm10,%xmm0
-  DB  68,15,40,53,159,52,0,0              ; movaps        0x349f(%rip),%xmm14        # 4000 <_sk_callback_sse2+0x1e2>
+  DB  68,15,40,53,232,52,0,0              ; movaps        0x34e8(%rip),%xmm14        # 4130 <_sk_callback_sse2+0x22b>
   DB  69,15,88,222                        ; addps         %xmm14,%xmm11
   DB  68,15,89,216                        ; mulps         %xmm0,%xmm11
-  DB  68,15,40,21,159,52,0,0              ; movaps        0x349f(%rip),%xmm10        # 4010 <_sk_callback_sse2+0x1f2>
+  DB  68,15,40,21,232,52,0,0              ; movaps        0x34e8(%rip),%xmm10        # 4140 <_sk_callback_sse2+0x23b>
   DB  69,15,89,234                        ; mulps         %xmm10,%xmm13
   DB  69,15,88,235                        ; addps         %xmm11,%xmm13
   DB  15,88,228                           ; addps         %xmm4,%xmm4
@@ -16202,7 +16457,7 @@ _sk_clamp_0_sse2 LABEL PROC
 
 PUBLIC _sk_clamp_1_sse2
 _sk_clamp_1_sse2 LABEL PROC
-  DB  68,15,40,5,171,50,0,0               ; movaps        0x32ab(%rip),%xmm8        # 4020 <_sk_callback_sse2+0x202>
+  DB  68,15,40,5,244,50,0,0               ; movaps        0x32f4(%rip),%xmm8        # 4150 <_sk_callback_sse2+0x24b>
   DB  65,15,93,192                        ; minps         %xmm8,%xmm0
   DB  65,15,93,200                        ; minps         %xmm8,%xmm1
   DB  65,15,93,208                        ; minps         %xmm8,%xmm2
@@ -16212,7 +16467,7 @@ _sk_clamp_1_sse2 LABEL PROC
 
 PUBLIC _sk_clamp_a_sse2
 _sk_clamp_a_sse2 LABEL PROC
-  DB  15,93,29,160,50,0,0                 ; minps         0x32a0(%rip),%xmm3        # 4030 <_sk_callback_sse2+0x212>
+  DB  15,93,29,233,50,0,0                 ; minps         0x32e9(%rip),%xmm3        # 4160 <_sk_callback_sse2+0x25b>
   DB  15,93,195                           ; minps         %xmm3,%xmm0
   DB  15,93,203                           ; minps         %xmm3,%xmm1
   DB  15,93,211                           ; minps         %xmm3,%xmm2
@@ -16285,7 +16540,7 @@ _sk_premul_sse2 LABEL PROC
 PUBLIC _sk_unpremul_sse2
 _sk_unpremul_sse2 LABEL PROC
   DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
-  DB  68,15,40,13,11,50,0,0               ; movaps        0x320b(%rip),%xmm9        # 4040 <_sk_callback_sse2+0x222>
+  DB  68,15,40,13,84,50,0,0               ; movaps        0x3254(%rip),%xmm9        # 4170 <_sk_callback_sse2+0x26b>
   DB  68,15,94,203                        ; divps         %xmm3,%xmm9
   DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
   DB  69,15,84,193                        ; andps         %xmm9,%xmm8
@@ -16297,20 +16552,20 @@ _sk_unpremul_sse2 LABEL PROC
 
 PUBLIC _sk_from_srgb_sse2
 _sk_from_srgb_sse2 LABEL PROC
-  DB  68,15,40,5,246,49,0,0               ; movaps        0x31f6(%rip),%xmm8        # 4050 <_sk_callback_sse2+0x232>
+  DB  68,15,40,5,63,50,0,0                ; movaps        0x323f(%rip),%xmm8        # 4180 <_sk_callback_sse2+0x27b>
   DB  68,15,40,232                        ; movaps        %xmm0,%xmm13
   DB  69,15,89,232                        ; mulps         %xmm8,%xmm13
   DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
   DB  69,15,89,219                        ; mulps         %xmm11,%xmm11
-  DB  68,15,40,13,238,49,0,0              ; movaps        0x31ee(%rip),%xmm9        # 4060 <_sk_callback_sse2+0x242>
+  DB  68,15,40,13,55,50,0,0               ; movaps        0x3237(%rip),%xmm9        # 4190 <_sk_callback_sse2+0x28b>
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
   DB  69,15,89,241                        ; mulps         %xmm9,%xmm14
-  DB  68,15,40,21,238,49,0,0              ; movaps        0x31ee(%rip),%xmm10        # 4070 <_sk_callback_sse2+0x252>
+  DB  68,15,40,21,55,50,0,0               ; movaps        0x3237(%rip),%xmm10        # 41a0 <_sk_callback_sse2+0x29b>
   DB  69,15,88,242                        ; addps         %xmm10,%xmm14
   DB  69,15,89,243                        ; mulps         %xmm11,%xmm14
-  DB  68,15,40,29,238,49,0,0              ; movaps        0x31ee(%rip),%xmm11        # 4080 <_sk_callback_sse2+0x262>
+  DB  68,15,40,29,55,50,0,0               ; movaps        0x3237(%rip),%xmm11        # 41b0 <_sk_callback_sse2+0x2ab>
   DB  69,15,88,243                        ; addps         %xmm11,%xmm14
-  DB  68,15,40,37,242,49,0,0              ; movaps        0x31f2(%rip),%xmm12        # 4090 <_sk_callback_sse2+0x272>
+  DB  68,15,40,37,59,50,0,0               ; movaps        0x323b(%rip),%xmm12        # 41c0 <_sk_callback_sse2+0x2bb>
   DB  65,15,194,196,1                     ; cmpltps       %xmm12,%xmm0
   DB  68,15,84,232                        ; andps         %xmm0,%xmm13
   DB  65,15,85,198                        ; andnps        %xmm14,%xmm0
@@ -16347,20 +16602,20 @@ _sk_to_srgb_sse2 LABEL PROC
   DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
   DB  69,15,83,200                        ; rcpps         %xmm8,%xmm9
   DB  69,15,82,232                        ; rsqrtps       %xmm8,%xmm13
-  DB  68,15,40,5,119,49,0,0               ; movaps        0x3177(%rip),%xmm8        # 40a0 <_sk_callback_sse2+0x282>
+  DB  68,15,40,5,192,49,0,0               ; movaps        0x31c0(%rip),%xmm8        # 41d0 <_sk_callback_sse2+0x2cb>
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
   DB  69,15,89,240                        ; mulps         %xmm8,%xmm14
-  DB  68,15,40,21,119,49,0,0              ; movaps        0x3177(%rip),%xmm10        # 40b0 <_sk_callback_sse2+0x292>
+  DB  68,15,40,21,192,49,0,0              ; movaps        0x31c0(%rip),%xmm10        # 41e0 <_sk_callback_sse2+0x2db>
   DB  69,15,89,202                        ; mulps         %xmm10,%xmm9
-  DB  68,15,40,29,123,49,0,0              ; movaps        0x317b(%rip),%xmm11        # 40c0 <_sk_callback_sse2+0x2a2>
+  DB  68,15,40,29,196,49,0,0              ; movaps        0x31c4(%rip),%xmm11        # 41f0 <_sk_callback_sse2+0x2eb>
   DB  69,15,88,203                        ; addps         %xmm11,%xmm9
-  DB  68,15,40,37,127,49,0,0              ; movaps        0x317f(%rip),%xmm12        # 40d0 <_sk_callback_sse2+0x2b2>
+  DB  68,15,40,37,200,49,0,0              ; movaps        0x31c8(%rip),%xmm12        # 4200 <_sk_callback_sse2+0x2fb>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,40,13,127,49,0,0              ; movaps        0x317f(%rip),%xmm9        # 40e0 <_sk_callback_sse2+0x2c2>
+  DB  68,15,40,13,200,49,0,0              ; movaps        0x31c8(%rip),%xmm9        # 4210 <_sk_callback_sse2+0x30b>
   DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
   DB  69,15,93,253                        ; minps         %xmm13,%xmm15
-  DB  68,15,40,45,127,49,0,0              ; movaps        0x317f(%rip),%xmm13        # 40f0 <_sk_callback_sse2+0x2d2>
+  DB  68,15,40,45,200,49,0,0              ; movaps        0x31c8(%rip),%xmm13        # 4220 <_sk_callback_sse2+0x31b>
   DB  65,15,194,197,1                     ; cmpltps       %xmm13,%xmm0
   DB  68,15,84,240                        ; andps         %xmm0,%xmm14
   DB  65,15,85,199                        ; andnps        %xmm15,%xmm0
@@ -16408,7 +16663,7 @@ _sk_rgb_to_hsl_sse2 LABEL PROC
   DB  68,15,93,218                        ; minps         %xmm2,%xmm11
   DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
   DB  65,15,92,203                        ; subps         %xmm11,%xmm1
-  DB  68,15,40,45,216,48,0,0              ; movaps        0x30d8(%rip),%xmm13        # 4100 <_sk_callback_sse2+0x2e2>
+  DB  68,15,40,45,33,49,0,0               ; movaps        0x3121(%rip),%xmm13        # 4230 <_sk_callback_sse2+0x32b>
   DB  68,15,94,233                        ; divps         %xmm1,%xmm13
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  65,15,194,192,0                     ; cmpeqps       %xmm8,%xmm0
@@ -16417,30 +16672,30 @@ _sk_rgb_to_hsl_sse2 LABEL PROC
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
   DB  68,15,194,242,1                     ; cmpltps       %xmm2,%xmm14
-  DB  68,15,84,53,190,48,0,0              ; andps         0x30be(%rip),%xmm14        # 4110 <_sk_callback_sse2+0x2f2>
+  DB  68,15,84,53,7,49,0,0                ; andps         0x3107(%rip),%xmm14        # 4240 <_sk_callback_sse2+0x33b>
   DB  69,15,88,244                        ; addps         %xmm12,%xmm14
   DB  69,15,40,250                        ; movaps        %xmm10,%xmm15
   DB  69,15,194,249,0                     ; cmpeqps       %xmm9,%xmm15
   DB  65,15,92,208                        ; subps         %xmm8,%xmm2
   DB  65,15,89,213                        ; mulps         %xmm13,%xmm2
-  DB  68,15,40,37,177,48,0,0              ; movaps        0x30b1(%rip),%xmm12        # 4120 <_sk_callback_sse2+0x302>
+  DB  68,15,40,37,250,48,0,0              ; movaps        0x30fa(%rip),%xmm12        # 4250 <_sk_callback_sse2+0x34b>
   DB  65,15,88,212                        ; addps         %xmm12,%xmm2
   DB  69,15,92,193                        ; subps         %xmm9,%xmm8
   DB  69,15,89,197                        ; mulps         %xmm13,%xmm8
-  DB  68,15,88,5,173,48,0,0               ; addps         0x30ad(%rip),%xmm8        # 4130 <_sk_callback_sse2+0x312>
+  DB  68,15,88,5,246,48,0,0               ; addps         0x30f6(%rip),%xmm8        # 4260 <_sk_callback_sse2+0x35b>
   DB  65,15,84,215                        ; andps         %xmm15,%xmm2
   DB  69,15,85,248                        ; andnps        %xmm8,%xmm15
   DB  68,15,86,250                        ; orps          %xmm2,%xmm15
   DB  68,15,84,240                        ; andps         %xmm0,%xmm14
   DB  65,15,85,199                        ; andnps        %xmm15,%xmm0
   DB  65,15,86,198                        ; orps          %xmm14,%xmm0
-  DB  15,89,5,158,48,0,0                  ; mulps         0x309e(%rip),%xmm0        # 4140 <_sk_callback_sse2+0x322>
+  DB  15,89,5,231,48,0,0                  ; mulps         0x30e7(%rip),%xmm0        # 4270 <_sk_callback_sse2+0x36b>
   DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
   DB  69,15,194,195,4                     ; cmpneqps      %xmm11,%xmm8
   DB  65,15,84,192                        ; andps         %xmm8,%xmm0
   DB  69,15,92,226                        ; subps         %xmm10,%xmm12
   DB  69,15,88,211                        ; addps         %xmm11,%xmm10
-  DB  68,15,40,13,145,48,0,0              ; movaps        0x3091(%rip),%xmm9        # 4150 <_sk_callback_sse2+0x332>
+  DB  68,15,40,13,218,48,0,0              ; movaps        0x30da(%rip),%xmm9        # 4280 <_sk_callback_sse2+0x37b>
   DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
   DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
   DB  68,15,194,202,1                     ; cmpltps       %xmm2,%xmm9
@@ -16463,7 +16718,7 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  15,41,92,36,32                      ; movaps        %xmm3,0x20(%rsp)
   DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
   DB  15,40,240                           ; movaps        %xmm0,%xmm6
-  DB  68,15,40,13,76,48,0,0               ; movaps        0x304c(%rip),%xmm9        # 4160 <_sk_callback_sse2+0x342>
+  DB  68,15,40,13,149,48,0,0              ; movaps        0x3095(%rip),%xmm9        # 4290 <_sk_callback_sse2+0x38b>
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
   DB  69,15,194,211,2                     ; cmpleps       %xmm11,%xmm10
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
@@ -16480,28 +16735,28 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  69,15,88,211                        ; addps         %xmm11,%xmm10
   DB  69,15,88,219                        ; addps         %xmm11,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  15,40,5,22,48,0,0                   ; movaps        0x3016(%rip),%xmm0        # 4170 <_sk_callback_sse2+0x352>
+  DB  15,40,5,95,48,0,0                   ; movaps        0x305f(%rip),%xmm0        # 42a0 <_sk_callback_sse2+0x39b>
   DB  15,88,198                           ; addps         %xmm6,%xmm0
   DB  243,15,91,200                       ; cvttps2dq     %xmm0,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  15,40,216                           ; movaps        %xmm0,%xmm3
   DB  15,194,217,1                        ; cmpltps       %xmm1,%xmm3
-  DB  15,84,29,14,48,0,0                  ; andps         0x300e(%rip),%xmm3        # 4180 <_sk_callback_sse2+0x362>
+  DB  15,84,29,87,48,0,0                  ; andps         0x3057(%rip),%xmm3        # 42b0 <_sk_callback_sse2+0x3ab>
   DB  15,92,203                           ; subps         %xmm3,%xmm1
   DB  15,92,193                           ; subps         %xmm1,%xmm0
-  DB  68,15,40,45,16,48,0,0               ; movaps        0x3010(%rip),%xmm13        # 4190 <_sk_callback_sse2+0x372>
+  DB  68,15,40,45,89,48,0,0               ; movaps        0x3059(%rip),%xmm13        # 42c0 <_sk_callback_sse2+0x3bb>
   DB  69,15,40,197                        ; movaps        %xmm13,%xmm8
   DB  68,15,194,192,2                     ; cmpleps       %xmm0,%xmm8
   DB  69,15,40,242                        ; movaps        %xmm10,%xmm14
   DB  69,15,92,243                        ; subps         %xmm11,%xmm14
   DB  65,15,40,217                        ; movaps        %xmm9,%xmm3
   DB  15,194,216,2                        ; cmpleps       %xmm0,%xmm3
-  DB  15,40,21,32,48,0,0                  ; movaps        0x3020(%rip),%xmm2        # 41c0 <_sk_callback_sse2+0x3a2>
+  DB  15,40,21,105,48,0,0                 ; movaps        0x3069(%rip),%xmm2        # 42f0 <_sk_callback_sse2+0x3eb>
   DB  68,15,40,250                        ; movaps        %xmm2,%xmm15
   DB  68,15,194,248,2                     ; cmpleps       %xmm0,%xmm15
-  DB  15,40,13,240,47,0,0                 ; movaps        0x2ff0(%rip),%xmm1        # 41a0 <_sk_callback_sse2+0x382>
+  DB  15,40,13,57,48,0,0                  ; movaps        0x3039(%rip),%xmm1        # 42d0 <_sk_callback_sse2+0x3cb>
   DB  15,89,193                           ; mulps         %xmm1,%xmm0
-  DB  15,40,45,246,47,0,0                 ; movaps        0x2ff6(%rip),%xmm5        # 41b0 <_sk_callback_sse2+0x392>
+  DB  15,40,45,63,48,0,0                  ; movaps        0x303f(%rip),%xmm5        # 42e0 <_sk_callback_sse2+0x3db>
   DB  15,40,229                           ; movaps        %xmm5,%xmm4
   DB  15,92,224                           ; subps         %xmm0,%xmm4
   DB  65,15,89,230                        ; mulps         %xmm14,%xmm4
@@ -16524,7 +16779,7 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
   DB  15,40,222                           ; movaps        %xmm6,%xmm3
   DB  15,194,216,1                        ; cmpltps       %xmm0,%xmm3
-  DB  15,84,29,107,47,0,0                 ; andps         0x2f6b(%rip),%xmm3        # 4180 <_sk_callback_sse2+0x362>
+  DB  15,84,29,180,47,0,0                 ; andps         0x2fb4(%rip),%xmm3        # 42b0 <_sk_callback_sse2+0x3ab>
   DB  15,92,195                           ; subps         %xmm3,%xmm0
   DB  68,15,40,230                        ; movaps        %xmm6,%xmm12
   DB  68,15,92,224                        ; subps         %xmm0,%xmm12
@@ -16554,12 +16809,12 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  15,40,60,36                         ; movaps        (%rsp),%xmm7
   DB  15,40,231                           ; movaps        %xmm7,%xmm4
   DB  15,85,227                           ; andnps        %xmm3,%xmm4
-  DB  15,88,53,68,47,0,0                  ; addps         0x2f44(%rip),%xmm6        # 41d0 <_sk_callback_sse2+0x3b2>
+  DB  15,88,53,141,47,0,0                 ; addps         0x2f8d(%rip),%xmm6        # 4300 <_sk_callback_sse2+0x3fb>
   DB  243,15,91,198                       ; cvttps2dq     %xmm6,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
   DB  15,40,222                           ; movaps        %xmm6,%xmm3
   DB  15,194,216,1                        ; cmpltps       %xmm0,%xmm3
-  DB  15,84,29,223,46,0,0                 ; andps         0x2edf(%rip),%xmm3        # 4180 <_sk_callback_sse2+0x362>
+  DB  15,84,29,40,47,0,0                  ; andps         0x2f28(%rip),%xmm3        # 42b0 <_sk_callback_sse2+0x3ab>
   DB  15,92,195                           ; subps         %xmm3,%xmm0
   DB  15,92,240                           ; subps         %xmm0,%xmm6
   DB  15,89,206                           ; mulps         %xmm6,%xmm1
@@ -16620,7 +16875,7 @@ _sk_scale_u8_sse2 LABEL PROC
   DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
   DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,105,46,0,0               ; mulps         0x2e69(%rip),%xmm8        # 41e0 <_sk_callback_sse2+0x3c2>
+  DB  68,15,89,5,178,46,0,0               ; mulps         0x2eb2(%rip),%xmm8        # 4310 <_sk_callback_sse2+0x40b>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
@@ -16657,7 +16912,7 @@ _sk_lerp_u8_sse2 LABEL PROC
   DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
   DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,7,46,0,0                 ; mulps         0x2e07(%rip),%xmm8        # 41f0 <_sk_callback_sse2+0x3d2>
+  DB  68,15,89,5,80,46,0,0                ; mulps         0x2e50(%rip),%xmm8        # 4320 <_sk_callback_sse2+0x41b>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -16680,17 +16935,17 @@ _sk_lerp_565_sse2 LABEL PROC
   DB  243,68,15,126,4,120                 ; movq          (%rax,%rdi,2),%xmm8
   DB  102,15,239,219                      ; pxor          %xmm3,%xmm3
   DB  102,68,15,97,195                    ; punpcklwd     %xmm3,%xmm8
-  DB  102,15,111,29,207,45,0,0            ; movdqa        0x2dcf(%rip),%xmm3        # 4200 <_sk_callback_sse2+0x3e2>
+  DB  102,15,111,29,24,46,0,0             ; movdqa        0x2e18(%rip),%xmm3        # 4330 <_sk_callback_sse2+0x42b>
   DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
   DB  68,15,91,203                        ; cvtdq2ps      %xmm3,%xmm9
-  DB  68,15,89,13,206,45,0,0              ; mulps         0x2dce(%rip),%xmm9        # 4210 <_sk_callback_sse2+0x3f2>
-  DB  102,15,111,29,214,45,0,0            ; movdqa        0x2dd6(%rip),%xmm3        # 4220 <_sk_callback_sse2+0x402>
+  DB  68,15,89,13,23,46,0,0               ; mulps         0x2e17(%rip),%xmm9        # 4340 <_sk_callback_sse2+0x43b>
+  DB  102,15,111,29,31,46,0,0             ; movdqa        0x2e1f(%rip),%xmm3        # 4350 <_sk_callback_sse2+0x44b>
   DB  102,65,15,219,216                   ; pand          %xmm8,%xmm3
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,215,45,0,0                 ; mulps         0x2dd7(%rip),%xmm3        # 4230 <_sk_callback_sse2+0x412>
-  DB  102,68,15,219,5,222,45,0,0          ; pand          0x2dde(%rip),%xmm8        # 4240 <_sk_callback_sse2+0x422>
+  DB  15,89,29,32,46,0,0                  ; mulps         0x2e20(%rip),%xmm3        # 4360 <_sk_callback_sse2+0x45b>
+  DB  102,68,15,219,5,39,46,0,0           ; pand          0x2e27(%rip),%xmm8        # 4370 <_sk_callback_sse2+0x46b>
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,226,45,0,0               ; mulps         0x2de2(%rip),%xmm8        # 4250 <_sk_callback_sse2+0x432>
+  DB  68,15,89,5,43,46,0,0                ; mulps         0x2e2b(%rip),%xmm8        # 4380 <_sk_callback_sse2+0x47b>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -16701,7 +16956,7 @@ _sk_lerp_565_sse2 LABEL PROC
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  15,88,214                           ; addps         %xmm6,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,204,45,0,0                 ; movaps        0x2dcc(%rip),%xmm3        # 4260 <_sk_callback_sse2+0x442>
+  DB  15,40,29,21,46,0,0                  ; movaps        0x2e15(%rip),%xmm3        # 4390 <_sk_callback_sse2+0x48b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_load_tables_sse2
@@ -16710,7 +16965,7 @@ _sk_load_tables_sse2 LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  243,69,15,111,12,184                ; movdqu        (%r8,%rdi,4),%xmm9
-  DB  102,68,15,111,5,194,45,0,0          ; movdqa        0x2dc2(%rip),%xmm8        # 4270 <_sk_callback_sse2+0x452>
+  DB  102,68,15,111,5,11,46,0,0           ; movdqa        0x2e0b(%rip),%xmm8        # 43a0 <_sk_callback_sse2+0x49b>
   DB  102,65,15,111,193                   ; movdqa        %xmm9,%xmm0
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
@@ -16765,7 +17020,7 @@ _sk_load_tables_sse2 LABEL PROC
   DB  65,15,20,208                        ; unpcklps      %xmm8,%xmm2
   DB  102,65,15,114,209,24                ; psrld         $0x18,%xmm9
   DB  65,15,91,217                        ; cvtdq2ps      %xmm9,%xmm3
-  DB  15,89,29,207,44,0,0                 ; mulps         0x2ccf(%rip),%xmm3        # 4280 <_sk_callback_sse2+0x462>
+  DB  15,89,29,24,45,0,0                  ; mulps         0x2d18(%rip),%xmm3        # 43b0 <_sk_callback_sse2+0x4ab>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -16782,7 +17037,7 @@ _sk_load_tables_u16_be_sse2 LABEL PROC
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
   DB  102,68,15,105,200                   ; punpckhwd     %xmm0,%xmm9
-  DB  102,68,15,111,21,162,44,0,0         ; movdqa        0x2ca2(%rip),%xmm10        # 4290 <_sk_callback_sse2+0x472>
+  DB  102,68,15,111,21,235,44,0,0         ; movdqa        0x2ceb(%rip),%xmm10        # 43c0 <_sk_callback_sse2+0x4bb>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,194                   ; pand          %xmm10,%xmm0
   DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
@@ -16843,7 +17098,7 @@ _sk_load_tables_u16_be_sse2 LABEL PROC
   DB  102,65,15,235,217                   ; por           %xmm9,%xmm3
   DB  102,65,15,97,216                    ; punpcklwd     %xmm8,%xmm3
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,145,43,0,0                 ; mulps         0x2b91(%rip),%xmm3        # 42a0 <_sk_callback_sse2+0x482>
+  DB  15,89,29,218,43,0,0                 ; mulps         0x2bda(%rip),%xmm3        # 43d0 <_sk_callback_sse2+0x4cb>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -16863,7 +17118,7 @@ _sk_load_tables_rgb_u16_be_sse2 LABEL PROC
   DB  102,68,15,97,208                    ; punpcklwd     %xmm0,%xmm10
   DB  102,65,15,111,195                   ; movdqa        %xmm11,%xmm0
   DB  102,65,15,97,194                    ; punpcklwd     %xmm10,%xmm0
-  DB  102,68,15,111,5,81,43,0,0           ; movdqa        0x2b51(%rip),%xmm8        # 42b0 <_sk_callback_sse2+0x492>
+  DB  102,68,15,111,5,154,43,0,0          ; movdqa        0x2b9a(%rip),%xmm8        # 43e0 <_sk_callback_sse2+0x4db>
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
@@ -16918,7 +17173,7 @@ _sk_load_tables_rgb_u16_be_sse2 LABEL PROC
   DB  15,20,211                           ; unpcklps      %xmm3,%xmm2
   DB  65,15,20,208                        ; unpcklps      %xmm8,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,96,42,0,0                  ; movaps        0x2a60(%rip),%xmm3        # 42c0 <_sk_callback_sse2+0x4a2>
+  DB  15,40,29,169,42,0,0                 ; movaps        0x2aa9(%rip),%xmm3        # 43f0 <_sk_callback_sse2+0x4eb>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_byte_tables_sse2
@@ -16926,7 +17181,7 @@ _sk_byte_tables_sse2 LABEL PROC
   DB  65,86                               ; push          %r14
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,97,42,0,0                ; movaps        0x2a61(%rip),%xmm8        # 42d0 <_sk_callback_sse2+0x4b2>
+  DB  68,15,40,5,170,42,0,0               ; movaps        0x2aaa(%rip),%xmm8        # 4400 <_sk_callback_sse2+0x4fb>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,91,192                       ; cvtps2dq      %xmm0,%xmm0
   DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
@@ -16953,7 +17208,7 @@ _sk_byte_tables_sse2 LABEL PROC
   DB  102,65,15,96,193                    ; punpcklbw     %xmm9,%xmm0
   DB  102,65,15,97,193                    ; punpcklwd     %xmm9,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,21,254,41,0,0              ; movaps        0x29fe(%rip),%xmm10        # 42e0 <_sk_callback_sse2+0x4c2>
+  DB  68,15,40,21,71,42,0,0               ; movaps        0x2a47(%rip),%xmm10        # 4410 <_sk_callback_sse2+0x50b>
   DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -17067,7 +17322,7 @@ _sk_byte_tables_rgb_sse2 LABEL PROC
   DB  102,65,15,96,193                    ; punpcklbw     %xmm9,%xmm0
   DB  102,65,15,97,193                    ; punpcklwd     %xmm9,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,21,81,40,0,0               ; movaps        0x2851(%rip),%xmm10        # 42f0 <_sk_callback_sse2+0x4d2>
+  DB  68,15,40,21,154,40,0,0              ; movaps        0x289a(%rip),%xmm10        # 4420 <_sk_callback_sse2+0x51b>
   DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -17254,15 +17509,15 @@ _sk_parametric_r_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,144,37,0,0              ; mulps         0x2590(%rip),%xmm9        # 4300 <_sk_callback_sse2+0x4e2>
-  DB  68,15,84,21,152,37,0,0              ; andps         0x2598(%rip),%xmm10        # 4310 <_sk_callback_sse2+0x4f2>
-  DB  68,15,86,21,160,37,0,0              ; orps          0x25a0(%rip),%xmm10        # 4320 <_sk_callback_sse2+0x502>
-  DB  68,15,88,13,168,37,0,0              ; addps         0x25a8(%rip),%xmm9        # 4330 <_sk_callback_sse2+0x512>
-  DB  68,15,40,37,176,37,0,0              ; movaps        0x25b0(%rip),%xmm12        # 4340 <_sk_callback_sse2+0x522>
+  DB  68,15,89,13,217,37,0,0              ; mulps         0x25d9(%rip),%xmm9        # 4430 <_sk_callback_sse2+0x52b>
+  DB  68,15,84,21,225,37,0,0              ; andps         0x25e1(%rip),%xmm10        # 4440 <_sk_callback_sse2+0x53b>
+  DB  68,15,86,21,233,37,0,0              ; orps          0x25e9(%rip),%xmm10        # 4450 <_sk_callback_sse2+0x54b>
+  DB  68,15,88,13,241,37,0,0              ; addps         0x25f1(%rip),%xmm9        # 4460 <_sk_callback_sse2+0x55b>
+  DB  68,15,40,37,249,37,0,0              ; movaps        0x25f9(%rip),%xmm12        # 4470 <_sk_callback_sse2+0x56b>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,176,37,0,0              ; addps         0x25b0(%rip),%xmm10        # 4350 <_sk_callback_sse2+0x532>
-  DB  68,15,40,37,184,37,0,0              ; movaps        0x25b8(%rip),%xmm12        # 4360 <_sk_callback_sse2+0x542>
+  DB  68,15,88,21,249,37,0,0              ; addps         0x25f9(%rip),%xmm10        # 4480 <_sk_callback_sse2+0x57b>
+  DB  68,15,40,37,1,38,0,0                ; movaps        0x2601(%rip),%xmm12        # 4490 <_sk_callback_sse2+0x58b>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -17270,22 +17525,22 @@ _sk_parametric_r_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,162,37,0,0              ; movaps        0x25a2(%rip),%xmm10        # 4370 <_sk_callback_sse2+0x552>
+  DB  68,15,40,21,235,37,0,0              ; movaps        0x25eb(%rip),%xmm10        # 44a0 <_sk_callback_sse2+0x59b>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,150,37,0,0              ; addps         0x2596(%rip),%xmm9        # 4380 <_sk_callback_sse2+0x562>
-  DB  68,15,40,37,158,37,0,0              ; movaps        0x259e(%rip),%xmm12        # 4390 <_sk_callback_sse2+0x572>
+  DB  68,15,88,13,223,37,0,0              ; addps         0x25df(%rip),%xmm9        # 44b0 <_sk_callback_sse2+0x5ab>
+  DB  68,15,40,37,231,37,0,0              ; movaps        0x25e7(%rip),%xmm12        # 44c0 <_sk_callback_sse2+0x5bb>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,158,37,0,0              ; movaps        0x259e(%rip),%xmm12        # 43a0 <_sk_callback_sse2+0x582>
+  DB  68,15,40,37,231,37,0,0              ; movaps        0x25e7(%rip),%xmm12        # 44d0 <_sk_callback_sse2+0x5cb>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,162,37,0,0              ; movaps        0x25a2(%rip),%xmm13        # 43b0 <_sk_callback_sse2+0x592>
+  DB  68,15,40,45,235,37,0,0              ; movaps        0x25eb(%rip),%xmm13        # 44e0 <_sk_callback_sse2+0x5db>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,162,37,0,0              ; mulps         0x25a2(%rip),%xmm13        # 43c0 <_sk_callback_sse2+0x5a2>
+  DB  68,15,89,45,235,37,0,0              ; mulps         0x25eb(%rip),%xmm13        # 44f0 <_sk_callback_sse2+0x5eb>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -17319,15 +17574,15 @@ _sk_parametric_g_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,34,37,0,0               ; mulps         0x2522(%rip),%xmm9        # 43d0 <_sk_callback_sse2+0x5b2>
-  DB  68,15,84,21,42,37,0,0               ; andps         0x252a(%rip),%xmm10        # 43e0 <_sk_callback_sse2+0x5c2>
-  DB  68,15,86,21,50,37,0,0               ; orps          0x2532(%rip),%xmm10        # 43f0 <_sk_callback_sse2+0x5d2>
-  DB  68,15,88,13,58,37,0,0               ; addps         0x253a(%rip),%xmm9        # 4400 <_sk_callback_sse2+0x5e2>
-  DB  68,15,40,37,66,37,0,0               ; movaps        0x2542(%rip),%xmm12        # 4410 <_sk_callback_sse2+0x5f2>
+  DB  68,15,89,13,107,37,0,0              ; mulps         0x256b(%rip),%xmm9        # 4500 <_sk_callback_sse2+0x5fb>
+  DB  68,15,84,21,115,37,0,0              ; andps         0x2573(%rip),%xmm10        # 4510 <_sk_callback_sse2+0x60b>
+  DB  68,15,86,21,123,37,0,0              ; orps          0x257b(%rip),%xmm10        # 4520 <_sk_callback_sse2+0x61b>
+  DB  68,15,88,13,131,37,0,0              ; addps         0x2583(%rip),%xmm9        # 4530 <_sk_callback_sse2+0x62b>
+  DB  68,15,40,37,139,37,0,0              ; movaps        0x258b(%rip),%xmm12        # 4540 <_sk_callback_sse2+0x63b>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,66,37,0,0               ; addps         0x2542(%rip),%xmm10        # 4420 <_sk_callback_sse2+0x602>
-  DB  68,15,40,37,74,37,0,0               ; movaps        0x254a(%rip),%xmm12        # 4430 <_sk_callback_sse2+0x612>
+  DB  68,15,88,21,139,37,0,0              ; addps         0x258b(%rip),%xmm10        # 4550 <_sk_callback_sse2+0x64b>
+  DB  68,15,40,37,147,37,0,0              ; movaps        0x2593(%rip),%xmm12        # 4560 <_sk_callback_sse2+0x65b>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -17335,22 +17590,22 @@ _sk_parametric_g_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,52,37,0,0               ; movaps        0x2534(%rip),%xmm10        # 4440 <_sk_callback_sse2+0x622>
+  DB  68,15,40,21,125,37,0,0              ; movaps        0x257d(%rip),%xmm10        # 4570 <_sk_callback_sse2+0x66b>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,40,37,0,0               ; addps         0x2528(%rip),%xmm9        # 4450 <_sk_callback_sse2+0x632>
-  DB  68,15,40,37,48,37,0,0               ; movaps        0x2530(%rip),%xmm12        # 4460 <_sk_callback_sse2+0x642>
+  DB  68,15,88,13,113,37,0,0              ; addps         0x2571(%rip),%xmm9        # 4580 <_sk_callback_sse2+0x67b>
+  DB  68,15,40,37,121,37,0,0              ; movaps        0x2579(%rip),%xmm12        # 4590 <_sk_callback_sse2+0x68b>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,48,37,0,0               ; movaps        0x2530(%rip),%xmm12        # 4470 <_sk_callback_sse2+0x652>
+  DB  68,15,40,37,121,37,0,0              ; movaps        0x2579(%rip),%xmm12        # 45a0 <_sk_callback_sse2+0x69b>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,52,37,0,0               ; movaps        0x2534(%rip),%xmm13        # 4480 <_sk_callback_sse2+0x662>
+  DB  68,15,40,45,125,37,0,0              ; movaps        0x257d(%rip),%xmm13        # 45b0 <_sk_callback_sse2+0x6ab>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,52,37,0,0               ; mulps         0x2534(%rip),%xmm13        # 4490 <_sk_callback_sse2+0x672>
+  DB  68,15,89,45,125,37,0,0              ; mulps         0x257d(%rip),%xmm13        # 45c0 <_sk_callback_sse2+0x6bb>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -17384,15 +17639,15 @@ _sk_parametric_b_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,180,36,0,0              ; mulps         0x24b4(%rip),%xmm9        # 44a0 <_sk_callback_sse2+0x682>
-  DB  68,15,84,21,188,36,0,0              ; andps         0x24bc(%rip),%xmm10        # 44b0 <_sk_callback_sse2+0x692>
-  DB  68,15,86,21,196,36,0,0              ; orps          0x24c4(%rip),%xmm10        # 44c0 <_sk_callback_sse2+0x6a2>
-  DB  68,15,88,13,204,36,0,0              ; addps         0x24cc(%rip),%xmm9        # 44d0 <_sk_callback_sse2+0x6b2>
-  DB  68,15,40,37,212,36,0,0              ; movaps        0x24d4(%rip),%xmm12        # 44e0 <_sk_callback_sse2+0x6c2>
+  DB  68,15,89,13,253,36,0,0              ; mulps         0x24fd(%rip),%xmm9        # 45d0 <_sk_callback_sse2+0x6cb>
+  DB  68,15,84,21,5,37,0,0                ; andps         0x2505(%rip),%xmm10        # 45e0 <_sk_callback_sse2+0x6db>
+  DB  68,15,86,21,13,37,0,0               ; orps          0x250d(%rip),%xmm10        # 45f0 <_sk_callback_sse2+0x6eb>
+  DB  68,15,88,13,21,37,0,0               ; addps         0x2515(%rip),%xmm9        # 4600 <_sk_callback_sse2+0x6fb>
+  DB  68,15,40,37,29,37,0,0               ; movaps        0x251d(%rip),%xmm12        # 4610 <_sk_callback_sse2+0x70b>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,212,36,0,0              ; addps         0x24d4(%rip),%xmm10        # 44f0 <_sk_callback_sse2+0x6d2>
-  DB  68,15,40,37,220,36,0,0              ; movaps        0x24dc(%rip),%xmm12        # 4500 <_sk_callback_sse2+0x6e2>
+  DB  68,15,88,21,29,37,0,0               ; addps         0x251d(%rip),%xmm10        # 4620 <_sk_callback_sse2+0x71b>
+  DB  68,15,40,37,37,37,0,0               ; movaps        0x2525(%rip),%xmm12        # 4630 <_sk_callback_sse2+0x72b>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -17400,22 +17655,22 @@ _sk_parametric_b_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,198,36,0,0              ; movaps        0x24c6(%rip),%xmm10        # 4510 <_sk_callback_sse2+0x6f2>
+  DB  68,15,40,21,15,37,0,0               ; movaps        0x250f(%rip),%xmm10        # 4640 <_sk_callback_sse2+0x73b>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,186,36,0,0              ; addps         0x24ba(%rip),%xmm9        # 4520 <_sk_callback_sse2+0x702>
-  DB  68,15,40,37,194,36,0,0              ; movaps        0x24c2(%rip),%xmm12        # 4530 <_sk_callback_sse2+0x712>
+  DB  68,15,88,13,3,37,0,0                ; addps         0x2503(%rip),%xmm9        # 4650 <_sk_callback_sse2+0x74b>
+  DB  68,15,40,37,11,37,0,0               ; movaps        0x250b(%rip),%xmm12        # 4660 <_sk_callback_sse2+0x75b>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,194,36,0,0              ; movaps        0x24c2(%rip),%xmm12        # 4540 <_sk_callback_sse2+0x722>
+  DB  68,15,40,37,11,37,0,0               ; movaps        0x250b(%rip),%xmm12        # 4670 <_sk_callback_sse2+0x76b>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,198,36,0,0              ; movaps        0x24c6(%rip),%xmm13        # 4550 <_sk_callback_sse2+0x732>
+  DB  68,15,40,45,15,37,0,0               ; movaps        0x250f(%rip),%xmm13        # 4680 <_sk_callback_sse2+0x77b>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,198,36,0,0              ; mulps         0x24c6(%rip),%xmm13        # 4560 <_sk_callback_sse2+0x742>
+  DB  68,15,89,45,15,37,0,0               ; mulps         0x250f(%rip),%xmm13        # 4690 <_sk_callback_sse2+0x78b>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -17449,15 +17704,15 @@ _sk_parametric_a_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,70,36,0,0               ; mulps         0x2446(%rip),%xmm9        # 4570 <_sk_callback_sse2+0x752>
-  DB  68,15,84,21,78,36,0,0               ; andps         0x244e(%rip),%xmm10        # 4580 <_sk_callback_sse2+0x762>
-  DB  68,15,86,21,86,36,0,0               ; orps          0x2456(%rip),%xmm10        # 4590 <_sk_callback_sse2+0x772>
-  DB  68,15,88,13,94,36,0,0               ; addps         0x245e(%rip),%xmm9        # 45a0 <_sk_callback_sse2+0x782>
-  DB  68,15,40,37,102,36,0,0              ; movaps        0x2466(%rip),%xmm12        # 45b0 <_sk_callback_sse2+0x792>
+  DB  68,15,89,13,143,36,0,0              ; mulps         0x248f(%rip),%xmm9        # 46a0 <_sk_callback_sse2+0x79b>
+  DB  68,15,84,21,151,36,0,0              ; andps         0x2497(%rip),%xmm10        # 46b0 <_sk_callback_sse2+0x7ab>
+  DB  68,15,86,21,159,36,0,0              ; orps          0x249f(%rip),%xmm10        # 46c0 <_sk_callback_sse2+0x7bb>
+  DB  68,15,88,13,167,36,0,0              ; addps         0x24a7(%rip),%xmm9        # 46d0 <_sk_callback_sse2+0x7cb>
+  DB  68,15,40,37,175,36,0,0              ; movaps        0x24af(%rip),%xmm12        # 46e0 <_sk_callback_sse2+0x7db>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,102,36,0,0              ; addps         0x2466(%rip),%xmm10        # 45c0 <_sk_callback_sse2+0x7a2>
-  DB  68,15,40,37,110,36,0,0              ; movaps        0x246e(%rip),%xmm12        # 45d0 <_sk_callback_sse2+0x7b2>
+  DB  68,15,88,21,175,36,0,0              ; addps         0x24af(%rip),%xmm10        # 46f0 <_sk_callback_sse2+0x7eb>
+  DB  68,15,40,37,183,36,0,0              ; movaps        0x24b7(%rip),%xmm12        # 4700 <_sk_callback_sse2+0x7fb>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -17465,22 +17720,22 @@ _sk_parametric_a_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,88,36,0,0               ; movaps        0x2458(%rip),%xmm10        # 45e0 <_sk_callback_sse2+0x7c2>
+  DB  68,15,40,21,161,36,0,0              ; movaps        0x24a1(%rip),%xmm10        # 4710 <_sk_callback_sse2+0x80b>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,76,36,0,0               ; addps         0x244c(%rip),%xmm9        # 45f0 <_sk_callback_sse2+0x7d2>
-  DB  68,15,40,37,84,36,0,0               ; movaps        0x2454(%rip),%xmm12        # 4600 <_sk_callback_sse2+0x7e2>
+  DB  68,15,88,13,149,36,0,0              ; addps         0x2495(%rip),%xmm9        # 4720 <_sk_callback_sse2+0x81b>
+  DB  68,15,40,37,157,36,0,0              ; movaps        0x249d(%rip),%xmm12        # 4730 <_sk_callback_sse2+0x82b>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,84,36,0,0               ; movaps        0x2454(%rip),%xmm12        # 4610 <_sk_callback_sse2+0x7f2>
+  DB  68,15,40,37,157,36,0,0              ; movaps        0x249d(%rip),%xmm12        # 4740 <_sk_callback_sse2+0x83b>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,88,36,0,0               ; movaps        0x2458(%rip),%xmm13        # 4620 <_sk_callback_sse2+0x802>
+  DB  68,15,40,45,161,36,0,0              ; movaps        0x24a1(%rip),%xmm13        # 4750 <_sk_callback_sse2+0x84b>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,88,36,0,0               ; mulps         0x2458(%rip),%xmm13        # 4630 <_sk_callback_sse2+0x812>
+  DB  68,15,89,45,161,36,0,0              ; mulps         0x24a1(%rip),%xmm13        # 4760 <_sk_callback_sse2+0x85b>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -17495,29 +17750,29 @@ _sk_parametric_a_sse2 LABEL PROC
 
 PUBLIC _sk_lab_to_xyz_sse2
 _sk_lab_to_xyz_sse2 LABEL PROC
-  DB  15,89,5,53,36,0,0                   ; mulps         0x2435(%rip),%xmm0        # 4640 <_sk_callback_sse2+0x822>
-  DB  68,15,40,5,61,36,0,0                ; movaps        0x243d(%rip),%xmm8        # 4650 <_sk_callback_sse2+0x832>
+  DB  15,89,5,126,36,0,0                  ; mulps         0x247e(%rip),%xmm0        # 4770 <_sk_callback_sse2+0x86b>
+  DB  68,15,40,5,134,36,0,0               ; movaps        0x2486(%rip),%xmm8        # 4780 <_sk_callback_sse2+0x87b>
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
-  DB  68,15,40,13,65,36,0,0               ; movaps        0x2441(%rip),%xmm9        # 4660 <_sk_callback_sse2+0x842>
+  DB  68,15,40,13,138,36,0,0              ; movaps        0x248a(%rip),%xmm9        # 4790 <_sk_callback_sse2+0x88b>
   DB  65,15,88,201                        ; addps         %xmm9,%xmm1
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  65,15,88,209                        ; addps         %xmm9,%xmm2
-  DB  15,88,5,62,36,0,0                   ; addps         0x243e(%rip),%xmm0        # 4670 <_sk_callback_sse2+0x852>
-  DB  15,89,5,71,36,0,0                   ; mulps         0x2447(%rip),%xmm0        # 4680 <_sk_callback_sse2+0x862>
-  DB  15,89,13,80,36,0,0                  ; mulps         0x2450(%rip),%xmm1        # 4690 <_sk_callback_sse2+0x872>
+  DB  15,88,5,135,36,0,0                  ; addps         0x2487(%rip),%xmm0        # 47a0 <_sk_callback_sse2+0x89b>
+  DB  15,89,5,144,36,0,0                  ; mulps         0x2490(%rip),%xmm0        # 47b0 <_sk_callback_sse2+0x8ab>
+  DB  15,89,13,153,36,0,0                 ; mulps         0x2499(%rip),%xmm1        # 47c0 <_sk_callback_sse2+0x8bb>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
-  DB  15,89,21,86,36,0,0                  ; mulps         0x2456(%rip),%xmm2        # 46a0 <_sk_callback_sse2+0x882>
+  DB  15,89,21,159,36,0,0                 ; mulps         0x249f(%rip),%xmm2        # 47d0 <_sk_callback_sse2+0x8cb>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  68,15,92,202                        ; subps         %xmm2,%xmm9
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
   DB  68,15,89,225                        ; mulps         %xmm1,%xmm12
-  DB  15,40,21,75,36,0,0                  ; movaps        0x244b(%rip),%xmm2        # 46b0 <_sk_callback_sse2+0x892>
+  DB  15,40,21,148,36,0,0                 ; movaps        0x2494(%rip),%xmm2        # 47e0 <_sk_callback_sse2+0x8db>
   DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
   DB  69,15,194,196,1                     ; cmpltps       %xmm12,%xmm8
-  DB  68,15,40,21,74,36,0,0               ; movaps        0x244a(%rip),%xmm10        # 46c0 <_sk_callback_sse2+0x8a2>
+  DB  68,15,40,21,147,36,0,0              ; movaps        0x2493(%rip),%xmm10        # 47f0 <_sk_callback_sse2+0x8eb>
   DB  65,15,88,202                        ; addps         %xmm10,%xmm1
-  DB  68,15,40,29,78,36,0,0               ; movaps        0x244e(%rip),%xmm11        # 46d0 <_sk_callback_sse2+0x8b2>
+  DB  68,15,40,29,151,36,0,0              ; movaps        0x2497(%rip),%xmm11        # 4800 <_sk_callback_sse2+0x8fb>
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
   DB  69,15,84,224                        ; andps         %xmm8,%xmm12
   DB  68,15,85,193                        ; andnps        %xmm1,%xmm8
@@ -17541,8 +17796,8 @@ _sk_lab_to_xyz_sse2 LABEL PROC
   DB  15,84,194                           ; andps         %xmm2,%xmm0
   DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
   DB  15,86,208                           ; orps          %xmm0,%xmm2
-  DB  68,15,89,5,254,35,0,0               ; mulps         0x23fe(%rip),%xmm8        # 46e0 <_sk_callback_sse2+0x8c2>
-  DB  15,89,21,7,36,0,0                   ; mulps         0x2407(%rip),%xmm2        # 46f0 <_sk_callback_sse2+0x8d2>
+  DB  68,15,89,5,71,36,0,0                ; mulps         0x2447(%rip),%xmm8        # 4810 <_sk_callback_sse2+0x90b>
+  DB  15,89,21,80,36,0,0                  ; mulps         0x2450(%rip),%xmm2        # 4820 <_sk_callback_sse2+0x91b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -17556,7 +17811,7 @@ _sk_load_a8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,239,35,0,0                 ; mulps         0x23ef(%rip),%xmm3        # 4700 <_sk_callback_sse2+0x8e2>
+  DB  15,89,29,56,36,0,0                  ; mulps         0x2438(%rip),%xmm3        # 4830 <_sk_callback_sse2+0x92b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
@@ -17599,7 +17854,7 @@ _sk_gather_a8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,94,35,0,0                  ; mulps         0x235e(%rip),%xmm3        # 4710 <_sk_callback_sse2+0x8f2>
+  DB  15,89,29,167,35,0,0                 ; mulps         0x23a7(%rip),%xmm3        # 4840 <_sk_callback_sse2+0x93b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
@@ -17610,7 +17865,7 @@ PUBLIC _sk_store_a8_sse2
 _sk_store_a8_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,82,35,0,0                ; movaps        0x2352(%rip),%xmm8        # 4720 <_sk_callback_sse2+0x902>
+  DB  68,15,40,5,155,35,0,0               ; movaps        0x239b(%rip),%xmm8        # 4850 <_sk_callback_sse2+0x94b>
   DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
   DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
@@ -17630,9 +17885,9 @@ _sk_load_g8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,25,35,0,0                   ; mulps         0x2319(%rip),%xmm0        # 4730 <_sk_callback_sse2+0x912>
+  DB  15,89,5,98,35,0,0                   ; mulps         0x2362(%rip),%xmm0        # 4860 <_sk_callback_sse2+0x95b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,32,35,0,0                  ; movaps        0x2320(%rip),%xmm3        # 4740 <_sk_callback_sse2+0x922>
+  DB  15,40,29,105,35,0,0                 ; movaps        0x2369(%rip),%xmm3        # 4870 <_sk_callback_sse2+0x96b>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -17673,9 +17928,9 @@ _sk_gather_g8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,149,34,0,0                  ; mulps         0x2295(%rip),%xmm0        # 4750 <_sk_callback_sse2+0x932>
+  DB  15,89,5,222,34,0,0                  ; mulps         0x22de(%rip),%xmm0        # 4880 <_sk_callback_sse2+0x97b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,156,34,0,0                 ; movaps        0x229c(%rip),%xmm3        # 4760 <_sk_callback_sse2+0x942>
+  DB  15,40,29,229,34,0,0                 ; movaps        0x22e5(%rip),%xmm3        # 4890 <_sk_callback_sse2+0x98b>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -17685,9 +17940,9 @@ _sk_gather_i8_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  73,137,192                          ; mov           %rax,%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  116,5                               ; je            24db <_sk_gather_i8_sse2+0xf>
+  DB  116,5                               ; je            25c2 <_sk_gather_i8_sse2+0xf>
   DB  76,137,192                          ; mov           %r8,%rax
-  DB  235,2                               ; jmp           24dd <_sk_gather_i8_sse2+0x11>
+  DB  235,2                               ; jmp           25c4 <_sk_gather_i8_sse2+0x11>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  243,15,91,201                       ; cvttps2dq     %xmm1,%xmm1
@@ -17736,11 +17991,11 @@ _sk_gather_i8_sse2 LABEL PROC
   DB  102,67,15,110,12,136                ; movd          (%r8,%r9,4),%xmm1
   DB  102,68,15,98,201                    ; punpckldq     %xmm1,%xmm9
   DB  102,68,15,98,200                    ; punpckldq     %xmm0,%xmm9
-  DB  102,15,111,21,187,33,0,0            ; movdqa        0x21bb(%rip),%xmm2        # 4770 <_sk_callback_sse2+0x952>
+  DB  102,15,111,21,4,34,0,0              ; movdqa        0x2204(%rip),%xmm2        # 48a0 <_sk_callback_sse2+0x99b>
   DB  102,65,15,111,193                   ; movdqa        %xmm9,%xmm0
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,183,33,0,0               ; movaps        0x21b7(%rip),%xmm8        # 4780 <_sk_callback_sse2+0x962>
+  DB  68,15,40,5,0,34,0,0                 ; movaps        0x2200(%rip),%xmm8        # 48b0 <_sk_callback_sse2+0x9ab>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
@@ -17765,19 +18020,19 @@ _sk_load_565_sse2 LABEL PROC
   DB  243,15,126,20,120                   ; movq          (%rax,%rdi,2),%xmm2
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
-  DB  102,15,111,5,109,33,0,0             ; movdqa        0x216d(%rip),%xmm0        # 4790 <_sk_callback_sse2+0x972>
+  DB  102,15,111,5,182,33,0,0             ; movdqa        0x21b6(%rip),%xmm0        # 48c0 <_sk_callback_sse2+0x9bb>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,111,33,0,0                  ; mulps         0x216f(%rip),%xmm0        # 47a0 <_sk_callback_sse2+0x982>
-  DB  102,15,111,13,119,33,0,0            ; movdqa        0x2177(%rip),%xmm1        # 47b0 <_sk_callback_sse2+0x992>
+  DB  15,89,5,184,33,0,0                  ; mulps         0x21b8(%rip),%xmm0        # 48d0 <_sk_callback_sse2+0x9cb>
+  DB  102,15,111,13,192,33,0,0            ; movdqa        0x21c0(%rip),%xmm1        # 48e0 <_sk_callback_sse2+0x9db>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,121,33,0,0                 ; mulps         0x2179(%rip),%xmm1        # 47c0 <_sk_callback_sse2+0x9a2>
-  DB  102,15,219,21,129,33,0,0            ; pand          0x2181(%rip),%xmm2        # 47d0 <_sk_callback_sse2+0x9b2>
+  DB  15,89,13,194,33,0,0                 ; mulps         0x21c2(%rip),%xmm1        # 48f0 <_sk_callback_sse2+0x9eb>
+  DB  102,15,219,21,202,33,0,0            ; pand          0x21ca(%rip),%xmm2        # 4900 <_sk_callback_sse2+0x9fb>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,135,33,0,0                 ; mulps         0x2187(%rip),%xmm2        # 47e0 <_sk_callback_sse2+0x9c2>
+  DB  15,89,21,208,33,0,0                 ; mulps         0x21d0(%rip),%xmm2        # 4910 <_sk_callback_sse2+0xa0b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,142,33,0,0                 ; movaps        0x218e(%rip),%xmm3        # 47f0 <_sk_callback_sse2+0x9d2>
+  DB  15,40,29,215,33,0,0                 ; movaps        0x21d7(%rip),%xmm3        # 4920 <_sk_callback_sse2+0xa1b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_gather_565_sse2
@@ -17810,31 +18065,31 @@ _sk_gather_565_sse2 LABEL PROC
   DB  102,15,196,208,3                    ; pinsrw        $0x3,%eax,%xmm2
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
-  DB  102,15,111,5,23,33,0,0              ; movdqa        0x2117(%rip),%xmm0        # 4800 <_sk_callback_sse2+0x9e2>
+  DB  102,15,111,5,96,33,0,0              ; movdqa        0x2160(%rip),%xmm0        # 4930 <_sk_callback_sse2+0xa2b>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,25,33,0,0                   ; mulps         0x2119(%rip),%xmm0        # 4810 <_sk_callback_sse2+0x9f2>
-  DB  102,15,111,13,33,33,0,0             ; movdqa        0x2121(%rip),%xmm1        # 4820 <_sk_callback_sse2+0xa02>
+  DB  15,89,5,98,33,0,0                   ; mulps         0x2162(%rip),%xmm0        # 4940 <_sk_callback_sse2+0xa3b>
+  DB  102,15,111,13,106,33,0,0            ; movdqa        0x216a(%rip),%xmm1        # 4950 <_sk_callback_sse2+0xa4b>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,35,33,0,0                  ; mulps         0x2123(%rip),%xmm1        # 4830 <_sk_callback_sse2+0xa12>
-  DB  102,15,219,21,43,33,0,0             ; pand          0x212b(%rip),%xmm2        # 4840 <_sk_callback_sse2+0xa22>
+  DB  15,89,13,108,33,0,0                 ; mulps         0x216c(%rip),%xmm1        # 4960 <_sk_callback_sse2+0xa5b>
+  DB  102,15,219,21,116,33,0,0            ; pand          0x2174(%rip),%xmm2        # 4970 <_sk_callback_sse2+0xa6b>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,49,33,0,0                  ; mulps         0x2131(%rip),%xmm2        # 4850 <_sk_callback_sse2+0xa32>
+  DB  15,89,21,122,33,0,0                 ; mulps         0x217a(%rip),%xmm2        # 4980 <_sk_callback_sse2+0xa7b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,56,33,0,0                  ; movaps        0x2138(%rip),%xmm3        # 4860 <_sk_callback_sse2+0xa42>
+  DB  15,40,29,129,33,0,0                 ; movaps        0x2181(%rip),%xmm3        # 4990 <_sk_callback_sse2+0xa8b>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_565_sse2
 _sk_store_565_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,57,33,0,0                ; movaps        0x2139(%rip),%xmm8        # 4870 <_sk_callback_sse2+0xa52>
+  DB  68,15,40,5,130,33,0,0               ; movaps        0x2182(%rip),%xmm8        # 49a0 <_sk_callback_sse2+0xa9b>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
   DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
-  DB  68,15,40,21,46,33,0,0               ; movaps        0x212e(%rip),%xmm10        # 4880 <_sk_callback_sse2+0xa62>
+  DB  68,15,40,21,119,33,0,0              ; movaps        0x2177(%rip),%xmm10        # 49b0 <_sk_callback_sse2+0xaab>
   DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
   DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
   DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
@@ -17856,21 +18111,21 @@ _sk_load_4444_sse2 LABEL PROC
   DB  243,15,126,28,120                   ; movq          (%rax,%rdi,2),%xmm3
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,216                       ; punpcklwd     %xmm0,%xmm3
-  DB  102,15,111,5,231,32,0,0             ; movdqa        0x20e7(%rip),%xmm0        # 4890 <_sk_callback_sse2+0xa72>
+  DB  102,15,111,5,48,33,0,0              ; movdqa        0x2130(%rip),%xmm0        # 49c0 <_sk_callback_sse2+0xabb>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,233,32,0,0                  ; mulps         0x20e9(%rip),%xmm0        # 48a0 <_sk_callback_sse2+0xa82>
-  DB  102,15,111,13,241,32,0,0            ; movdqa        0x20f1(%rip),%xmm1        # 48b0 <_sk_callback_sse2+0xa92>
+  DB  15,89,5,50,33,0,0                   ; mulps         0x2132(%rip),%xmm0        # 49d0 <_sk_callback_sse2+0xacb>
+  DB  102,15,111,13,58,33,0,0             ; movdqa        0x213a(%rip),%xmm1        # 49e0 <_sk_callback_sse2+0xadb>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,243,32,0,0                 ; mulps         0x20f3(%rip),%xmm1        # 48c0 <_sk_callback_sse2+0xaa2>
-  DB  102,15,111,21,251,32,0,0            ; movdqa        0x20fb(%rip),%xmm2        # 48d0 <_sk_callback_sse2+0xab2>
+  DB  15,89,13,60,33,0,0                  ; mulps         0x213c(%rip),%xmm1        # 49f0 <_sk_callback_sse2+0xaeb>
+  DB  102,15,111,21,68,33,0,0             ; movdqa        0x2144(%rip),%xmm2        # 4a00 <_sk_callback_sse2+0xafb>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,253,32,0,0                 ; mulps         0x20fd(%rip),%xmm2        # 48e0 <_sk_callback_sse2+0xac2>
-  DB  102,15,219,29,5,33,0,0              ; pand          0x2105(%rip),%xmm3        # 48f0 <_sk_callback_sse2+0xad2>
+  DB  15,89,21,70,33,0,0                  ; mulps         0x2146(%rip),%xmm2        # 4a10 <_sk_callback_sse2+0xb0b>
+  DB  102,15,219,29,78,33,0,0             ; pand          0x214e(%rip),%xmm3        # 4a20 <_sk_callback_sse2+0xb1b>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,11,33,0,0                  ; mulps         0x210b(%rip),%xmm3        # 4900 <_sk_callback_sse2+0xae2>
+  DB  15,89,29,84,33,0,0                  ; mulps         0x2154(%rip),%xmm3        # 4a30 <_sk_callback_sse2+0xb2b>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -17904,21 +18159,21 @@ _sk_gather_4444_sse2 LABEL PROC
   DB  102,15,196,216,3                    ; pinsrw        $0x3,%eax,%xmm3
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,216                       ; punpcklwd     %xmm0,%xmm3
-  DB  102,15,111,5,146,32,0,0             ; movdqa        0x2092(%rip),%xmm0        # 4910 <_sk_callback_sse2+0xaf2>
+  DB  102,15,111,5,219,32,0,0             ; movdqa        0x20db(%rip),%xmm0        # 4a40 <_sk_callback_sse2+0xb3b>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,148,32,0,0                  ; mulps         0x2094(%rip),%xmm0        # 4920 <_sk_callback_sse2+0xb02>
-  DB  102,15,111,13,156,32,0,0            ; movdqa        0x209c(%rip),%xmm1        # 4930 <_sk_callback_sse2+0xb12>
+  DB  15,89,5,221,32,0,0                  ; mulps         0x20dd(%rip),%xmm0        # 4a50 <_sk_callback_sse2+0xb4b>
+  DB  102,15,111,13,229,32,0,0            ; movdqa        0x20e5(%rip),%xmm1        # 4a60 <_sk_callback_sse2+0xb5b>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,158,32,0,0                 ; mulps         0x209e(%rip),%xmm1        # 4940 <_sk_callback_sse2+0xb22>
-  DB  102,15,111,21,166,32,0,0            ; movdqa        0x20a6(%rip),%xmm2        # 4950 <_sk_callback_sse2+0xb32>
+  DB  15,89,13,231,32,0,0                 ; mulps         0x20e7(%rip),%xmm1        # 4a70 <_sk_callback_sse2+0xb6b>
+  DB  102,15,111,21,239,32,0,0            ; movdqa        0x20ef(%rip),%xmm2        # 4a80 <_sk_callback_sse2+0xb7b>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,168,32,0,0                 ; mulps         0x20a8(%rip),%xmm2        # 4960 <_sk_callback_sse2+0xb42>
-  DB  102,15,219,29,176,32,0,0            ; pand          0x20b0(%rip),%xmm3        # 4970 <_sk_callback_sse2+0xb52>
+  DB  15,89,21,241,32,0,0                 ; mulps         0x20f1(%rip),%xmm2        # 4a90 <_sk_callback_sse2+0xb8b>
+  DB  102,15,219,29,249,32,0,0            ; pand          0x20f9(%rip),%xmm3        # 4aa0 <_sk_callback_sse2+0xb9b>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,182,32,0,0                 ; mulps         0x20b6(%rip),%xmm3        # 4980 <_sk_callback_sse2+0xb62>
+  DB  15,89,29,255,32,0,0                 ; mulps         0x20ff(%rip),%xmm3        # 4ab0 <_sk_callback_sse2+0xbab>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -17926,7 +18181,7 @@ PUBLIC _sk_store_4444_sse2
 _sk_store_4444_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,181,32,0,0               ; movaps        0x20b5(%rip),%xmm8        # 4990 <_sk_callback_sse2+0xb72>
+  DB  68,15,40,5,254,32,0,0               ; movaps        0x20fe(%rip),%xmm8        # 4ac0 <_sk_callback_sse2+0xbbb>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -17956,11 +18211,11 @@ _sk_load_8888_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  68,15,16,12,184                     ; movups        (%rax,%rdi,4),%xmm9
-  DB  15,40,21,72,32,0,0                  ; movaps        0x2048(%rip),%xmm2        # 49a0 <_sk_callback_sse2+0xb82>
+  DB  15,40,21,145,32,0,0                 ; movaps        0x2091(%rip),%xmm2        # 4ad0 <_sk_callback_sse2+0xbcb>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,84,194                           ; andps         %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,70,32,0,0                ; movaps        0x2046(%rip),%xmm8        # 49b0 <_sk_callback_sse2+0xb92>
+  DB  68,15,40,5,143,32,0,0               ; movaps        0x208f(%rip),%xmm8        # 4ae0 <_sk_callback_sse2+0xbdb>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
   DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
@@ -18007,11 +18262,11 @@ _sk_gather_8888_sse2 LABEL PROC
   DB  102,67,15,110,12,129                ; movd          (%r9,%r8,4),%xmm1
   DB  102,68,15,98,201                    ; punpckldq     %xmm1,%xmm9
   DB  102,68,15,98,200                    ; punpckldq     %xmm0,%xmm9
-  DB  102,15,111,21,151,31,0,0            ; movdqa        0x1f97(%rip),%xmm2        # 49c0 <_sk_callback_sse2+0xba2>
+  DB  102,15,111,21,224,31,0,0            ; movdqa        0x1fe0(%rip),%xmm2        # 4af0 <_sk_callback_sse2+0xbeb>
   DB  102,65,15,111,193                   ; movdqa        %xmm9,%xmm0
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,147,31,0,0               ; movaps        0x1f93(%rip),%xmm8        # 49d0 <_sk_callback_sse2+0xbb2>
+  DB  68,15,40,5,220,31,0,0               ; movaps        0x1fdc(%rip),%xmm8        # 4b00 <_sk_callback_sse2+0xbfb>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
@@ -18033,7 +18288,7 @@ PUBLIC _sk_store_8888_sse2
 _sk_store_8888_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,86,31,0,0                ; movaps        0x1f56(%rip),%xmm8        # 49e0 <_sk_callback_sse2+0xbc2>
+  DB  68,15,40,5,159,31,0,0               ; movaps        0x1f9f(%rip),%xmm8        # 4b10 <_sk_callback_sse2+0xc0b>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -18070,7 +18325,7 @@ _sk_load_f16_sse2 LABEL PROC
   DB  102,69,15,239,210                   ; pxor          %xmm10,%xmm10
   DB  102,65,15,111,206                   ; movdqa        %xmm14,%xmm1
   DB  102,65,15,97,202                    ; punpcklwd     %xmm10,%xmm1
-  DB  102,68,15,111,13,198,30,0,0         ; movdqa        0x1ec6(%rip),%xmm9        # 49f0 <_sk_callback_sse2+0xbd2>
+  DB  102,68,15,111,13,15,31,0,0          ; movdqa        0x1f0f(%rip),%xmm9        # 4b20 <_sk_callback_sse2+0xc1b>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
   DB  102,15,239,200                      ; pxor          %xmm0,%xmm1
@@ -18078,11 +18333,11 @@ _sk_load_f16_sse2 LABEL PROC
   DB  102,68,15,111,233                   ; movdqa        %xmm1,%xmm13
   DB  102,65,15,114,245,13                ; pslld         $0xd,%xmm13
   DB  102,68,15,235,232                   ; por           %xmm0,%xmm13
-  DB  102,68,15,111,29,171,30,0,0         ; movdqa        0x1eab(%rip),%xmm11        # 4a00 <_sk_callback_sse2+0xbe2>
+  DB  102,68,15,111,29,244,30,0,0         ; movdqa        0x1ef4(%rip),%xmm11        # 4b30 <_sk_callback_sse2+0xc2b>
   DB  102,69,15,254,235                   ; paddd         %xmm11,%xmm13
-  DB  102,68,15,111,37,173,30,0,0         ; movdqa        0x1ead(%rip),%xmm12        # 4a10 <_sk_callback_sse2+0xbf2>
+  DB  102,68,15,111,37,246,30,0,0         ; movdqa        0x1ef6(%rip),%xmm12        # 4b40 <_sk_callback_sse2+0xc3b>
   DB  102,65,15,239,204                   ; pxor          %xmm12,%xmm1
-  DB  102,15,111,29,176,30,0,0            ; movdqa        0x1eb0(%rip),%xmm3        # 4a20 <_sk_callback_sse2+0xc02>
+  DB  102,15,111,29,249,30,0,0            ; movdqa        0x1ef9(%rip),%xmm3        # 4b50 <_sk_callback_sse2+0xc4b>
   DB  102,15,111,195                      ; movdqa        %xmm3,%xmm0
   DB  102,15,102,193                      ; pcmpgtd       %xmm1,%xmm0
   DB  102,65,15,223,197                   ; pandn         %xmm13,%xmm0
@@ -18166,7 +18421,7 @@ _sk_gather_f16_sse2 LABEL PROC
   DB  102,69,15,239,210                   ; pxor          %xmm10,%xmm10
   DB  102,65,15,111,206                   ; movdqa        %xmm14,%xmm1
   DB  102,65,15,97,202                    ; punpcklwd     %xmm10,%xmm1
-  DB  102,68,15,111,13,62,29,0,0          ; movdqa        0x1d3e(%rip),%xmm9        # 4a30 <_sk_callback_sse2+0xc12>
+  DB  102,68,15,111,13,135,29,0,0         ; movdqa        0x1d87(%rip),%xmm9        # 4b60 <_sk_callback_sse2+0xc5b>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
   DB  102,15,239,200                      ; pxor          %xmm0,%xmm1
@@ -18174,11 +18429,11 @@ _sk_gather_f16_sse2 LABEL PROC
   DB  102,68,15,111,233                   ; movdqa        %xmm1,%xmm13
   DB  102,65,15,114,245,13                ; pslld         $0xd,%xmm13
   DB  102,68,15,235,232                   ; por           %xmm0,%xmm13
-  DB  102,68,15,111,29,35,29,0,0          ; movdqa        0x1d23(%rip),%xmm11        # 4a40 <_sk_callback_sse2+0xc22>
+  DB  102,68,15,111,29,108,29,0,0         ; movdqa        0x1d6c(%rip),%xmm11        # 4b70 <_sk_callback_sse2+0xc6b>
   DB  102,69,15,254,235                   ; paddd         %xmm11,%xmm13
-  DB  102,68,15,111,37,37,29,0,0          ; movdqa        0x1d25(%rip),%xmm12        # 4a50 <_sk_callback_sse2+0xc32>
+  DB  102,68,15,111,37,110,29,0,0         ; movdqa        0x1d6e(%rip),%xmm12        # 4b80 <_sk_callback_sse2+0xc7b>
   DB  102,65,15,239,204                   ; pxor          %xmm12,%xmm1
-  DB  102,15,111,29,40,29,0,0             ; movdqa        0x1d28(%rip),%xmm3        # 4a60 <_sk_callback_sse2+0xc42>
+  DB  102,15,111,29,113,29,0,0            ; movdqa        0x1d71(%rip),%xmm3        # 4b90 <_sk_callback_sse2+0xc8b>
   DB  102,15,111,195                      ; movdqa        %xmm3,%xmm0
   DB  102,15,102,193                      ; pcmpgtd       %xmm1,%xmm0
   DB  102,65,15,223,197                   ; pandn         %xmm13,%xmm0
@@ -18229,17 +18484,17 @@ PUBLIC _sk_store_f16_sse2
 _sk_store_f16_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  102,68,15,111,21,80,28,0,0          ; movdqa        0x1c50(%rip),%xmm10        # 4a70 <_sk_callback_sse2+0xc52>
+  DB  102,68,15,111,21,153,28,0,0         ; movdqa        0x1c99(%rip),%xmm10        # 4ba0 <_sk_callback_sse2+0xc9b>
   DB  102,68,15,111,224                   ; movdqa        %xmm0,%xmm12
   DB  102,68,15,111,232                   ; movdqa        %xmm0,%xmm13
   DB  102,69,15,219,234                   ; pand          %xmm10,%xmm13
   DB  102,69,15,239,229                   ; pxor          %xmm13,%xmm12
-  DB  102,68,15,111,13,67,28,0,0          ; movdqa        0x1c43(%rip),%xmm9        # 4a80 <_sk_callback_sse2+0xc62>
+  DB  102,68,15,111,13,140,28,0,0         ; movdqa        0x1c8c(%rip),%xmm9        # 4bb0 <_sk_callback_sse2+0xcab>
   DB  102,65,15,114,213,16                ; psrld         $0x10,%xmm13
   DB  102,69,15,111,193                   ; movdqa        %xmm9,%xmm8
   DB  102,69,15,102,196                   ; pcmpgtd       %xmm12,%xmm8
   DB  102,65,15,114,212,13                ; psrld         $0xd,%xmm12
-  DB  102,68,15,111,29,52,28,0,0          ; movdqa        0x1c34(%rip),%xmm11        # 4a90 <_sk_callback_sse2+0xc72>
+  DB  102,68,15,111,29,125,28,0,0         ; movdqa        0x1c7d(%rip),%xmm11        # 4bc0 <_sk_callback_sse2+0xcbb>
   DB  102,69,15,235,235                   ; por           %xmm11,%xmm13
   DB  102,69,15,254,236                   ; paddd         %xmm12,%xmm13
   DB  102,65,15,114,245,16                ; pslld         $0x10,%xmm13
@@ -18316,7 +18571,7 @@ _sk_load_u16_be_sse2 LABEL PROC
   DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
   DB  102,65,15,97,201                    ; punpcklwd     %xmm9,%xmm1
   DB  15,91,193                           ; cvtdq2ps      %xmm1,%xmm0
-  DB  68,15,40,5,210,26,0,0               ; movaps        0x1ad2(%rip),%xmm8        # 4aa0 <_sk_callback_sse2+0xc82>
+  DB  68,15,40,5,27,27,0,0                ; movaps        0x1b1b(%rip),%xmm8        # 4bd0 <_sk_callback_sse2+0xccb>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -18367,7 +18622,7 @@ _sk_load_rgb_u16_be_sse2 LABEL PROC
   DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
   DB  102,65,15,97,192                    ; punpcklwd     %xmm8,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,13,14,26,0,0               ; movaps        0x1a0e(%rip),%xmm9        # 4ab0 <_sk_callback_sse2+0xc92>
+  DB  68,15,40,13,87,26,0,0               ; movaps        0x1a57(%rip),%xmm9        # 4be0 <_sk_callback_sse2+0xcdb>
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -18384,14 +18639,14 @@ _sk_load_rgb_u16_be_sse2 LABEL PROC
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,213,25,0,0                 ; movaps        0x19d5(%rip),%xmm3        # 4ac0 <_sk_callback_sse2+0xca2>
+  DB  15,40,29,30,26,0,0                  ; movaps        0x1a1e(%rip),%xmm3        # 4bf0 <_sk_callback_sse2+0xceb>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_u16_be_sse2
 _sk_store_u16_be_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,13,214,25,0,0              ; movaps        0x19d6(%rip),%xmm9        # 4ad0 <_sk_callback_sse2+0xcb2>
+  DB  68,15,40,13,31,26,0,0               ; movaps        0x1a1f(%rip),%xmm9        # 4c00 <_sk_callback_sse2+0xcfb>
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
@@ -18531,7 +18786,7 @@ _sk_repeat_x_sse2 LABEL PROC
   DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,84,13,192,23,0,0              ; andps         0x17c0(%rip),%xmm9        # 4ae0 <_sk_callback_sse2+0xcc2>
+  DB  68,15,84,13,9,24,0,0                ; andps         0x1809(%rip),%xmm9        # 4c10 <_sk_callback_sse2+0xd0b>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
   DB  65,15,92,194                        ; subps         %xmm10,%xmm0
@@ -18551,7 +18806,7 @@ _sk_repeat_y_sse2 LABEL PROC
   DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,84,13,136,23,0,0              ; andps         0x1788(%rip),%xmm9        # 4af0 <_sk_callback_sse2+0xcd2>
+  DB  68,15,84,13,209,23,0,0              ; andps         0x17d1(%rip),%xmm9        # 4c20 <_sk_callback_sse2+0xd1b>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
   DB  65,15,92,202                        ; subps         %xmm10,%xmm1
@@ -18575,7 +18830,7 @@ _sk_mirror_x_sse2 LABEL PROC
   DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
   DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
   DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
-  DB  68,15,84,21,62,23,0,0               ; andps         0x173e(%rip),%xmm10        # 4b00 <_sk_callback_sse2+0xce2>
+  DB  68,15,84,21,135,23,0,0              ; andps         0x1787(%rip),%xmm10        # 4c30 <_sk_callback_sse2+0xd2b>
   DB  69,15,87,228                        ; xorps         %xmm12,%xmm12
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
@@ -18603,7 +18858,7 @@ _sk_mirror_y_sse2 LABEL PROC
   DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
   DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
   DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
-  DB  68,15,84,21,228,22,0,0              ; andps         0x16e4(%rip),%xmm10        # 4b10 <_sk_callback_sse2+0xcf2>
+  DB  68,15,84,21,45,23,0,0               ; andps         0x172d(%rip),%xmm10        # 4c40 <_sk_callback_sse2+0xd3b>
   DB  69,15,87,228                        ; xorps         %xmm12,%xmm12
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
@@ -18620,10 +18875,10 @@ _sk_mirror_y_sse2 LABEL PROC
 PUBLIC _sk_luminance_to_alpha_sse2
 _sk_luminance_to_alpha_sse2 LABEL PROC
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
-  DB  15,89,5,188,22,0,0                  ; mulps         0x16bc(%rip),%xmm0        # 4b20 <_sk_callback_sse2+0xd02>
-  DB  15,89,13,197,22,0,0                 ; mulps         0x16c5(%rip),%xmm1        # 4b30 <_sk_callback_sse2+0xd12>
+  DB  15,89,5,5,23,0,0                    ; mulps         0x1705(%rip),%xmm0        # 4c50 <_sk_callback_sse2+0xd4b>
+  DB  15,89,13,14,23,0,0                  ; mulps         0x170e(%rip),%xmm1        # 4c60 <_sk_callback_sse2+0xd5b>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
-  DB  15,89,29,203,22,0,0                 ; mulps         0x16cb(%rip),%xmm3        # 4b40 <_sk_callback_sse2+0xd22>
+  DB  15,89,29,20,23,0,0                  ; mulps         0x1714(%rip),%xmm3        # 4c70 <_sk_callback_sse2+0xd6b>
   DB  15,88,217                           ; addps         %xmm1,%xmm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
@@ -18846,7 +19101,7 @@ _sk_linear_gradient_sse2 LABEL PROC
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
   DB  72,139,8                            ; mov           (%rax),%rcx
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,132,15,1,0,0                     ; je            3928 <_sk_linear_gradient_sse2+0x149>
+  DB  15,132,15,1,0,0                     ; je            3a0f <_sk_linear_gradient_sse2+0x149>
   DB  72,139,64,8                         ; mov           0x8(%rax),%rax
   DB  72,131,192,32                       ; add           $0x20,%rax
   DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
@@ -18907,8 +19162,8 @@ _sk_linear_gradient_sse2 LABEL PROC
   DB  69,15,86,231                        ; orps          %xmm15,%xmm12
   DB  72,131,192,36                       ; add           $0x24,%rax
   DB  72,255,201                          ; dec           %rcx
-  DB  15,133,8,255,255,255                ; jne           382e <_sk_linear_gradient_sse2+0x4f>
-  DB  235,13                              ; jmp           3935 <_sk_linear_gradient_sse2+0x156>
+  DB  15,133,8,255,255,255                ; jne           3915 <_sk_linear_gradient_sse2+0x4f>
+  DB  235,13                              ; jmp           3a1c <_sk_linear_gradient_sse2+0x156>
   DB  15,87,201                           ; xorps         %xmm1,%xmm1
   DB  15,87,210                           ; xorps         %xmm2,%xmm2
   DB  15,87,219                           ; xorps         %xmm3,%xmm3
@@ -18973,29 +19228,29 @@ _sk_xy_to_polar_unit_sse2 LABEL PROC
   DB  69,15,94,220                        ; divps         %xmm12,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
-  DB  68,15,40,45,67,17,0,0               ; movaps        0x1143(%rip),%xmm13        # 4b50 <_sk_callback_sse2+0xd32>
+  DB  68,15,40,45,140,17,0,0              ; movaps        0x118c(%rip),%xmm13        # 4c80 <_sk_callback_sse2+0xd7b>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,71,17,0,0               ; addps         0x1147(%rip),%xmm13        # 4b60 <_sk_callback_sse2+0xd42>
+  DB  68,15,88,45,144,17,0,0              ; addps         0x1190(%rip),%xmm13        # 4c90 <_sk_callback_sse2+0xd8b>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,75,17,0,0               ; addps         0x114b(%rip),%xmm13        # 4b70 <_sk_callback_sse2+0xd52>
+  DB  68,15,88,45,148,17,0,0              ; addps         0x1194(%rip),%xmm13        # 4ca0 <_sk_callback_sse2+0xd9b>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,79,17,0,0               ; addps         0x114f(%rip),%xmm13        # 4b80 <_sk_callback_sse2+0xd62>
+  DB  68,15,88,45,152,17,0,0              ; addps         0x1198(%rip),%xmm13        # 4cb0 <_sk_callback_sse2+0xdab>
   DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,40,21,78,17,0,0               ; movaps        0x114e(%rip),%xmm10        # 4b90 <_sk_callback_sse2+0xd72>
+  DB  68,15,40,21,151,17,0,0              ; movaps        0x1197(%rip),%xmm10        # 4cc0 <_sk_callback_sse2+0xdbb>
   DB  69,15,92,213                        ; subps         %xmm13,%xmm10
   DB  69,15,84,209                        ; andps         %xmm9,%xmm10
   DB  69,15,85,205                        ; andnps        %xmm13,%xmm9
   DB  69,15,86,202                        ; orps          %xmm10,%xmm9
   DB  68,15,194,192,1                     ; cmpltps       %xmm0,%xmm8
-  DB  68,15,40,21,65,17,0,0               ; movaps        0x1141(%rip),%xmm10        # 4ba0 <_sk_callback_sse2+0xd82>
+  DB  68,15,40,21,138,17,0,0              ; movaps        0x118a(%rip),%xmm10        # 4cd0 <_sk_callback_sse2+0xdcb>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,84,208                        ; andps         %xmm8,%xmm10
   DB  69,15,85,193                        ; andnps        %xmm9,%xmm8
   DB  69,15,86,194                        ; orps          %xmm10,%xmm8
   DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
   DB  68,15,194,200,1                     ; cmpltps       %xmm0,%xmm9
-  DB  68,15,40,21,48,17,0,0               ; movaps        0x1130(%rip),%xmm10        # 4bb0 <_sk_callback_sse2+0xd92>
+  DB  68,15,40,21,121,17,0,0              ; movaps        0x1179(%rip),%xmm10        # 4ce0 <_sk_callback_sse2+0xddb>
   DB  69,15,92,208                        ; subps         %xmm8,%xmm10
   DB  69,15,84,209                        ; andps         %xmm9,%xmm10
   DB  69,15,85,200                        ; andnps        %xmm8,%xmm9
@@ -19008,7 +19263,7 @@ _sk_xy_to_polar_unit_sse2 LABEL PROC
 PUBLIC _sk_save_xy_sse2
 _sk_save_xy_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,25,17,0,0                ; movaps        0x1119(%rip),%xmm8        # 4bc0 <_sk_callback_sse2+0xda2>
+  DB  68,15,40,5,98,17,0,0                ; movaps        0x1162(%rip),%xmm8        # 4cf0 <_sk_callback_sse2+0xdeb>
   DB  15,17,0                             ; movups        %xmm0,(%rax)
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,88,200                        ; addps         %xmm8,%xmm9
@@ -19016,7 +19271,7 @@ _sk_save_xy_sse2 LABEL PROC
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
   DB  69,15,194,218,1                     ; cmpltps       %xmm10,%xmm11
-  DB  68,15,40,37,4,17,0,0                ; movaps        0x1104(%rip),%xmm12        # 4bd0 <_sk_callback_sse2+0xdb2>
+  DB  68,15,40,37,77,17,0,0               ; movaps        0x114d(%rip),%xmm12        # 4d00 <_sk_callback_sse2+0xdfb>
   DB  69,15,84,220                        ; andps         %xmm12,%xmm11
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
@@ -19059,8 +19314,8 @@ _sk_bilinear_nx_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,125,16,0,0                  ; addps         0x107d(%rip),%xmm0        # 4be0 <_sk_callback_sse2+0xdc2>
-  DB  68,15,40,13,133,16,0,0              ; movaps        0x1085(%rip),%xmm9        # 4bf0 <_sk_callback_sse2+0xdd2>
+  DB  15,88,5,198,16,0,0                  ; addps         0x10c6(%rip),%xmm0        # 4d10 <_sk_callback_sse2+0xe0b>
+  DB  68,15,40,13,206,16,0,0              ; movaps        0x10ce(%rip),%xmm9        # 4d20 <_sk_callback_sse2+0xe1b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -19071,7 +19326,7 @@ _sk_bilinear_px_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,116,16,0,0                  ; addps         0x1074(%rip),%xmm0        # 4c00 <_sk_callback_sse2+0xde2>
+  DB  15,88,5,189,16,0,0                  ; addps         0x10bd(%rip),%xmm0        # 4d30 <_sk_callback_sse2+0xe2b>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -19081,8 +19336,8 @@ _sk_bilinear_ny_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,102,16,0,0                 ; addps         0x1066(%rip),%xmm1        # 4c10 <_sk_callback_sse2+0xdf2>
-  DB  68,15,40,13,110,16,0,0              ; movaps        0x106e(%rip),%xmm9        # 4c20 <_sk_callback_sse2+0xe02>
+  DB  15,88,13,175,16,0,0                 ; addps         0x10af(%rip),%xmm1        # 4d40 <_sk_callback_sse2+0xe3b>
+  DB  68,15,40,13,183,16,0,0              ; movaps        0x10b7(%rip),%xmm9        # 4d50 <_sk_callback_sse2+0xe4b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -19093,7 +19348,7 @@ _sk_bilinear_py_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,92,16,0,0                  ; addps         0x105c(%rip),%xmm1        # 4c30 <_sk_callback_sse2+0xe12>
+  DB  15,88,13,165,16,0,0                 ; addps         0x10a5(%rip),%xmm1        # 4d60 <_sk_callback_sse2+0xe5b>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -19103,13 +19358,13 @@ _sk_bicubic_n3x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,79,16,0,0                   ; addps         0x104f(%rip),%xmm0        # 4c40 <_sk_callback_sse2+0xe22>
-  DB  68,15,40,13,87,16,0,0               ; movaps        0x1057(%rip),%xmm9        # 4c50 <_sk_callback_sse2+0xe32>
+  DB  15,88,5,152,16,0,0                  ; addps         0x1098(%rip),%xmm0        # 4d70 <_sk_callback_sse2+0xe6b>
+  DB  68,15,40,13,160,16,0,0              ; movaps        0x10a0(%rip),%xmm9        # 4d80 <_sk_callback_sse2+0xe7b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,83,16,0,0               ; mulps         0x1053(%rip),%xmm9        # 4c60 <_sk_callback_sse2+0xe42>
-  DB  68,15,88,13,91,16,0,0               ; addps         0x105b(%rip),%xmm9        # 4c70 <_sk_callback_sse2+0xe52>
+  DB  68,15,89,13,156,16,0,0              ; mulps         0x109c(%rip),%xmm9        # 4d90 <_sk_callback_sse2+0xe8b>
+  DB  68,15,88,13,164,16,0,0              ; addps         0x10a4(%rip),%xmm9        # 4da0 <_sk_callback_sse2+0xe9b>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -19120,16 +19375,16 @@ _sk_bicubic_n1x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,74,16,0,0                   ; addps         0x104a(%rip),%xmm0        # 4c80 <_sk_callback_sse2+0xe62>
-  DB  68,15,40,13,82,16,0,0               ; movaps        0x1052(%rip),%xmm9        # 4c90 <_sk_callback_sse2+0xe72>
+  DB  15,88,5,147,16,0,0                  ; addps         0x1093(%rip),%xmm0        # 4db0 <_sk_callback_sse2+0xeab>
+  DB  68,15,40,13,155,16,0,0              ; movaps        0x109b(%rip),%xmm9        # 4dc0 <_sk_callback_sse2+0xebb>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,86,16,0,0                ; movaps        0x1056(%rip),%xmm8        # 4ca0 <_sk_callback_sse2+0xe82>
+  DB  68,15,40,5,159,16,0,0               ; movaps        0x109f(%rip),%xmm8        # 4dd0 <_sk_callback_sse2+0xecb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,90,16,0,0                ; addps         0x105a(%rip),%xmm8        # 4cb0 <_sk_callback_sse2+0xe92>
+  DB  68,15,88,5,163,16,0,0               ; addps         0x10a3(%rip),%xmm8        # 4de0 <_sk_callback_sse2+0xedb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,94,16,0,0                ; addps         0x105e(%rip),%xmm8        # 4cc0 <_sk_callback_sse2+0xea2>
+  DB  68,15,88,5,167,16,0,0               ; addps         0x10a7(%rip),%xmm8        # 4df0 <_sk_callback_sse2+0xeeb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,98,16,0,0                ; addps         0x1062(%rip),%xmm8        # 4cd0 <_sk_callback_sse2+0xeb2>
+  DB  68,15,88,5,171,16,0,0               ; addps         0x10ab(%rip),%xmm8        # 4e00 <_sk_callback_sse2+0xefb>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -19137,17 +19392,17 @@ _sk_bicubic_n1x_sse2 LABEL PROC
 PUBLIC _sk_bicubic_p1x_sse2
 _sk_bicubic_p1x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,92,16,0,0                ; movaps        0x105c(%rip),%xmm8        # 4ce0 <_sk_callback_sse2+0xec2>
+  DB  68,15,40,5,165,16,0,0               ; movaps        0x10a5(%rip),%xmm8        # 4e10 <_sk_callback_sse2+0xf0b>
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,72,64                      ; movups        0x40(%rax),%xmm9
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,21,88,16,0,0               ; movaps        0x1058(%rip),%xmm10        # 4cf0 <_sk_callback_sse2+0xed2>
+  DB  68,15,40,21,161,16,0,0              ; movaps        0x10a1(%rip),%xmm10        # 4e20 <_sk_callback_sse2+0xf1b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,92,16,0,0               ; addps         0x105c(%rip),%xmm10        # 4d00 <_sk_callback_sse2+0xee2>
+  DB  68,15,88,21,165,16,0,0              ; addps         0x10a5(%rip),%xmm10        # 4e30 <_sk_callback_sse2+0xf2b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,88,16,0,0               ; addps         0x1058(%rip),%xmm10        # 4d10 <_sk_callback_sse2+0xef2>
+  DB  68,15,88,21,161,16,0,0              ; addps         0x10a1(%rip),%xmm10        # 4e40 <_sk_callback_sse2+0xf3b>
   DB  68,15,17,144,128,0,0,0              ; movups        %xmm10,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -19157,11 +19412,11 @@ _sk_bicubic_p3x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,75,16,0,0                   ; addps         0x104b(%rip),%xmm0        # 4d20 <_sk_callback_sse2+0xf02>
+  DB  15,88,5,148,16,0,0                  ; addps         0x1094(%rip),%xmm0        # 4e50 <_sk_callback_sse2+0xf4b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,75,16,0,0                ; mulps         0x104b(%rip),%xmm8        # 4d30 <_sk_callback_sse2+0xf12>
-  DB  68,15,88,5,83,16,0,0                ; addps         0x1053(%rip),%xmm8        # 4d40 <_sk_callback_sse2+0xf22>
+  DB  68,15,89,5,148,16,0,0               ; mulps         0x1094(%rip),%xmm8        # 4e60 <_sk_callback_sse2+0xf5b>
+  DB  68,15,88,5,156,16,0,0               ; addps         0x109c(%rip),%xmm8        # 4e70 <_sk_callback_sse2+0xf6b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -19172,13 +19427,13 @@ _sk_bicubic_n3y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,65,16,0,0                  ; addps         0x1041(%rip),%xmm1        # 4d50 <_sk_callback_sse2+0xf32>
-  DB  68,15,40,13,73,16,0,0               ; movaps        0x1049(%rip),%xmm9        # 4d60 <_sk_callback_sse2+0xf42>
+  DB  15,88,13,138,16,0,0                 ; addps         0x108a(%rip),%xmm1        # 4e80 <_sk_callback_sse2+0xf7b>
+  DB  68,15,40,13,146,16,0,0              ; movaps        0x1092(%rip),%xmm9        # 4e90 <_sk_callback_sse2+0xf8b>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,69,16,0,0               ; mulps         0x1045(%rip),%xmm9        # 4d70 <_sk_callback_sse2+0xf52>
-  DB  68,15,88,13,77,16,0,0               ; addps         0x104d(%rip),%xmm9        # 4d80 <_sk_callback_sse2+0xf62>
+  DB  68,15,89,13,142,16,0,0              ; mulps         0x108e(%rip),%xmm9        # 4ea0 <_sk_callback_sse2+0xf9b>
+  DB  68,15,88,13,150,16,0,0              ; addps         0x1096(%rip),%xmm9        # 4eb0 <_sk_callback_sse2+0xfab>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -19189,16 +19444,16 @@ _sk_bicubic_n1y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,59,16,0,0                  ; addps         0x103b(%rip),%xmm1        # 4d90 <_sk_callback_sse2+0xf72>
-  DB  68,15,40,13,67,16,0,0               ; movaps        0x1043(%rip),%xmm9        # 4da0 <_sk_callback_sse2+0xf82>
+  DB  15,88,13,132,16,0,0                 ; addps         0x1084(%rip),%xmm1        # 4ec0 <_sk_callback_sse2+0xfbb>
+  DB  68,15,40,13,140,16,0,0              ; movaps        0x108c(%rip),%xmm9        # 4ed0 <_sk_callback_sse2+0xfcb>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,71,16,0,0                ; movaps        0x1047(%rip),%xmm8        # 4db0 <_sk_callback_sse2+0xf92>
+  DB  68,15,40,5,144,16,0,0               ; movaps        0x1090(%rip),%xmm8        # 4ee0 <_sk_callback_sse2+0xfdb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,75,16,0,0                ; addps         0x104b(%rip),%xmm8        # 4dc0 <_sk_callback_sse2+0xfa2>
+  DB  68,15,88,5,148,16,0,0               ; addps         0x1094(%rip),%xmm8        # 4ef0 <_sk_callback_sse2+0xfeb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,79,16,0,0                ; addps         0x104f(%rip),%xmm8        # 4dd0 <_sk_callback_sse2+0xfb2>
+  DB  68,15,88,5,152,16,0,0               ; addps         0x1098(%rip),%xmm8        # 4f00 <_sk_callback_sse2+0xffb>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,83,16,0,0                ; addps         0x1053(%rip),%xmm8        # 4de0 <_sk_callback_sse2+0xfc2>
+  DB  68,15,88,5,156,16,0,0               ; addps         0x109c(%rip),%xmm8        # 4f10 <_sk_callback_sse2+0x100b>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -19206,17 +19461,17 @@ _sk_bicubic_n1y_sse2 LABEL PROC
 PUBLIC _sk_bicubic_p1y_sse2
 _sk_bicubic_p1y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,77,16,0,0                ; movaps        0x104d(%rip),%xmm8        # 4df0 <_sk_callback_sse2+0xfd2>
+  DB  68,15,40,5,150,16,0,0               ; movaps        0x1096(%rip),%xmm8        # 4f20 <_sk_callback_sse2+0x101b>
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,72,96                      ; movups        0x60(%rax),%xmm9
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  68,15,40,21,72,16,0,0               ; movaps        0x1048(%rip),%xmm10        # 4e00 <_sk_callback_sse2+0xfe2>
+  DB  68,15,40,21,145,16,0,0              ; movaps        0x1091(%rip),%xmm10        # 4f30 <_sk_callback_sse2+0x102b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,76,16,0,0               ; addps         0x104c(%rip),%xmm10        # 4e10 <_sk_callback_sse2+0xff2>
+  DB  68,15,88,21,149,16,0,0              ; addps         0x1095(%rip),%xmm10        # 4f40 <_sk_callback_sse2+0x103b>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,72,16,0,0               ; addps         0x1048(%rip),%xmm10        # 4e20 <_sk_callback_sse2+0x1002>
+  DB  68,15,88,21,145,16,0,0              ; addps         0x1091(%rip),%xmm10        # 4f50 <_sk_callback_sse2+0x104b>
   DB  68,15,17,144,160,0,0,0              ; movups        %xmm10,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -19226,11 +19481,11 @@ _sk_bicubic_p3y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,58,16,0,0                  ; addps         0x103a(%rip),%xmm1        # 4e30 <_sk_callback_sse2+0x1012>
+  DB  15,88,13,131,16,0,0                 ; addps         0x1083(%rip),%xmm1        # 4f60 <_sk_callback_sse2+0x105b>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,58,16,0,0                ; mulps         0x103a(%rip),%xmm8        # 4e40 <_sk_callback_sse2+0x1022>
-  DB  68,15,88,5,66,16,0,0                ; addps         0x1042(%rip),%xmm8        # 4e50 <_sk_callback_sse2+0x1032>
+  DB  68,15,89,5,131,16,0,0               ; mulps         0x1083(%rip),%xmm8        # 4f70 <_sk_callback_sse2+0x106b>
+  DB  68,15,88,5,139,16,0,0               ; addps         0x108b(%rip),%xmm8        # 4f80 <_sk_callback_sse2+0x107b>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -19305,6 +19560,40 @@ ALIGN 16
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
   DB  0,0                                 ; add           %al,(%rax)
+  DB  128,63,1                            ; cmpb          $0x1,(%rdi)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,1                                 ; add           %al,(%rcx)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,1                                 ; add           %al,(%rcx)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,1                                 ; add           %al,(%rcx)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,4,0                               ; add           %al,(%rax,%rax,1)
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  4,0                                 ; add           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  4,0                                 ; add           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  4,0                                 ; add           $0x0,%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  2,0                                 ; add           (%rax),%al
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  33,8                                ; and           %ecx,(%rax)
+  DB  130                                 ; (bad)
+  DB  60,33                               ; cmp           $0x21,%al
+  DB  8,130,60,33,8,130                   ; or            %al,-0x7df7dec4(%rdx)
+  DB  60,33                               ; cmp           $0x21,%al
+  DB  8,130,60,0,0,0                      ; or            %al,0x3c(%rdx)
+  DB  191,0,0,0,191                       ; mov           $0xbf000000,%edi
+  DB  0,0                                 ; add           %al,(%rax)
+  DB  0,191,0,0,0,191                     ; add           %bh,-0x41000000(%rdi)
+  DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
   DB  63                                  ; (bad)
@@ -19395,17 +19684,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
-  DB  63                                  ; (bad)
+  DB  191,0,0,128,191                     ; mov           $0xbf800000,%edi
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,191,0,0,128,191,0               ; cmpb          $0x0,-0x40800000(%rdi)
-  DB  0,128,191,0,0,128                   ; add           %al,-0x7fffff41(%rax)
-  DB  191,0,0,224,64                      ; mov           $0x40e00000,%edi
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4058 <.literal16+0x188>
+  DB  0,224                               ; add           %ah,%al
+  DB  64,0,0                              ; add           %al,(%rax)
+  DB  224,64                              ; loopne        4188 <.literal16+0x1d8>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        405c <.literal16+0x18c>
+  DB  224,64                              ; loopne        418c <.literal16+0x1dc>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4060 <.literal16+0x190>
+  DB  224,64                              ; loopne        4190 <.literal16+0x1e0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -19629,13 +19917,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4229 <.literal16+0x359>
+  DB  224,7                               ; loopne        4359 <.literal16+0x3a9>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        422d <.literal16+0x35d>
+  DB  224,7                               ; loopne        435d <.literal16+0x3ad>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4231 <.literal16+0x361>
+  DB  224,7                               ; loopne        4361 <.literal16+0x3b1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4235 <.literal16+0x365>
+  DB  224,7                               ; loopne        4365 <.literal16+0x3b5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -19704,11 +19992,11 @@ ALIGN 16
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            431b <.literal16+0x44b>
+  DB  127,67                              ; jg            444b <.literal16+0x49b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            431f <.literal16+0x44f>
+  DB  127,67                              ; jg            444f <.literal16+0x49f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4323 <.literal16+0x453>
+  DB  127,67                              ; jg            4453 <.literal16+0x4a3>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,129,128,128,59           ; addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -19723,16 +20011,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4314 <.literal16+0x444>
+  DB  127,0                               ; jg            4444 <.literal16+0x494>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4318 <.literal16+0x448>
+  DB  127,0                               ; jg            4448 <.literal16+0x498>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            431c <.literal16+0x44c>
+  DB  127,0                               ; jg            444c <.literal16+0x49c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4320 <.literal16+0x450>
+  DB  127,0                               ; jg            4450 <.literal16+0x4a0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -19741,7 +20029,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            43a5 <.literal16+0x4d5>
+  DB  119,115                             ; ja            44d5 <.literal16+0x525>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -19752,7 +20040,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4309 <.literal16+0x439>
+  DB  117,191                             ; jne           4439 <.literal16+0x489>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -19764,7 +20052,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3834a <_sk_callback_sse2+0xffffffffe9a3452c>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3847a <_sk_callback_sse2+0xffffffffe9a34575>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -19818,16 +20106,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            43e4 <.literal16+0x514>
+  DB  127,0                               ; jg            4514 <.literal16+0x564>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            43e8 <.literal16+0x518>
+  DB  127,0                               ; jg            4518 <.literal16+0x568>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            43ec <.literal16+0x51c>
+  DB  127,0                               ; jg            451c <.literal16+0x56c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            43f0 <.literal16+0x520>
+  DB  127,0                               ; jg            4520 <.literal16+0x570>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -19836,7 +20124,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4475 <.literal16+0x5a5>
+  DB  119,115                             ; ja            45a5 <.literal16+0x5f5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -19847,7 +20135,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           43d9 <.literal16+0x509>
+  DB  117,191                             ; jne           4509 <.literal16+0x559>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -19859,7 +20147,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3841a <_sk_callback_sse2+0xffffffffe9a345fc>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3854a <_sk_callback_sse2+0xffffffffe9a34645>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -19913,16 +20201,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            44b4 <.literal16+0x5e4>
+  DB  127,0                               ; jg            45e4 <.literal16+0x634>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            44b8 <.literal16+0x5e8>
+  DB  127,0                               ; jg            45e8 <.literal16+0x638>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            44bc <.literal16+0x5ec>
+  DB  127,0                               ; jg            45ec <.literal16+0x63c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            44c0 <.literal16+0x5f0>
+  DB  127,0                               ; jg            45f0 <.literal16+0x640>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -19931,7 +20219,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4545 <.literal16+0x675>
+  DB  119,115                             ; ja            4675 <.literal16+0x6c5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -19942,7 +20230,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           44a9 <.literal16+0x5d9>
+  DB  117,191                             ; jne           45d9 <.literal16+0x629>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -19954,7 +20242,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a384ea <_sk_callback_sse2+0xffffffffe9a346cc>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3861a <_sk_callback_sse2+0xffffffffe9a34715>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -20008,16 +20296,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4584 <.literal16+0x6b4>
+  DB  127,0                               ; jg            46b4 <.literal16+0x704>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4588 <.literal16+0x6b8>
+  DB  127,0                               ; jg            46b8 <.literal16+0x708>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            458c <.literal16+0x6bc>
+  DB  127,0                               ; jg            46bc <.literal16+0x70c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4590 <.literal16+0x6c0>
+  DB  127,0                               ; jg            46c0 <.literal16+0x710>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -20026,7 +20314,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4615 <.literal16+0x745>
+  DB  119,115                             ; ja            4745 <.literal16+0x795>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -20037,7 +20325,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4579 <.literal16+0x6a9>
+  DB  117,191                             ; jne           46a9 <.literal16+0x6f9>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -20049,7 +20337,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a385ba <_sk_callback_sse2+0xffffffffe9a3479c>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a386ea <_sk_callback_sse2+0xffffffffe9a347e5>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -20099,13 +20387,13 @@ ALIGN 16
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
-  DB  127,67                              ; jg            4697 <.literal16+0x7c7>
+  DB  127,67                              ; jg            47c7 <.literal16+0x817>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            469b <.literal16+0x7cb>
+  DB  127,67                              ; jg            47cb <.literal16+0x81b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            469f <.literal16+0x7cf>
+  DB  127,67                              ; jg            47cf <.literal16+0x81f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            46a3 <.literal16+0x7d3>
+  DB  127,67                              ; jg            47d3 <.literal16+0x823>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -20152,16 +20440,16 @@ ALIGN 16
   DB  128,3,62                            ; addb          $0x3e,(%rbx)
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           4723 <.literal16+0x853>
+  DB  118,63                              ; jbe           4853 <.literal16+0x8a3>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           4727 <.literal16+0x857>
+  DB  118,63                              ; jbe           4857 <.literal16+0x8a7>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           472b <.literal16+0x85b>
+  DB  118,63                              ; jbe           485b <.literal16+0x8ab>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           472f <.literal16+0x85f>
+  DB  118,63                              ; jbe           485f <.literal16+0x8af>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
@@ -20173,11 +20461,11 @@ ALIGN 16
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            476b <.literal16+0x89b>
+  DB  127,67                              ; jg            489b <.literal16+0x8eb>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            476f <.literal16+0x89f>
+  DB  127,67                              ; jg            489f <.literal16+0x8ef>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4773 <.literal16+0x8a3>
+  DB  127,67                              ; jg            48a3 <.literal16+0x8f3>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,0,0,128,63               ; addb          $0x3f,-0x7fffffc5(%rax)
@@ -20217,13 +20505,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        47b9 <.literal16+0x8e9>
+  DB  224,7                               ; loopne        48e9 <.literal16+0x939>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        47bd <.literal16+0x8ed>
+  DB  224,7                               ; loopne        48ed <.literal16+0x93d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        47c1 <.literal16+0x8f1>
+  DB  224,7                               ; loopne        48f1 <.literal16+0x941>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        47c5 <.literal16+0x8f5>
+  DB  224,7                               ; loopne        48f5 <.literal16+0x945>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -20269,13 +20557,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4829 <.literal16+0x959>
+  DB  224,7                               ; loopne        4959 <.literal16+0x9a9>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        482d <.literal16+0x95d>
+  DB  224,7                               ; loopne        495d <.literal16+0x9ad>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4831 <.literal16+0x961>
+  DB  224,7                               ; loopne        4961 <.literal16+0x9b1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4835 <.literal16+0x965>
+  DB  224,7                               ; loopne        4965 <.literal16+0x9b5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -20313,13 +20601,13 @@ ALIGN 16
   DB  65,0,0                              ; add           %al,(%r8)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            48c6 <.literal16+0x9f6>
+  DB  124,66                              ; jl            49f6 <.literal16+0xa46>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            48ca <.literal16+0x9fa>
+  DB  124,66                              ; jl            49fa <.literal16+0xa4a>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            48ce <.literal16+0x9fe>
+  DB  124,66                              ; jl            49fe <.literal16+0xa4e>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            48d2 <.literal16+0xa02>
+  DB  124,66                              ; jl            4a02 <.literal16+0xa52>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,240                               ; add           %dh,%al
@@ -20409,13 +20697,13 @@ ALIGN 16
   DB  136,136,61,137,136,136              ; mov           %cl,-0x777776c3(%rax)
   DB  61,137,136,136,61                   ; cmp           $0x3d888889,%eax
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            49d5 <.literal16+0xb05>
+  DB  112,65                              ; jo            4b05 <.literal16+0xb55>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            49d9 <.literal16+0xb09>
+  DB  112,65                              ; jo            4b09 <.literal16+0xb59>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            49dd <.literal16+0xb0d>
+  DB  112,65                              ; jo            4b0d <.literal16+0xb5d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            49e1 <.literal16+0xb11>
+  DB  112,65                              ; jo            4b11 <.literal16+0xb61>
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  255,0                               ; incl          (%rax)
@@ -20437,11 +20725,11 @@ ALIGN 16
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,0,0,127,67               ; addb          $0x43,0x7f00003b(%rax)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4a2b <.literal16+0xb5b>
+  DB  127,67                              ; jg            4b5b <.literal16+0xbab>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4a2f <.literal16+0xb5f>
+  DB  127,67                              ; jg            4b5f <.literal16+0xbaf>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4a33 <.literal16+0xb63>
+  DB  127,67                              ; jg            4b63 <.literal16+0xbb3>
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
@@ -20517,13 +20805,13 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  255                                 ; (bad)
-  DB  127,71                              ; jg            4b1b <.literal16+0xc4b>
+  DB  127,71                              ; jg            4c4b <.literal16+0xc9b>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            4b1f <.literal16+0xc4f>
+  DB  127,71                              ; jg            4c4f <.literal16+0xc9f>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            4b23 <.literal16+0xc53>
+  DB  127,71                              ; jg            4c53 <.literal16+0xca3>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            4b27 <.literal16+0xc57>
+  DB  127,71                              ; jg            4c57 <.literal16+0xca7>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -20676,11 +20964,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         4c82 <.literal16+0xdb2>
+  DB  62,114,28                           ; jb,pt         4db2 <.literal16+0xe02>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4c86 <.literal16+0xdb6>
+  DB  62,114,28                           ; jb,pt         4db6 <.literal16+0xe06>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4c8a <.literal16+0xdba>
+  DB  62,114,28                           ; jb,pt         4dba <.literal16+0xe0a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -20724,7 +21012,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63db15 <_sk_callback_sse2+0x3d639cf7>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63dc45 <_sk_callback_sse2+0x3d639d40>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -20750,7 +21038,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63db55 <_sk_callback_sse2+0x3d639d37>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63dc85 <_sk_callback_sse2+0x3d639d80>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -20759,13 +21047,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            4d4e <.literal16+0xe7e>
+  DB  114,28                              ; jb            4e7e <.literal16+0xece>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4d52 <.literal16+0xe82>
+  DB  62,114,28                           ; jb,pt         4e82 <.literal16+0xed2>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4d56 <.literal16+0xe86>
+  DB  62,114,28                           ; jb,pt         4e86 <.literal16+0xed6>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4d5a <.literal16+0xe8a>
+  DB  62,114,28                           ; jb,pt         4e8a <.literal16+0xeda>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -20786,11 +21074,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         4d92 <.literal16+0xec2>
+  DB  62,114,28                           ; jb,pt         4ec2 <.literal16+0xf12>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4d96 <.literal16+0xec6>
+  DB  62,114,28                           ; jb,pt         4ec6 <.literal16+0xf16>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4d9a <.literal16+0xeca>
+  DB  62,114,28                           ; jb,pt         4eca <.literal16+0xf1a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -20834,7 +21122,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63dc25 <_sk_callback_sse2+0x3d639e07>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63dd55 <_sk_callback_sse2+0x3d639e50>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -20860,7 +21148,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63dc65 <_sk_callback_sse2+0x3d639e47>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63dd95 <_sk_callback_sse2+0x3d639e90>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -20869,13 +21157,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            4e5e <.literal16+0xf8e>
+  DB  114,28                              ; jb            4f8e <.literal16+0xfde>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4e62 <_sk_callback_sse2+0x1044>
+  DB  62,114,28                           ; jb,pt         4f92 <_sk_callback_sse2+0x108d>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4e66 <_sk_callback_sse2+0x1048>
+  DB  62,114,28                           ; jb,pt         4f96 <_sk_callback_sse2+0x1091>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         4e6a <_sk_callback_sse2+0x104c>
+  DB  62,114,28                           ; jb,pt         4f9a <_sk_callback_sse2+0x1095>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
index 83c806026f800792648ebfbf28b1a20c7029f0f0..b93b5a01fddd0843f4859a52fdb7b810e0829200 100644 (file)
@@ -293,6 +293,36 @@ STAGE(seed_shader) {
     dr = dg = db = da = 0;
 }
 
+STAGE(dither) {  // Nudges r,g,b by an 8x8 ordered-dither offset scaled by c->rate and a.
+    auto c = (const SkJumper_DitherCtx*)ctx;  // Read below as *c->y and c->rate.
+
+    // Get [(x,y), (x+1,y), (x+2,y), ...] loaded up in integer vectors.
+    U32 X = trunc_((int)x + unaligned_load<F>(k->iota)),  // Going through float is kind of lazy..
+        Y = (uint32_t)*c->y;
+
+    // We're doing 8x8 ordered dithering, see https://en.wikipedia.org/wiki/Ordered_dithering.
+    // In this case n=8, using the matrix whose first row looks like 1/64 x [ 0 48 12 60 ... ].
+
+    // The bit mixing below only needs X and X^Y, so from here on "Y" really holds X^Y.
+    Y ^= X;
+
+    // Interleave the bottom 3 bits of each of X and Y (lowest bits landing highest) to make
+    // 6 bits, for 2^6 == 64 == 8x8 matrix values.  If X=abc and Y=def, we make fcebda.
+    U32 M = (Y & 1) << 5 | (X & 1) << 4
+          | (Y & 2) << 2 | (X & 2) << 1
+          | (Y & 4) >> 1 | (X & 4) >> 2;
+
+    // M is in [0,63]: scale it to [0,1], then shift to [-0.5,+0.5].
+    // Scaling by 1/63.0f (rather than the article's 1/64) makes the first range exactly [0,1].
+    // I suspect the divide by 64 in the article was written with fast integer math in mind.
+    F dither = cast(M) * (1/63.0f) - 0.5f;
+
+    // Fold in an extra alpha to dither as if applied to unpremul r,g,b; a itself is unchanged.
+    r += c->rate*dither*a;
+    g += c->rate*dither*a;
+    b += c->rate*dither*a;
+}
+
 STAGE(constant_color) {
     auto rgba = (const float*)ctx;
     r = rgba[0];