Revert "We can mask load and store with just AVX."
author Brian Osman <brianosman@google.com>
Mon, 22 May 2017 19:25:36 +0000 (19:25 +0000)
committer Skia Commit-Bot <skia-commit-bot@chromium.org>
Mon, 22 May 2017 19:30:46 +0000 (19:30 +0000)
This reverts commit 139e463dc6f965fdaed854efcb20c6cafbb6dbdc.

Reason for revert: Crashes on Valgrind bots.

Original change's description:
> We can mask load and store with just AVX.
>
> Previously we were using AVX2 instructions to generate the masks,
> and AVX2 instructions for the mask load and stores themselves.
>
> AVX came with float mask loads and stores, which will work perfectly
> fine.  I don't really get what the point of the 32-bit int loads and
> stores are in AVX2, beyond maybe syntax sugar?
>
> Change-Id: I81fa55fb09daea4f5546f8c9ebbc886015edce51
> Reviewed-on: https://skia-review.googlesource.com/17452
> Reviewed-by: Herb Derby <herb@google.com>
> Commit-Queue: Ravi Mistry <rmistry@google.com>
>

TBR=mtklein@chromium.org,rmistry@google.com,herb@google.com,reed@google.com
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true

Change-Id: I3a48f006c20475f6334ff94998281f381c696c93
Reviewed-on: https://skia-review.googlesource.com/17524
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>

src/jumper/SkJumper_generated.S
src/jumper/SkJumper_generated_win.S
src/jumper/SkJumper_stages.cpp

index 37d3fd2..143f6b5 100644 (file)
@@ -10432,8 +10432,8 @@ _sk_load_tables_hsw:
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,105                             // jne           1a4a <_sk_load_tables_hsw+0x7e>
-  .byte  196,193,124,16,25                   // vmovups       (%r9),%ymm3
-  .byte  197,228,84,13,18,50,0,0             // vandps        0x3212(%rip),%ymm3,%ymm1        # 4c00 <_sk_callback_hsw+0x513>
+  .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
+  .byte  197,229,219,13,18,50,0,0            // vpand         0x3212(%rip),%ymm3,%ymm1        # 4c00 <_sk_callback_hsw+0x513>
   .byte  196,65,61,118,192                   // vpcmpeqd      %ymm8,%ymm8,%ymm8
   .byte  72,139,72,8                         // mov           0x8(%rax),%rcx
   .byte  76,139,72,16                        // mov           0x10(%rax),%r9
@@ -10459,7 +10459,7 @@ _sk_load_tables_hsw:
   .byte  73,211,234                          // shr           %cl,%r10
   .byte  196,193,249,110,194                 // vmovq         %r10,%xmm0
   .byte  196,226,125,33,192                  // vpmovsxbd     %xmm0,%ymm0
-  .byte  196,194,125,44,25                   // vmaskmovps    (%r9),%ymm0,%ymm3
+  .byte  196,194,125,140,25                  // vpmaskmovd    (%r9),%ymm0,%ymm3
   .byte  233,115,255,255,255                 // jmpq          19e6 <_sk_load_tables_hsw+0x1a>
 
 HIDDEN _sk_load_tables_u16_be_hsw
@@ -11984,8 +11984,8 @@ _sk_load_8888_hsw:
   .byte  76,3,8                              // add           (%rax),%r9
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,88                              // jne           336d <_sk_load_8888_hsw+0x6d>
-  .byte  196,193,124,16,25                   // vmovups       (%r9),%ymm3
-  .byte  197,228,84,5,158,25,0,0             // vandps        0x199e(%rip),%ymm3,%ymm0        # 4cc0 <_sk_callback_hsw+0x5d3>
+  .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
+  .byte  197,229,219,5,158,25,0,0            // vpand         0x199e(%rip),%ymm3,%ymm0        # 4cc0 <_sk_callback_hsw+0x5d3>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
   .byte  196,98,125,24,5,217,23,0,0          // vbroadcastss  0x17d9(%rip),%ymm8        # 4b08 <_sk_callback_hsw+0x41b>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -12008,7 +12008,7 @@ _sk_load_8888_hsw:
   .byte  72,211,232                          // shr           %cl,%rax
   .byte  196,225,249,110,192                 // vmovq         %rax,%xmm0
   .byte  196,226,125,33,192                  // vpmovsxbd     %xmm0,%ymm0
-  .byte  196,194,125,44,25                   // vmaskmovps    (%r9),%ymm0,%ymm3
+  .byte  196,194,125,140,25                  // vpmaskmovd    (%r9),%ymm0,%ymm3
   .byte  235,135                             // jmp           331a <_sk_load_8888_hsw+0x1a>
 
 HIDDEN _sk_gather_8888_hsw
@@ -12065,7 +12065,7 @@ _sk_store_8888_hsw:
   .byte  196,65,53,235,192                   // vpor          %ymm8,%ymm9,%ymm8
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,12                              // jne           347c <_sk_store_8888_hsw+0x73>
-  .byte  196,65,124,17,1                     // vmovups       %ymm8,(%r9)
+  .byte  196,65,126,127,1                    // vmovdqu       %ymm8,(%r9)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
@@ -12076,7 +12076,7 @@ _sk_store_8888_hsw:
   .byte  72,211,232                          // shr           %cl,%rax
   .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
   .byte  196,66,125,33,201                   // vpmovsxbd     %xmm9,%ymm9
-  .byte  196,66,53,46,1                      // vmaskmovps    %ymm8,%ymm9,(%r9)
+  .byte  196,66,53,142,1                     // vpmaskmovd    %ymm8,%ymm9,(%r9)
   .byte  235,211                             // jmp           3475 <_sk_store_8888_hsw+0x6c>
 
 HIDDEN _sk_load_f16_hsw
@@ -13984,14 +13984,14 @@ _sk_seed_shader_avx:
   .byte  197,249,112,192,0                   // vpshufd       $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,67,98,0,0         // vbroadcastss  0x6243(%rip),%ymm1        # 630c <_sk_callback_avx+0x125>
+  .byte  196,226,125,24,13,95,99,0,0         // vbroadcastss  0x635f(%rip),%ymm1        # 6428 <_sk_callback_avx+0x125>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,88,2                        // vaddps        (%rdx),%ymm0,%ymm0
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  197,236,88,201                      // vaddps        %ymm1,%ymm2,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,21,39,98,0,0         // vbroadcastss  0x6227(%rip),%ymm2        # 6310 <_sk_callback_avx+0x129>
+  .byte  196,226,125,24,21,67,99,0,0         // vbroadcastss  0x6343(%rip),%ymm2        # 642c <_sk_callback_avx+0x129>
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
   .byte  197,220,87,228                      // vxorps        %ymm4,%ymm4,%ymm4
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
@@ -14014,7 +14014,7 @@ _sk_dither_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  196,66,125,24,8                     // vbroadcastss  (%r8),%ymm9
   .byte  196,65,60,87,209                    // vxorps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,216,97,0,0         // vbroadcastss  0x61d8(%rip),%ymm11        # 6314 <_sk_callback_avx+0x12d>
+  .byte  196,98,125,24,29,244,98,0,0         // vbroadcastss  0x62f4(%rip),%ymm11        # 6430 <_sk_callback_avx+0x12d>
   .byte  196,65,44,84,203                    // vandps        %ymm11,%ymm10,%ymm9
   .byte  196,193,25,114,241,5                // vpslld        $0x5,%xmm9,%xmm12
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
@@ -14025,8 +14025,8 @@ _sk_dither_avx:
   .byte  196,67,125,25,219,1                 // vextractf128  $0x1,%ymm11,%xmm11
   .byte  196,193,33,114,243,4                // vpslld        $0x4,%xmm11,%xmm11
   .byte  196,67,29,24,219,1                  // vinsertf128   $0x1,%xmm11,%ymm12,%ymm11
-  .byte  196,98,125,24,37,153,97,0,0         // vbroadcastss  0x6199(%rip),%ymm12        # 6318 <_sk_callback_avx+0x131>
-  .byte  196,98,125,24,45,148,97,0,0         // vbroadcastss  0x6194(%rip),%ymm13        # 631c <_sk_callback_avx+0x135>
+  .byte  196,98,125,24,37,181,98,0,0         // vbroadcastss  0x62b5(%rip),%ymm12        # 6434 <_sk_callback_avx+0x131>
+  .byte  196,98,125,24,45,176,98,0,0         // vbroadcastss  0x62b0(%rip),%ymm13        # 6438 <_sk_callback_avx+0x135>
   .byte  196,65,44,84,245                    // vandps        %ymm13,%ymm10,%ymm14
   .byte  196,193,1,114,246,2                 // vpslld        $0x2,%xmm14,%xmm15
   .byte  196,67,125,25,246,1                 // vextractf128  $0x1,%ymm14,%xmm14
@@ -14053,9 +14053,9 @@ _sk_dither_avx:
   .byte  196,65,12,86,202                    // vorps         %ymm10,%ymm14,%ymm9
   .byte  196,65,60,86,193                    // vorps         %ymm9,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,255,96,0,0         // vbroadcastss  0x60ff(%rip),%ymm9        # 6320 <_sk_callback_avx+0x139>
+  .byte  196,98,125,24,13,27,98,0,0          // vbroadcastss  0x621b(%rip),%ymm9        # 643c <_sk_callback_avx+0x139>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,245,96,0,0         // vbroadcastss  0x60f5(%rip),%ymm9        # 6324 <_sk_callback_avx+0x13d>
+  .byte  196,98,125,24,13,17,98,0,0          // vbroadcastss  0x6211(%rip),%ymm9        # 6440 <_sk_callback_avx+0x13d>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  196,98,125,24,72,8                  // vbroadcastss  0x8(%rax),%ymm9
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
@@ -14124,7 +14124,7 @@ HIDDEN _sk_srcatop_avx
 FUNCTION(_sk_srcatop_avx)
 _sk_srcatop_avx:
   .byte  197,252,89,199                      // vmulps        %ymm7,%ymm0,%ymm0
-  .byte  196,98,125,24,5,76,96,0,0           // vbroadcastss  0x604c(%rip),%ymm8        # 6328 <_sk_callback_avx+0x141>
+  .byte  196,98,125,24,5,104,97,0,0          // vbroadcastss  0x6168(%rip),%ymm8        # 6444 <_sk_callback_avx+0x141>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -14145,7 +14145,7 @@ HIDDEN _sk_dstatop_avx
 FUNCTION(_sk_dstatop_avx)
 _sk_dstatop_avx:
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
-  .byte  196,98,125,24,13,14,96,0,0          // vbroadcastss  0x600e(%rip),%ymm9        # 632c <_sk_callback_avx+0x145>
+  .byte  196,98,125,24,13,42,97,0,0          // vbroadcastss  0x612a(%rip),%ymm9        # 6448 <_sk_callback_avx+0x145>
   .byte  197,52,92,207                       // vsubps        %ymm7,%ymm9,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,188,88,192                      // vaddps        %ymm0,%ymm8,%ymm0
@@ -14187,7 +14187,7 @@ HIDDEN _sk_srcout_avx
 .globl _sk_srcout_avx
 FUNCTION(_sk_srcout_avx)
 _sk_srcout_avx:
-  .byte  196,98,125,24,5,173,95,0,0          // vbroadcastss  0x5fad(%rip),%ymm8        # 6330 <_sk_callback_avx+0x149>
+  .byte  196,98,125,24,5,201,96,0,0          // vbroadcastss  0x60c9(%rip),%ymm8        # 644c <_sk_callback_avx+0x149>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -14200,7 +14200,7 @@ HIDDEN _sk_dstout_avx
 .globl _sk_dstout_avx
 FUNCTION(_sk_dstout_avx)
 _sk_dstout_avx:
-  .byte  196,226,125,24,5,144,95,0,0         // vbroadcastss  0x5f90(%rip),%ymm0        # 6334 <_sk_callback_avx+0x14d>
+  .byte  196,226,125,24,5,172,96,0,0         // vbroadcastss  0x60ac(%rip),%ymm0        # 6450 <_sk_callback_avx+0x14d>
   .byte  197,252,92,219                      // vsubps        %ymm3,%ymm0,%ymm3
   .byte  197,228,89,196                      // vmulps        %ymm4,%ymm3,%ymm0
   .byte  197,228,89,205                      // vmulps        %ymm5,%ymm3,%ymm1
@@ -14213,7 +14213,7 @@ HIDDEN _sk_srcover_avx
 .globl _sk_srcover_avx
 FUNCTION(_sk_srcover_avx)
 _sk_srcover_avx:
-  .byte  196,98,125,24,5,115,95,0,0          // vbroadcastss  0x5f73(%rip),%ymm8        # 6338 <_sk_callback_avx+0x151>
+  .byte  196,98,125,24,5,143,96,0,0          // vbroadcastss  0x608f(%rip),%ymm8        # 6454 <_sk_callback_avx+0x151>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -14230,7 +14230,7 @@ HIDDEN _sk_dstover_avx
 .globl _sk_dstover_avx
 FUNCTION(_sk_dstover_avx)
 _sk_dstover_avx:
-  .byte  196,98,125,24,5,70,95,0,0           // vbroadcastss  0x5f46(%rip),%ymm8        # 633c <_sk_callback_avx+0x155>
+  .byte  196,98,125,24,5,98,96,0,0           // vbroadcastss  0x6062(%rip),%ymm8        # 6458 <_sk_callback_avx+0x155>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,252,88,196                      // vaddps        %ymm4,%ymm0,%ymm0
@@ -14258,7 +14258,7 @@ HIDDEN _sk_multiply_avx
 .globl _sk_multiply_avx
 FUNCTION(_sk_multiply_avx)
 _sk_multiply_avx:
-  .byte  196,98,125,24,5,5,95,0,0            // vbroadcastss  0x5f05(%rip),%ymm8        # 6340 <_sk_callback_avx+0x159>
+  .byte  196,98,125,24,5,33,96,0,0           // vbroadcastss  0x6021(%rip),%ymm8        # 645c <_sk_callback_avx+0x159>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,208                       // vmulps        %ymm0,%ymm9,%ymm10
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14318,7 +14318,7 @@ HIDDEN _sk_xor__avx
 .globl _sk_xor__avx
 FUNCTION(_sk_xor__avx)
 _sk_xor__avx:
-  .byte  196,98,125,24,5,84,94,0,0           // vbroadcastss  0x5e54(%rip),%ymm8        # 6344 <_sk_callback_avx+0x15d>
+  .byte  196,98,125,24,5,112,95,0,0          // vbroadcastss  0x5f70(%rip),%ymm8        # 6460 <_sk_callback_avx+0x15d>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14355,7 +14355,7 @@ _sk_darken_avx:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,95,209                  // vmaxps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,212,93,0,0          // vbroadcastss  0x5dd4(%rip),%ymm8        # 6348 <_sk_callback_avx+0x161>
+  .byte  196,98,125,24,5,240,94,0,0          // vbroadcastss  0x5ef0(%rip),%ymm8        # 6464 <_sk_callback_avx+0x161>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14381,7 +14381,7 @@ _sk_lighten_avx:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,128,93,0,0          // vbroadcastss  0x5d80(%rip),%ymm8        # 634c <_sk_callback_avx+0x165>
+  .byte  196,98,125,24,5,156,94,0,0          // vbroadcastss  0x5e9c(%rip),%ymm8        # 6468 <_sk_callback_avx+0x165>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14410,7 +14410,7 @@ _sk_difference_avx:
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,32,93,0,0           // vbroadcastss  0x5d20(%rip),%ymm8        # 6350 <_sk_callback_avx+0x169>
+  .byte  196,98,125,24,5,60,94,0,0           // vbroadcastss  0x5e3c(%rip),%ymm8        # 646c <_sk_callback_avx+0x169>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14433,7 +14433,7 @@ _sk_exclusion_avx:
   .byte  197,236,89,214                      // vmulps        %ymm6,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,219,92,0,0          // vbroadcastss  0x5cdb(%rip),%ymm8        # 6354 <_sk_callback_avx+0x16d>
+  .byte  196,98,125,24,5,247,93,0,0          // vbroadcastss  0x5df7(%rip),%ymm8        # 6470 <_sk_callback_avx+0x16d>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14444,7 +14444,7 @@ HIDDEN _sk_colorburn_avx
 .globl _sk_colorburn_avx
 FUNCTION(_sk_colorburn_avx)
 _sk_colorburn_avx:
-  .byte  196,98,125,24,5,198,92,0,0          // vbroadcastss  0x5cc6(%rip),%ymm8        # 6358 <_sk_callback_avx+0x171>
+  .byte  196,98,125,24,5,226,93,0,0          // vbroadcastss  0x5de2(%rip),%ymm8        # 6474 <_sk_callback_avx+0x171>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,216                       // vmulps        %ymm0,%ymm9,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
@@ -14506,7 +14506,7 @@ HIDDEN _sk_colordodge_avx
 FUNCTION(_sk_colordodge_avx)
 _sk_colordodge_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,13,194,91,0,0         // vbroadcastss  0x5bc2(%rip),%ymm9        # 635c <_sk_callback_avx+0x175>
+  .byte  196,98,125,24,13,222,92,0,0         // vbroadcastss  0x5cde(%rip),%ymm9        # 6478 <_sk_callback_avx+0x175>
   .byte  197,52,92,215                       // vsubps        %ymm7,%ymm9,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,52,92,203                       // vsubps        %ymm3,%ymm9,%ymm9
@@ -14563,7 +14563,7 @@ HIDDEN _sk_hardlight_avx
 .globl _sk_hardlight_avx
 FUNCTION(_sk_hardlight_avx)
 _sk_hardlight_avx:
-  .byte  196,98,125,24,5,212,90,0,0          // vbroadcastss  0x5ad4(%rip),%ymm8        # 6360 <_sk_callback_avx+0x179>
+  .byte  196,98,125,24,5,240,91,0,0          // vbroadcastss  0x5bf0(%rip),%ymm8        # 647c <_sk_callback_avx+0x179>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14618,7 +14618,7 @@ HIDDEN _sk_overlay_avx
 .globl _sk_overlay_avx
 FUNCTION(_sk_overlay_avx)
 _sk_overlay_avx:
-  .byte  196,98,125,24,5,253,89,0,0          // vbroadcastss  0x59fd(%rip),%ymm8        # 6364 <_sk_callback_avx+0x17d>
+  .byte  196,98,125,24,5,25,91,0,0           // vbroadcastss  0x5b19(%rip),%ymm8        # 6480 <_sk_callback_avx+0x17d>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14684,10 +14684,10 @@ _sk_softlight_avx:
   .byte  196,65,60,88,192                    // vaddps        %ymm8,%ymm8,%ymm8
   .byte  196,65,60,89,216                    // vmulps        %ymm8,%ymm8,%ymm11
   .byte  196,65,60,88,195                    // vaddps        %ymm11,%ymm8,%ymm8
-  .byte  196,98,125,24,29,244,88,0,0         // vbroadcastss  0x58f4(%rip),%ymm11        # 636c <_sk_callback_avx+0x185>
+  .byte  196,98,125,24,29,16,90,0,0          // vbroadcastss  0x5a10(%rip),%ymm11        # 6488 <_sk_callback_avx+0x185>
   .byte  196,65,28,88,235                    // vaddps        %ymm11,%ymm12,%ymm13
   .byte  196,65,20,89,192                    // vmulps        %ymm8,%ymm13,%ymm8
-  .byte  196,98,125,24,45,229,88,0,0         // vbroadcastss  0x58e5(%rip),%ymm13        # 6370 <_sk_callback_avx+0x189>
+  .byte  196,98,125,24,45,1,90,0,0           // vbroadcastss  0x5a01(%rip),%ymm13        # 648c <_sk_callback_avx+0x189>
   .byte  196,65,28,89,245                    // vmulps        %ymm13,%ymm12,%ymm14
   .byte  196,65,12,88,192                    // vaddps        %ymm8,%ymm14,%ymm8
   .byte  196,65,124,82,244                   // vrsqrtps      %ymm12,%ymm14
@@ -14698,7 +14698,7 @@ _sk_softlight_avx:
   .byte  197,4,194,255,2                     // vcmpleps      %ymm7,%ymm15,%ymm15
   .byte  196,67,13,74,240,240                // vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   .byte  197,116,88,249                      // vaddps        %ymm1,%ymm1,%ymm15
-  .byte  196,98,125,24,5,163,88,0,0          // vbroadcastss  0x58a3(%rip),%ymm8        # 6368 <_sk_callback_avx+0x181>
+  .byte  196,98,125,24,5,191,89,0,0          // vbroadcastss  0x59bf(%rip),%ymm8        # 6484 <_sk_callback_avx+0x181>
   .byte  196,65,60,92,228                    // vsubps        %ymm12,%ymm8,%ymm12
   .byte  197,132,92,195                      // vsubps        %ymm3,%ymm15,%ymm0
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
@@ -14825,12 +14825,12 @@ _sk_hue_avx:
   .byte  196,65,28,89,219                    // vmulps        %ymm11,%ymm12,%ymm11
   .byte  196,65,36,94,222                    // vdivps        %ymm14,%ymm11,%ymm11
   .byte  196,67,37,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  .byte  196,98,125,24,53,114,86,0,0         // vbroadcastss  0x5672(%rip),%ymm14        # 6374 <_sk_callback_avx+0x18d>
+  .byte  196,98,125,24,53,142,87,0,0         // vbroadcastss  0x578e(%rip),%ymm14        # 6490 <_sk_callback_avx+0x18d>
   .byte  196,65,92,89,222                    // vmulps        %ymm14,%ymm4,%ymm11
-  .byte  196,98,125,24,61,104,86,0,0         // vbroadcastss  0x5668(%rip),%ymm15        # 6378 <_sk_callback_avx+0x191>
+  .byte  196,98,125,24,61,132,87,0,0         // vbroadcastss  0x5784(%rip),%ymm15        # 6494 <_sk_callback_avx+0x191>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
-  .byte  196,226,125,24,5,89,86,0,0          // vbroadcastss  0x5659(%rip),%ymm0        # 637c <_sk_callback_avx+0x195>
+  .byte  196,226,125,24,5,117,87,0,0         // vbroadcastss  0x5775(%rip),%ymm0        # 6498 <_sk_callback_avx+0x195>
   .byte  197,76,89,232                       // vmulps        %ymm0,%ymm6,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
   .byte  196,65,52,89,238                    // vmulps        %ymm14,%ymm9,%ymm13
@@ -14891,7 +14891,7 @@ _sk_hue_avx:
   .byte  196,65,36,95,208                    // vmaxps        %ymm8,%ymm11,%ymm10
   .byte  196,195,109,74,209,240              // vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,50,85,0,0           // vbroadcastss  0x5532(%rip),%ymm8        # 6380 <_sk_callback_avx+0x199>
+  .byte  196,98,125,24,5,78,86,0,0           // vbroadcastss  0x564e(%rip),%ymm8        # 649c <_sk_callback_avx+0x199>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,201                      // vmulps        %ymm1,%ymm9,%ymm1
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14948,12 +14948,12 @@ _sk_saturation_avx:
   .byte  196,65,28,89,219                    // vmulps        %ymm11,%ymm12,%ymm11
   .byte  196,65,36,94,222                    // vdivps        %ymm14,%ymm11,%ymm11
   .byte  196,67,37,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  .byte  196,98,125,24,53,64,84,0,0          // vbroadcastss  0x5440(%rip),%ymm14        # 6384 <_sk_callback_avx+0x19d>
+  .byte  196,98,125,24,53,92,85,0,0          // vbroadcastss  0x555c(%rip),%ymm14        # 64a0 <_sk_callback_avx+0x19d>
   .byte  196,65,92,89,222                    // vmulps        %ymm14,%ymm4,%ymm11
-  .byte  196,98,125,24,61,54,84,0,0          // vbroadcastss  0x5436(%rip),%ymm15        # 6388 <_sk_callback_avx+0x1a1>
+  .byte  196,98,125,24,61,82,85,0,0          // vbroadcastss  0x5552(%rip),%ymm15        # 64a4 <_sk_callback_avx+0x1a1>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
-  .byte  196,226,125,24,5,39,84,0,0          // vbroadcastss  0x5427(%rip),%ymm0        # 638c <_sk_callback_avx+0x1a5>
+  .byte  196,226,125,24,5,67,85,0,0          // vbroadcastss  0x5543(%rip),%ymm0        # 64a8 <_sk_callback_avx+0x1a5>
   .byte  197,76,89,232                       // vmulps        %ymm0,%ymm6,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
   .byte  196,65,52,89,238                    // vmulps        %ymm14,%ymm9,%ymm13
@@ -15014,7 +15014,7 @@ _sk_saturation_avx:
   .byte  196,65,36,95,208                    // vmaxps        %ymm8,%ymm11,%ymm10
   .byte  196,195,109,74,209,240              // vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,0,83,0,0            // vbroadcastss  0x5300(%rip),%ymm8        # 6390 <_sk_callback_avx+0x1a9>
+  .byte  196,98,125,24,5,28,84,0,0           // vbroadcastss  0x541c(%rip),%ymm8        # 64ac <_sk_callback_avx+0x1a9>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,201                      // vmulps        %ymm1,%ymm9,%ymm1
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -15043,12 +15043,12 @@ _sk_color_avx:
   .byte  197,252,17,68,36,168                // vmovups       %ymm0,-0x58(%rsp)
   .byte  197,124,89,199                      // vmulps        %ymm7,%ymm0,%ymm8
   .byte  197,116,89,207                      // vmulps        %ymm7,%ymm1,%ymm9
-  .byte  196,98,125,24,45,150,82,0,0         // vbroadcastss  0x5296(%rip),%ymm13        # 6394 <_sk_callback_avx+0x1ad>
+  .byte  196,98,125,24,45,178,83,0,0         // vbroadcastss  0x53b2(%rip),%ymm13        # 64b0 <_sk_callback_avx+0x1ad>
   .byte  196,65,92,89,213                    // vmulps        %ymm13,%ymm4,%ymm10
-  .byte  196,98,125,24,53,140,82,0,0         // vbroadcastss  0x528c(%rip),%ymm14        # 6398 <_sk_callback_avx+0x1b1>
+  .byte  196,98,125,24,53,168,83,0,0         // vbroadcastss  0x53a8(%rip),%ymm14        # 64b4 <_sk_callback_avx+0x1b1>
   .byte  196,65,84,89,222                    // vmulps        %ymm14,%ymm5,%ymm11
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,61,125,82,0,0         // vbroadcastss  0x527d(%rip),%ymm15        # 639c <_sk_callback_avx+0x1b5>
+  .byte  196,98,125,24,61,153,83,0,0         // vbroadcastss  0x5399(%rip),%ymm15        # 64b8 <_sk_callback_avx+0x1b5>
   .byte  196,65,76,89,223                    // vmulps        %ymm15,%ymm6,%ymm11
   .byte  196,193,44,88,195                   // vaddps        %ymm11,%ymm10,%ymm0
   .byte  196,65,60,89,221                    // vmulps        %ymm13,%ymm8,%ymm11
@@ -15111,7 +15111,7 @@ _sk_color_avx:
   .byte  196,65,44,95,207                    // vmaxps        %ymm15,%ymm10,%ymm9
   .byte  196,195,37,74,192,0                 // vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   .byte  196,65,124,95,199                   // vmaxps        %ymm15,%ymm0,%ymm8
-  .byte  196,226,125,24,5,68,81,0,0          // vbroadcastss  0x5144(%rip),%ymm0        # 63a0 <_sk_callback_avx+0x1b9>
+  .byte  196,226,125,24,5,96,82,0,0          // vbroadcastss  0x5260(%rip),%ymm0        # 64bc <_sk_callback_avx+0x1b9>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,84,36,168                // vmulps        -0x58(%rsp),%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -15141,12 +15141,12 @@ _sk_luminosity_avx:
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
   .byte  197,100,89,205                      // vmulps        %ymm5,%ymm3,%ymm9
-  .byte  196,98,125,24,45,214,80,0,0         // vbroadcastss  0x50d6(%rip),%ymm13        # 63a4 <_sk_callback_avx+0x1bd>
+  .byte  196,98,125,24,45,242,81,0,0         // vbroadcastss  0x51f2(%rip),%ymm13        # 64c0 <_sk_callback_avx+0x1bd>
   .byte  196,65,108,89,213                   // vmulps        %ymm13,%ymm2,%ymm10
-  .byte  196,98,125,24,53,204,80,0,0         // vbroadcastss  0x50cc(%rip),%ymm14        # 63a8 <_sk_callback_avx+0x1c1>
+  .byte  196,98,125,24,53,232,81,0,0         // vbroadcastss  0x51e8(%rip),%ymm14        # 64c4 <_sk_callback_avx+0x1c1>
   .byte  196,65,116,89,222                   // vmulps        %ymm14,%ymm1,%ymm11
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,61,189,80,0,0         // vbroadcastss  0x50bd(%rip),%ymm15        # 63ac <_sk_callback_avx+0x1c5>
+  .byte  196,98,125,24,61,217,81,0,0         // vbroadcastss  0x51d9(%rip),%ymm15        # 64c8 <_sk_callback_avx+0x1c5>
   .byte  196,65,28,89,223                    // vmulps        %ymm15,%ymm12,%ymm11
   .byte  196,193,44,88,195                   // vaddps        %ymm11,%ymm10,%ymm0
   .byte  196,65,60,89,221                    // vmulps        %ymm13,%ymm8,%ymm11
@@ -15209,7 +15209,7 @@ _sk_luminosity_avx:
   .byte  196,65,44,95,207                    // vmaxps        %ymm15,%ymm10,%ymm9
   .byte  196,195,37,74,192,0                 // vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   .byte  196,65,124,95,199                   // vmaxps        %ymm15,%ymm0,%ymm8
-  .byte  196,226,125,24,5,132,79,0,0         // vbroadcastss  0x4f84(%rip),%ymm0        # 63b0 <_sk_callback_avx+0x1c9>
+  .byte  196,226,125,24,5,160,80,0,0         // vbroadcastss  0x50a0(%rip),%ymm0        # 64cc <_sk_callback_avx+0x1c9>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -15245,7 +15245,7 @@ HIDDEN _sk_clamp_1_avx
 .globl _sk_clamp_1_avx
 FUNCTION(_sk_clamp_1_avx)
 _sk_clamp_1_avx:
-  .byte  196,98,125,24,5,23,79,0,0           // vbroadcastss  0x4f17(%rip),%ymm8        # 63b4 <_sk_callback_avx+0x1cd>
+  .byte  196,98,125,24,5,51,80,0,0           // vbroadcastss  0x5033(%rip),%ymm8        # 64d0 <_sk_callback_avx+0x1cd>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
@@ -15257,7 +15257,7 @@ HIDDEN _sk_clamp_a_avx
 .globl _sk_clamp_a_avx
 FUNCTION(_sk_clamp_a_avx)
 _sk_clamp_a_avx:
-  .byte  196,98,125,24,5,250,78,0,0          // vbroadcastss  0x4efa(%rip),%ymm8        # 63b8 <_sk_callback_avx+0x1d1>
+  .byte  196,98,125,24,5,22,80,0,0           // vbroadcastss  0x5016(%rip),%ymm8        # 64d4 <_sk_callback_avx+0x1d1>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  197,252,93,195                      // vminps        %ymm3,%ymm0,%ymm0
   .byte  197,244,93,203                      // vminps        %ymm3,%ymm1,%ymm1
@@ -15343,7 +15343,7 @@ FUNCTION(_sk_unpremul_avx)
 _sk_unpremul_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,65,100,194,200,0                // vcmpeqps      %ymm8,%ymm3,%ymm9
-  .byte  196,98,125,24,21,66,78,0,0          // vbroadcastss  0x4e42(%rip),%ymm10        # 63bc <_sk_callback_avx+0x1d5>
+  .byte  196,98,125,24,21,94,79,0,0          // vbroadcastss  0x4f5e(%rip),%ymm10        # 64d8 <_sk_callback_avx+0x1d5>
   .byte  197,44,94,211                       // vdivps        %ymm3,%ymm10,%ymm10
   .byte  196,67,45,74,192,144                // vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
@@ -15356,17 +15356,17 @@ HIDDEN _sk_from_srgb_avx
 .globl _sk_from_srgb_avx
 FUNCTION(_sk_from_srgb_avx)
 _sk_from_srgb_avx:
-  .byte  196,98,125,24,5,35,78,0,0           // vbroadcastss  0x4e23(%rip),%ymm8        # 63c0 <_sk_callback_avx+0x1d9>
+  .byte  196,98,125,24,5,63,79,0,0           // vbroadcastss  0x4f3f(%rip),%ymm8        # 64dc <_sk_callback_avx+0x1d9>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  197,124,89,208                      // vmulps        %ymm0,%ymm0,%ymm10
-  .byte  196,98,125,24,29,21,78,0,0          // vbroadcastss  0x4e15(%rip),%ymm11        # 63c4 <_sk_callback_avx+0x1dd>
+  .byte  196,98,125,24,29,49,79,0,0          // vbroadcastss  0x4f31(%rip),%ymm11        # 64e0 <_sk_callback_avx+0x1dd>
   .byte  196,65,124,89,227                   // vmulps        %ymm11,%ymm0,%ymm12
-  .byte  196,98,125,24,45,11,78,0,0          // vbroadcastss  0x4e0b(%rip),%ymm13        # 63c8 <_sk_callback_avx+0x1e1>
+  .byte  196,98,125,24,45,39,79,0,0          // vbroadcastss  0x4f27(%rip),%ymm13        # 64e4 <_sk_callback_avx+0x1e1>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,44,89,212                    // vmulps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,252,77,0,0         // vbroadcastss  0x4dfc(%rip),%ymm12        # 63cc <_sk_callback_avx+0x1e5>
+  .byte  196,98,125,24,37,24,79,0,0          // vbroadcastss  0x4f18(%rip),%ymm12        # 64e8 <_sk_callback_avx+0x1e5>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,53,242,77,0,0         // vbroadcastss  0x4df2(%rip),%ymm14        # 63d0 <_sk_callback_avx+0x1e9>
+  .byte  196,98,125,24,53,14,79,0,0          // vbroadcastss  0x4f0e(%rip),%ymm14        # 64ec <_sk_callback_avx+0x1e9>
   .byte  196,193,124,194,198,1               // vcmpltps      %ymm14,%ymm0,%ymm0
   .byte  196,195,45,74,193,0                 // vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   .byte  196,65,116,89,200                   // vmulps        %ymm8,%ymm1,%ymm9
@@ -15393,20 +15393,20 @@ HIDDEN _sk_to_srgb_avx
 FUNCTION(_sk_to_srgb_avx)
 _sk_to_srgb_avx:
   .byte  197,124,82,200                      // vrsqrtps      %ymm0,%ymm9
-  .byte  196,98,125,24,5,135,77,0,0          // vbroadcastss  0x4d87(%rip),%ymm8        # 63d4 <_sk_callback_avx+0x1ed>
+  .byte  196,98,125,24,5,163,78,0,0          // vbroadcastss  0x4ea3(%rip),%ymm8        # 64f0 <_sk_callback_avx+0x1ed>
   .byte  196,65,124,89,208                   // vmulps        %ymm8,%ymm0,%ymm10
-  .byte  196,98,125,24,29,125,77,0,0         // vbroadcastss  0x4d7d(%rip),%ymm11        # 63d8 <_sk_callback_avx+0x1f1>
+  .byte  196,98,125,24,29,153,78,0,0         // vbroadcastss  0x4e99(%rip),%ymm11        # 64f4 <_sk_callback_avx+0x1f1>
   .byte  196,65,52,89,227                    // vmulps        %ymm11,%ymm9,%ymm12
-  .byte  196,98,125,24,45,115,77,0,0         // vbroadcastss  0x4d73(%rip),%ymm13        # 63dc <_sk_callback_avx+0x1f5>
+  .byte  196,98,125,24,45,143,78,0,0         // vbroadcastss  0x4e8f(%rip),%ymm13        # 64f8 <_sk_callback_avx+0x1f5>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,52,89,228                    // vmulps        %ymm12,%ymm9,%ymm12
-  .byte  196,98,125,24,53,100,77,0,0         // vbroadcastss  0x4d64(%rip),%ymm14        # 63e0 <_sk_callback_avx+0x1f9>
+  .byte  196,98,125,24,53,128,78,0,0         // vbroadcastss  0x4e80(%rip),%ymm14        # 64fc <_sk_callback_avx+0x1f9>
   .byte  196,65,28,88,230                    // vaddps        %ymm14,%ymm12,%ymm12
-  .byte  196,98,125,24,61,90,77,0,0          // vbroadcastss  0x4d5a(%rip),%ymm15        # 63e4 <_sk_callback_avx+0x1fd>
+  .byte  196,98,125,24,61,118,78,0,0         // vbroadcastss  0x4e76(%rip),%ymm15        # 6500 <_sk_callback_avx+0x1fd>
   .byte  196,65,52,88,207                    // vaddps        %ymm15,%ymm9,%ymm9
   .byte  196,65,124,83,201                   // vrcpps        %ymm9,%ymm9
   .byte  196,65,52,89,204                    // vmulps        %ymm12,%ymm9,%ymm9
-  .byte  196,98,125,24,37,70,77,0,0          // vbroadcastss  0x4d46(%rip),%ymm12        # 63e8 <_sk_callback_avx+0x201>
+  .byte  196,98,125,24,37,98,78,0,0          // vbroadcastss  0x4e62(%rip),%ymm12        # 6504 <_sk_callback_avx+0x201>
   .byte  196,193,124,194,196,1               // vcmpltps      %ymm12,%ymm0,%ymm0
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  197,124,82,201                      // vrsqrtps      %ymm1,%ymm9
@@ -15443,7 +15443,7 @@ _sk_rgb_to_hsl_avx:
   .byte  197,124,93,201                      // vminps        %ymm1,%ymm0,%ymm9
   .byte  197,52,93,202                       // vminps        %ymm2,%ymm9,%ymm9
   .byte  196,65,60,92,209                    // vsubps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,172,76,0,0         // vbroadcastss  0x4cac(%rip),%ymm11        # 63ec <_sk_callback_avx+0x205>
+  .byte  196,98,125,24,29,200,77,0,0         // vbroadcastss  0x4dc8(%rip),%ymm11        # 6508 <_sk_callback_avx+0x205>
   .byte  196,65,36,94,218                    // vdivps        %ymm10,%ymm11,%ymm11
   .byte  197,116,92,226                      // vsubps        %ymm2,%ymm1,%ymm12
   .byte  196,65,28,89,227                    // vmulps        %ymm11,%ymm12,%ymm12
@@ -15453,19 +15453,19 @@ _sk_rgb_to_hsl_avx:
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  197,252,92,201                      // vsubps        %ymm1,%ymm0,%ymm1
   .byte  196,193,116,89,203                  // vmulps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,133,76,0,0         // vbroadcastss  0x4c85(%rip),%ymm11        # 63f8 <_sk_callback_avx+0x211>
+  .byte  196,98,125,24,29,161,77,0,0         // vbroadcastss  0x4da1(%rip),%ymm11        # 6514 <_sk_callback_avx+0x211>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,115,76,0,0         // vbroadcastss  0x4c73(%rip),%ymm11        # 63f4 <_sk_callback_avx+0x20d>
+  .byte  196,98,125,24,29,143,77,0,0         // vbroadcastss  0x4d8f(%rip),%ymm11        # 6510 <_sk_callback_avx+0x20d>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,227,117,74,202,224              // vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,91,76,0,0         // vbroadcastss  0x4c5b(%rip),%ymm2        # 63f0 <_sk_callback_avx+0x209>
+  .byte  196,226,125,24,21,119,77,0,0        // vbroadcastss  0x4d77(%rip),%ymm2        # 650c <_sk_callback_avx+0x209>
   .byte  196,65,12,87,246                    // vxorps        %ymm14,%ymm14,%ymm14
   .byte  196,227,13,74,210,208               // vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   .byte  197,188,194,192,0                   // vcmpeqps      %ymm0,%ymm8,%ymm0
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
   .byte  196,227,117,74,194,0                // vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   .byte  196,193,60,88,201                   // vaddps        %ymm9,%ymm8,%ymm1
-  .byte  196,98,125,24,37,66,76,0,0          // vbroadcastss  0x4c42(%rip),%ymm12        # 6400 <_sk_callback_avx+0x219>
+  .byte  196,98,125,24,37,94,77,0,0          // vbroadcastss  0x4d5e(%rip),%ymm12        # 651c <_sk_callback_avx+0x219>
   .byte  196,193,116,89,212                  // vmulps        %ymm12,%ymm1,%ymm2
   .byte  197,28,194,226,1                    // vcmpltps      %ymm2,%ymm12,%ymm12
   .byte  196,65,36,92,216                    // vsubps        %ymm8,%ymm11,%ymm11
@@ -15475,7 +15475,7 @@ _sk_rgb_to_hsl_avx:
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  196,195,125,74,198,128              // vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   .byte  196,195,117,74,206,128              // vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  .byte  196,98,125,24,5,5,76,0,0            // vbroadcastss  0x4c05(%rip),%ymm8        # 63fc <_sk_callback_avx+0x215>
+  .byte  196,98,125,24,5,33,77,0,0           // vbroadcastss  0x4d21(%rip),%ymm8        # 6518 <_sk_callback_avx+0x215>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15492,7 +15492,7 @@ _sk_hsl_to_rgb_avx:
   .byte  197,252,17,92,36,128                // vmovups       %ymm3,-0x80(%rsp)
   .byte  197,252,40,225                      // vmovaps       %ymm1,%ymm4
   .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  196,98,125,24,5,210,75,0,0          // vbroadcastss  0x4bd2(%rip),%ymm8        # 6404 <_sk_callback_avx+0x21d>
+  .byte  196,98,125,24,5,238,76,0,0          // vbroadcastss  0x4cee(%rip),%ymm8        # 6520 <_sk_callback_avx+0x21d>
   .byte  197,60,194,202,2                    // vcmpleps      %ymm2,%ymm8,%ymm9
   .byte  197,92,89,210                       // vmulps        %ymm2,%ymm4,%ymm10
   .byte  196,65,92,92,218                    // vsubps        %ymm10,%ymm4,%ymm11
@@ -15500,23 +15500,23 @@ _sk_hsl_to_rgb_avx:
   .byte  197,52,88,210                       // vaddps        %ymm2,%ymm9,%ymm10
   .byte  197,108,88,202                      // vaddps        %ymm2,%ymm2,%ymm9
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,29,172,75,0,0         // vbroadcastss  0x4bac(%rip),%ymm11        # 6408 <_sk_callback_avx+0x221>
+  .byte  196,98,125,24,29,200,76,0,0         // vbroadcastss  0x4cc8(%rip),%ymm11        # 6524 <_sk_callback_avx+0x221>
   .byte  196,65,100,88,219                   // vaddps        %ymm11,%ymm3,%ymm11
   .byte  196,67,125,8,227,1                  // vroundps      $0x1,%ymm11,%ymm12
   .byte  196,65,36,92,252                    // vsubps        %ymm12,%ymm11,%ymm15
   .byte  196,65,44,92,217                    // vsubps        %ymm9,%ymm10,%ymm11
-  .byte  196,98,125,24,37,150,75,0,0         // vbroadcastss  0x4b96(%rip),%ymm12        # 6410 <_sk_callback_avx+0x229>
+  .byte  196,98,125,24,37,178,76,0,0         // vbroadcastss  0x4cb2(%rip),%ymm12        # 652c <_sk_callback_avx+0x229>
   .byte  196,193,4,89,196                    // vmulps        %ymm12,%ymm15,%ymm0
-  .byte  196,98,125,24,45,140,75,0,0         // vbroadcastss  0x4b8c(%rip),%ymm13        # 6414 <_sk_callback_avx+0x22d>
+  .byte  196,98,125,24,45,168,76,0,0         // vbroadcastss  0x4ca8(%rip),%ymm13        # 6530 <_sk_callback_avx+0x22d>
   .byte  197,20,92,240                       // vsubps        %ymm0,%ymm13,%ymm14
   .byte  196,65,36,89,246                    // vmulps        %ymm14,%ymm11,%ymm14
   .byte  196,65,52,88,246                    // vaddps        %ymm14,%ymm9,%ymm14
-  .byte  196,226,125,24,13,109,75,0,0        // vbroadcastss  0x4b6d(%rip),%ymm1        # 640c <_sk_callback_avx+0x225>
+  .byte  196,226,125,24,13,137,76,0,0        // vbroadcastss  0x4c89(%rip),%ymm1        # 6528 <_sk_callback_avx+0x225>
   .byte  196,193,116,194,255,2               // vcmpleps      %ymm15,%ymm1,%ymm7
   .byte  196,195,13,74,249,112               // vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   .byte  196,65,60,194,247,2                 // vcmpleps      %ymm15,%ymm8,%ymm14
   .byte  196,227,45,74,255,224               // vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  .byte  196,98,125,24,53,88,75,0,0          // vbroadcastss  0x4b58(%rip),%ymm14        # 6418 <_sk_callback_avx+0x231>
+  .byte  196,98,125,24,53,116,76,0,0         // vbroadcastss  0x4c74(%rip),%ymm14        # 6534 <_sk_callback_avx+0x231>
   .byte  196,65,12,194,255,2                 // vcmpleps      %ymm15,%ymm14,%ymm15
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -15535,7 +15535,7 @@ _sk_hsl_to_rgb_avx:
   .byte  197,164,89,247                      // vmulps        %ymm7,%ymm11,%ymm6
   .byte  197,180,88,246                      // vaddps        %ymm6,%ymm9,%ymm6
   .byte  196,227,77,74,237,0                 // vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  .byte  196,226,125,24,5,250,74,0,0         // vbroadcastss  0x4afa(%rip),%ymm0        # 641c <_sk_callback_avx+0x235>
+  .byte  196,226,125,24,5,22,76,0,0          // vbroadcastss  0x4c16(%rip),%ymm0        # 6538 <_sk_callback_avx+0x235>
   .byte  197,228,88,192                      // vaddps        %ymm0,%ymm3,%ymm0
   .byte  196,227,125,8,216,1                 // vroundps      $0x1,%ymm0,%ymm3
   .byte  197,252,92,195                      // vsubps        %ymm3,%ymm0,%ymm0
@@ -15594,7 +15594,7 @@ _sk_scale_u8_avx:
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,35,74,0,0          // vbroadcastss  0x4a23(%rip),%ymm9        # 6420 <_sk_callback_avx+0x239>
+  .byte  196,98,125,24,13,63,75,0,0          // vbroadcastss  0x4b3f(%rip),%ymm9        # 653c <_sk_callback_avx+0x239>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -15653,7 +15653,7 @@ _sk_lerp_u8_avx:
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,111,73,0,0         // vbroadcastss  0x496f(%rip),%ymm9        # 6424 <_sk_callback_avx+0x23d>
+  .byte  196,98,125,24,13,139,74,0,0         // vbroadcastss  0x4a8b(%rip),%ymm9        # 6540 <_sk_callback_avx+0x23d>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -15696,20 +15696,20 @@ _sk_lerp_565_avx:
   .byte  196,65,57,105,201                   // vpunpckhwd    %xmm9,%xmm8,%xmm9
   .byte  196,66,121,51,192                   // vpmovzxwd     %xmm8,%xmm8
   .byte  196,67,61,24,193,1                  // vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,217,72,0,0         // vbroadcastss  0x48d9(%rip),%ymm9        # 6428 <_sk_callback_avx+0x241>
+  .byte  196,98,125,24,13,245,73,0,0         // vbroadcastss  0x49f5(%rip),%ymm9        # 6544 <_sk_callback_avx+0x241>
   .byte  196,65,60,84,201                    // vandps        %ymm9,%ymm8,%ymm9
   .byte  196,65,124,91,201                   // vcvtdq2ps     %ymm9,%ymm9
-  .byte  196,98,125,24,21,202,72,0,0         // vbroadcastss  0x48ca(%rip),%ymm10        # 642c <_sk_callback_avx+0x245>
+  .byte  196,98,125,24,21,230,73,0,0         // vbroadcastss  0x49e6(%rip),%ymm10        # 6548 <_sk_callback_avx+0x245>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,192,72,0,0         // vbroadcastss  0x48c0(%rip),%ymm10        # 6430 <_sk_callback_avx+0x249>
+  .byte  196,98,125,24,21,220,73,0,0         // vbroadcastss  0x49dc(%rip),%ymm10        # 654c <_sk_callback_avx+0x249>
   .byte  196,65,60,84,210                    // vandps        %ymm10,%ymm8,%ymm10
   .byte  196,65,124,91,210                   // vcvtdq2ps     %ymm10,%ymm10
-  .byte  196,98,125,24,29,177,72,0,0         // vbroadcastss  0x48b1(%rip),%ymm11        # 6434 <_sk_callback_avx+0x24d>
+  .byte  196,98,125,24,29,205,73,0,0         // vbroadcastss  0x49cd(%rip),%ymm11        # 6550 <_sk_callback_avx+0x24d>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,29,167,72,0,0         // vbroadcastss  0x48a7(%rip),%ymm11        # 6438 <_sk_callback_avx+0x251>
+  .byte  196,98,125,24,29,195,73,0,0         // vbroadcastss  0x49c3(%rip),%ymm11        # 6554 <_sk_callback_avx+0x251>
   .byte  196,65,60,84,195                    // vandps        %ymm11,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,29,152,72,0,0         // vbroadcastss  0x4898(%rip),%ymm11        # 643c <_sk_callback_avx+0x255>
+  .byte  196,98,125,24,29,180,73,0,0         // vbroadcastss  0x49b4(%rip),%ymm11        # 6558 <_sk_callback_avx+0x255>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
@@ -15756,7 +15756,7 @@ _sk_lerp_565_avx:
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  233,255,255,255,225                 // jmpq          ffffffffe2001c74 <_sk_callback_avx+0xffffffffe1ffba8d>
+  .byte  233,255,255,255,225                 // jmpq          ffffffffe2001c74 <_sk_callback_avx+0xffffffffe1ffb971>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -15778,21 +15778,19 @@ HIDDEN _sk_load_tables_avx
 .globl _sk_load_tables_avx
 FUNCTION(_sk_load_tables_avx)
 _sk_load_tables_avx:
-  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,3,8                              // add           (%rax),%r9
-  .byte  77,133,192                          // test          %r8,%r8
-  .byte  15,133,26,2,0,0                     // jne           1ebb <_sk_load_tables_avx+0x233>
-  .byte  196,65,124,16,17                    // vmovups       (%r9),%ymm10
+  .byte  76,139,0                            // mov           (%rax),%r8
+  .byte  72,133,201                          // test          %rcx,%rcx
+  .byte  15,133,26,2,0,0                     // jne           1eb0 <_sk_load_tables_avx+0x228>
+  .byte  196,65,124,16,4,184                 // vmovups       (%r8,%rdi,4),%ymm8
   .byte  85                                  // push          %rbp
   .byte  65,87                               // push          %r15
   .byte  65,86                               // push          %r14
   .byte  65,85                               // push          %r13
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
-  .byte  197,124,40,13,232,74,0,0            // vmovaps       0x4ae8(%rip),%ymm9        # 67a0 <_sk_callback_avx+0x5b9>
-  .byte  196,193,44,84,193                   // vandps        %ymm9,%ymm10,%ymm0
+  .byte  197,124,40,13,146,75,0,0            // vmovaps       0x4b92(%rip),%ymm9        # 6840 <_sk_callback_avx+0x53d>
+  .byte  196,193,60,84,193                   // vandps        %ymm9,%ymm8,%ymm0
   .byte  196,193,249,126,193                 // vmovq         %xmm0,%r9
   .byte  69,137,203                          // mov           %r9d,%r11d
   .byte  196,195,249,22,194,1                // vpextrq       $0x1,%xmm0,%r10
@@ -15800,26 +15798,26 @@ _sk_load_tables_avx:
   .byte  73,193,234,32                       // shr           $0x20,%r10
   .byte  73,193,233,32                       // shr           $0x20,%r9
   .byte  196,227,125,25,192,1                // vextractf128  $0x1,%ymm0,%xmm0
-  .byte  196,225,249,126,195                 // vmovq         %xmm0,%rbx
-  .byte  65,137,223                          // mov           %ebx,%r15d
-  .byte  196,227,249,22,193,1                // vpextrq       $0x1,%xmm0,%rcx
-  .byte  65,137,205                          // mov           %ecx,%r13d
-  .byte  72,193,233,32                       // shr           $0x20,%rcx
+  .byte  196,193,249,126,196                 // vmovq         %xmm0,%r12
+  .byte  69,137,231                          // mov           %r12d,%r15d
+  .byte  196,227,249,22,195,1                // vpextrq       $0x1,%xmm0,%rbx
+  .byte  65,137,221                          // mov           %ebx,%r13d
   .byte  72,193,235,32                       // shr           $0x20,%rbx
+  .byte  73,193,236,32                       // shr           $0x20,%r12
   .byte  72,139,104,8                        // mov           0x8(%rax),%rbp
-  .byte  76,139,96,16                        // mov           0x10(%rax),%r12
+  .byte  76,139,64,16                        // mov           0x10(%rax),%r8
   .byte  196,161,122,16,68,189,0             // vmovss        0x0(%rbp,%r15,4),%xmm0
-  .byte  196,227,121,33,68,157,0,16          // vinsertps     $0x10,0x0(%rbp,%rbx,4),%xmm0,%xmm0
+  .byte  196,163,121,33,68,165,0,16          // vinsertps     $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
   .byte  196,163,121,33,68,173,0,32          // vinsertps     $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
-  .byte  196,227,121,33,68,141,0,48          // vinsertps     $0x30,0x0(%rbp,%rcx,4),%xmm0,%xmm0
+  .byte  196,227,121,33,68,157,0,48          // vinsertps     $0x30,0x0(%rbp,%rbx,4),%xmm0,%xmm0
   .byte  196,161,122,16,76,157,0             // vmovss        0x0(%rbp,%r11,4),%xmm1
   .byte  196,163,113,33,76,141,0,16          // vinsertps     $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
   .byte  196,163,113,33,76,181,0,32          // vinsertps     $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
   .byte  196,163,113,33,76,149,0,48          // vinsertps     $0x30,0x0(%rbp,%r10,4),%xmm1,%xmm1
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  .byte  196,193,113,114,210,8               // vpsrld        $0x8,%xmm10,%xmm1
-  .byte  196,67,125,25,208,1                 // vextractf128  $0x1,%ymm10,%xmm8
-  .byte  196,193,105,114,208,8               // vpsrld        $0x8,%xmm8,%xmm2
+  .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
+  .byte  196,67,125,25,194,1                 // vextractf128  $0x1,%ymm8,%xmm10
+  .byte  196,193,105,114,210,8               // vpsrld        $0x8,%xmm10,%xmm2
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  196,193,116,84,201                  // vandps        %ymm9,%ymm1,%ymm1
   .byte  196,193,249,126,201                 // vmovq         %xmm1,%r9
@@ -15829,36 +15827,36 @@ _sk_load_tables_avx:
   .byte  73,193,234,32                       // shr           $0x20,%r10
   .byte  73,193,233,32                       // shr           $0x20,%r9
   .byte  196,227,125,25,201,1                // vextractf128  $0x1,%ymm1,%xmm1
-  .byte  196,225,249,126,203                 // vmovq         %xmm1,%rbx
-  .byte  65,137,223                          // mov           %ebx,%r15d
-  .byte  196,227,249,22,205,1                // vpextrq       $0x1,%xmm1,%rbp
-  .byte  137,233                             // mov           %ebp,%ecx
-  .byte  72,193,237,32                       // shr           $0x20,%rbp
+  .byte  196,225,249,126,205                 // vmovq         %xmm1,%rbp
+  .byte  65,137,239                          // mov           %ebp,%r15d
+  .byte  196,227,249,22,203,1                // vpextrq       $0x1,%xmm1,%rbx
+  .byte  65,137,220                          // mov           %ebx,%r12d
   .byte  72,193,235,32                       // shr           $0x20,%rbx
-  .byte  196,129,122,16,12,188               // vmovss        (%r12,%r15,4),%xmm1
-  .byte  196,195,113,33,12,156,16            // vinsertps     $0x10,(%r12,%rbx,4),%xmm1,%xmm1
-  .byte  196,193,122,16,20,140               // vmovss        (%r12,%rcx,4),%xmm2
+  .byte  72,193,237,32                       // shr           $0x20,%rbp
+  .byte  196,129,122,16,12,184               // vmovss        (%r8,%r15,4),%xmm1
+  .byte  196,195,113,33,12,168,16            // vinsertps     $0x10,(%r8,%rbp,4),%xmm1,%xmm1
+  .byte  196,129,122,16,20,160               // vmovss        (%r8,%r12,4),%xmm2
   .byte  196,227,113,33,202,32               // vinsertps     $0x20,%xmm2,%xmm1,%xmm1
-  .byte  196,193,122,16,20,172               // vmovss        (%r12,%rbp,4),%xmm2
+  .byte  196,193,122,16,20,152               // vmovss        (%r8,%rbx,4),%xmm2
   .byte  196,227,113,33,202,48               // vinsertps     $0x30,%xmm2,%xmm1,%xmm1
-  .byte  196,129,122,16,20,156               // vmovss        (%r12,%r11,4),%xmm2
-  .byte  196,131,105,33,20,140,16            // vinsertps     $0x10,(%r12,%r9,4),%xmm2,%xmm2
-  .byte  196,129,122,16,28,180               // vmovss        (%r12,%r14,4),%xmm3
+  .byte  196,129,122,16,20,152               // vmovss        (%r8,%r11,4),%xmm2
+  .byte  196,131,105,33,20,136,16            // vinsertps     $0x10,(%r8,%r9,4),%xmm2,%xmm2
+  .byte  196,129,122,16,28,176               // vmovss        (%r8,%r14,4),%xmm3
   .byte  196,227,105,33,211,32               // vinsertps     $0x20,%xmm3,%xmm2,%xmm2
-  .byte  196,129,122,16,28,148               // vmovss        (%r12,%r10,4),%xmm3
+  .byte  196,129,122,16,28,144               // vmovss        (%r8,%r10,4),%xmm3
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,227,109,24,201,1                // vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
   .byte  72,139,64,24                        // mov           0x18(%rax),%rax
-  .byte  196,193,105,114,210,16              // vpsrld        $0x10,%xmm10,%xmm2
-  .byte  196,193,97,114,208,16               // vpsrld        $0x10,%xmm8,%xmm3
+  .byte  196,193,105,114,208,16              // vpsrld        $0x10,%xmm8,%xmm2
+  .byte  196,193,97,114,210,16               // vpsrld        $0x10,%xmm10,%xmm3
   .byte  196,227,109,24,211,1                // vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
   .byte  196,193,108,84,209                  // vandps        %ymm9,%ymm2,%ymm2
-  .byte  196,193,249,126,209                 // vmovq         %xmm2,%r9
-  .byte  69,137,202                          // mov           %r9d,%r10d
-  .byte  196,227,249,22,209,1                // vpextrq       $0x1,%xmm2,%rcx
-  .byte  65,137,203                          // mov           %ecx,%r11d
-  .byte  72,193,233,32                       // shr           $0x20,%rcx
+  .byte  196,193,249,126,208                 // vmovq         %xmm2,%r8
+  .byte  69,137,194                          // mov           %r8d,%r10d
+  .byte  196,195,249,22,209,1                // vpextrq       $0x1,%xmm2,%r9
+  .byte  69,137,203                          // mov           %r9d,%r11d
   .byte  73,193,233,32                       // shr           $0x20,%r9
+  .byte  73,193,232,32                       // shr           $0x20,%r8
   .byte  196,227,125,25,210,1                // vextractf128  $0x1,%ymm2,%xmm2
   .byte  196,225,249,126,213                 // vmovq         %xmm2,%rbp
   .byte  65,137,238                          // mov           %ebp,%r14d
@@ -15873,20 +15871,19 @@ _sk_load_tables_avx:
   .byte  197,250,16,28,152                   // vmovss        (%rax,%rbx,4),%xmm3
   .byte  196,99,105,33,203,48                // vinsertps     $0x30,%xmm3,%xmm2,%xmm9
   .byte  196,161,122,16,28,144               // vmovss        (%rax,%r10,4),%xmm3
-  .byte  196,163,97,33,28,136,16             // vinsertps     $0x10,(%rax,%r9,4),%xmm3,%xmm3
+  .byte  196,163,97,33,28,128,16             // vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
   .byte  196,161,122,16,20,152               // vmovss        (%rax,%r11,4),%xmm2
   .byte  196,227,97,33,210,32                // vinsertps     $0x20,%xmm2,%xmm3,%xmm2
-  .byte  197,250,16,28,136                   // vmovss        (%rax,%rcx,4),%xmm3
+  .byte  196,161,122,16,28,136               // vmovss        (%rax,%r9,4),%xmm3
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,195,109,24,209,1                // vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
-  .byte  196,193,49,114,210,24               // vpsrld        $0x18,%xmm10,%xmm9
-  .byte  196,193,97,114,208,24               // vpsrld        $0x18,%xmm8,%xmm3
-  .byte  196,227,53,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
+  .byte  196,193,57,114,208,24               // vpsrld        $0x18,%xmm8,%xmm8
+  .byte  196,193,97,114,210,24               // vpsrld        $0x18,%xmm10,%xmm3
+  .byte  196,227,61,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,155,69,0,0          // vbroadcastss  0x459b(%rip),%ymm8        # 6440 <_sk_callback_avx+0x259>
+  .byte  196,98,125,24,5,191,70,0,0          // vbroadcastss  0x46bf(%rip),%ymm8        # 655c <_sk_callback_avx+0x259>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,93                               // pop           %r13
@@ -15894,20 +15891,57 @@ _sk_load_tables_avx:
   .byte  65,95                               // pop           %r15
   .byte  93                                  // pop           %rbp
   .byte  255,224                             // jmpq          *%rax
-  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
-  .byte  68,41,193                           // sub           %r8d,%ecx
-  .byte  192,225,3                           // shl           $0x3,%cl
-  .byte  73,199,194,255,255,255,255          // mov           $0xffffffffffffffff,%r10
-  .byte  73,211,234                          // shr           %cl,%r10
-  .byte  196,193,249,110,194                 // vmovq         %r10,%xmm0
-  .byte  196,226,121,48,192                  // vpmovzxbw     %xmm0,%xmm0
-  .byte  196,226,121,0,13,61,72,0,0          // vpshufb       0x483d(%rip),%xmm0,%xmm1        # 6720 <_sk_callback_avx+0x539>
-  .byte  196,226,121,33,201                  // vpmovsxbd     %xmm1,%xmm1
-  .byte  196,226,121,0,5,63,72,0,0           // vpshufb       0x483f(%rip),%xmm0,%xmm0        # 6730 <_sk_callback_avx+0x549>
-  .byte  196,226,121,33,192                  // vpmovsxbd     %xmm0,%xmm0
-  .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  .byte  196,66,125,44,17                    // vmaskmovps    (%r9),%ymm0,%ymm10
-  .byte  233,160,253,255,255                 // jmpq          1ca6 <_sk_load_tables_avx+0x1e>
+  .byte  65,137,201                          // mov           %ecx,%r9d
+  .byte  65,128,225,7                        // and           $0x7,%r9b
+  .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
+  .byte  65,254,201                          // dec           %r9b
+  .byte  65,128,249,6                        // cmp           $0x6,%r9b
+  .byte  15,135,211,253,255,255              // ja            1c9c <_sk_load_tables_avx+0x14>
+  .byte  69,15,182,201                       // movzbl        %r9b,%r9d
+  .byte  76,141,21,140,0,0,0                 // lea           0x8c(%rip),%r10        # 1f60 <_sk_load_tables_avx+0x2d8>
+  .byte  79,99,12,138                        // movslq        (%r10,%r9,4),%r9
+  .byte  77,1,209                            // add           %r10,%r9
+  .byte  65,255,225                          // jmpq          *%r9
+  .byte  196,193,121,110,68,184,24           // vmovd         0x18(%r8,%rdi,4),%xmm0
+  .byte  197,249,112,192,68                  // vpshufd       $0x44,%xmm0,%xmm0
+  .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
+  .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
+  .byte  196,99,117,12,192,64                // vblendps      $0x40,%ymm0,%ymm1,%ymm8
+  .byte  196,99,125,25,192,1                 // vextractf128  $0x1,%ymm8,%xmm0
+  .byte  196,195,121,34,68,184,20,1          // vpinsrd       $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
+  .byte  196,99,61,24,192,1                  // vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
+  .byte  196,99,125,25,192,1                 // vextractf128  $0x1,%ymm8,%xmm0
+  .byte  196,195,121,34,68,184,16,0          // vpinsrd       $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
+  .byte  196,99,61,24,192,1                  // vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
+  .byte  196,195,57,34,68,184,12,3           // vpinsrd       $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
+  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  .byte  196,195,57,34,68,184,8,2            // vpinsrd       $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
+  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  .byte  196,195,57,34,68,184,4,1            // vpinsrd       $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
+  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  .byte  196,195,57,34,4,184,0               // vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
+  .byte  196,99,61,12,192,15                 // vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  .byte  233,62,253,255,255                  // jmpq          1c9c <_sk_load_tables_avx+0x14>
+  .byte  102,144                             // xchg          %ax,%ax
+  .byte  236                                 // in            (%dx),%al
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  222,255                             // fdivrp        %st,%st(7)
+  .byte  255                                 // (bad)
+  .byte  255,208                             // callq         *%rax
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,194                             // inc           %edx
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,174,255,255,255,154             // ljmp          *-0x65000001(%rsi)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  126,255                             // jle           1f79 <_sk_load_tables_avx+0x2f1>
+  .byte  255                                 // (bad)
+  .byte  255                                 // .byte         0xff
 
 HIDDEN _sk_load_tables_u16_be_avx
 .globl _sk_load_tables_u16_be_avx
@@ -15917,7 +15951,7 @@ _sk_load_tables_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,113,2,0,0                    // jne           218d <_sk_load_tables_u16_be_avx+0x287>
+  .byte  15,133,113,2,0,0                    // jne           2203 <_sk_load_tables_u16_be_avx+0x287>
   .byte  196,1,121,16,4,72                   // vmovupd       (%r8,%r9,2),%xmm8
   .byte  196,129,121,16,84,72,16             // vmovupd       0x10(%r8,%r9,2),%xmm2
   .byte  196,129,121,16,92,72,32             // vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -15939,7 +15973,7 @@ _sk_load_tables_u16_be_avx:
   .byte  197,177,108,208                     // vpunpcklqdq   %xmm0,%xmm9,%xmm2
   .byte  197,177,109,200                     // vpunpckhqdq   %xmm0,%xmm9,%xmm1
   .byte  196,65,57,108,212                   // vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  .byte  197,121,111,29,200,71,0,0           // vmovdqa       0x47c8(%rip),%xmm11        # 6740 <_sk_callback_avx+0x559>
+  .byte  197,121,111,29,210,72,0,0           // vmovdqa       0x48d2(%rip),%xmm11        # 68c0 <_sk_callback_avx+0x5bd>
   .byte  196,193,105,219,195                 // vpand         %xmm11,%xmm2,%xmm0
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  196,193,121,105,209                 // vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -16038,7 +16072,7 @@ _sk_load_tables_u16_be_avx:
   .byte  196,226,121,51,219                  // vpmovzxwd     %xmm3,%xmm3
   .byte  196,195,101,24,216,1                // vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,202,66,0,0          // vbroadcastss  0x42ca(%rip),%ymm8        # 6444 <_sk_callback_avx+0x25d>
+  .byte  196,98,125,24,5,112,67,0,0          // vbroadcastss  0x4370(%rip),%ymm8        # 6560 <_sk_callback_avx+0x25d>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -16051,29 +16085,29 @@ _sk_load_tables_u16_be_avx:
   .byte  196,1,123,16,4,72                   // vmovsd        (%r8,%r9,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            21f3 <_sk_load_tables_u16_be_avx+0x2ed>
+  .byte  116,85                              // je            2269 <_sk_load_tables_u16_be_avx+0x2ed>
   .byte  196,1,57,22,68,72,8                 // vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            21f3 <_sk_load_tables_u16_be_avx+0x2ed>
+  .byte  114,72                              // jb            2269 <_sk_load_tables_u16_be_avx+0x2ed>
   .byte  196,129,123,16,84,72,16             // vmovsd        0x10(%r8,%r9,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            2200 <_sk_load_tables_u16_be_avx+0x2fa>
+  .byte  116,72                              // je            2276 <_sk_load_tables_u16_be_avx+0x2fa>
   .byte  196,129,105,22,84,72,24             // vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            2200 <_sk_load_tables_u16_be_avx+0x2fa>
+  .byte  114,59                              // jb            2276 <_sk_load_tables_u16_be_avx+0x2fa>
   .byte  196,129,123,16,92,72,32             // vmovsd        0x20(%r8,%r9,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,97,253,255,255               // je            1f37 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  15,132,97,253,255,255               // je            1fad <_sk_load_tables_u16_be_avx+0x31>
   .byte  196,129,97,22,92,72,40              // vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,80,253,255,255               // jb            1f37 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  15,130,80,253,255,255               // jb            1fad <_sk_load_tables_u16_be_avx+0x31>
   .byte  196,1,122,126,76,72,48              // vmovq         0x30(%r8,%r9,2),%xmm9
-  .byte  233,68,253,255,255                  // jmpq          1f37 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,68,253,255,255                  // jmpq          1fad <_sk_load_tables_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,55,253,255,255                  // jmpq          1f37 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,55,253,255,255                  // jmpq          1fad <_sk_load_tables_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,46,253,255,255                  // jmpq          1f37 <_sk_load_tables_u16_be_avx+0x31>
+  .byte  233,46,253,255,255                  // jmpq          1fad <_sk_load_tables_u16_be_avx+0x31>
 
 HIDDEN _sk_load_tables_rgb_u16_be_avx
 .globl _sk_load_tables_rgb_u16_be_avx
@@ -16083,7 +16117,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,127                       // lea           (%rdi,%rdi,2),%r9
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,93,2,0,0                     // jne           2478 <_sk_load_tables_rgb_u16_be_avx+0x26f>
+  .byte  15,133,93,2,0,0                     // jne           24ee <_sk_load_tables_rgb_u16_be_avx+0x26f>
   .byte  196,129,122,111,4,72                // vmovdqu       (%r8,%r9,2),%xmm0
   .byte  196,129,122,111,84,72,12            // vmovdqu       0xc(%r8,%r9,2),%xmm2
   .byte  196,129,122,111,76,72,24            // vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -16110,7 +16144,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  197,185,108,202                     // vpunpcklqdq   %xmm2,%xmm8,%xmm1
   .byte  197,185,109,210                     // vpunpckhqdq   %xmm2,%xmm8,%xmm2
   .byte  197,121,108,195                     // vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  .byte  197,121,111,13,193,68,0,0           // vmovdqa       0x44c1(%rip),%xmm9        # 6750 <_sk_callback_avx+0x569>
+  .byte  197,121,111,13,203,69,0,0           // vmovdqa       0x45cb(%rip),%xmm9        # 68d0 <_sk_callback_avx+0x5cd>
   .byte  196,193,113,219,193                 // vpand         %xmm9,%xmm1,%xmm0
   .byte  196,65,41,239,210                   // vpxor         %xmm10,%xmm10,%xmm10
   .byte  196,193,121,105,202                 // vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -16202,7 +16236,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,195,109,24,208,1                // vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,220,63,0,0        // vbroadcastss  0x3fdc(%rip),%ymm3        # 6448 <_sk_callback_avx+0x261>
+  .byte  196,226,125,24,29,130,64,0,0        // vbroadcastss  0x4082(%rip),%ymm3        # 6564 <_sk_callback_avx+0x261>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,93                               // pop           %r13
@@ -16213,36 +16247,36 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  196,129,121,110,4,72                // vmovd         (%r8,%r9,2),%xmm0
   .byte  196,129,121,196,68,72,4,2           // vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           2491 <_sk_load_tables_rgb_u16_be_avx+0x288>
-  .byte  233,190,253,255,255                 // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,5                               // jne           2507 <_sk_load_tables_rgb_u16_be_avx+0x288>
+  .byte  233,190,253,255,255                 // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,76,72,6             // vmovd         0x6(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,68,72,10,2            // vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            24c0 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+  .byte  114,26                              // jb            2536 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
   .byte  196,129,121,110,76,72,12            // vmovd         0xc(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,84,72,16,2          // vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           24c5 <_sk_load_tables_rgb_u16_be_avx+0x2bc>
-  .byte  233,143,253,255,255                 // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,138,253,255,255                 // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           253b <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+  .byte  233,143,253,255,255                 // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,138,253,255,255                 // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,76,72,18            // vmovd         0x12(%r8,%r9,2),%xmm1
   .byte  196,1,113,196,76,72,22,2            // vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            24f4 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+  .byte  114,26                              // jb            256a <_sk_load_tables_rgb_u16_be_avx+0x2eb>
   .byte  196,129,121,110,76,72,24            // vmovd         0x18(%r8,%r9,2),%xmm1
   .byte  196,129,113,196,76,72,28,2          // vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           24f9 <_sk_load_tables_rgb_u16_be_avx+0x2f0>
-  .byte  233,91,253,255,255                  // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,86,253,255,255                  // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           256f <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+  .byte  233,91,253,255,255                  // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,86,253,255,255                  // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
   .byte  196,129,121,110,92,72,30            // vmovd         0x1e(%r8,%r9,2),%xmm3
   .byte  196,1,97,196,92,72,34,2             // vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            2522 <_sk_load_tables_rgb_u16_be_avx+0x319>
+  .byte  114,20                              // jb            2598 <_sk_load_tables_rgb_u16_be_avx+0x319>
   .byte  196,129,121,110,92,72,36            // vmovd         0x24(%r8,%r9,2),%xmm3
   .byte  196,129,97,196,92,72,40,2           // vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  .byte  233,45,253,255,255                  // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
-  .byte  233,40,253,255,255                  // jmpq          224f <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,45,253,255,255                  // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  .byte  233,40,253,255,255                  // jmpq          22c5 <_sk_load_tables_rgb_u16_be_avx+0x46>
 
 HIDDEN _sk_byte_tables_avx
 .globl _sk_byte_tables_avx
@@ -16255,7 +16289,7 @@ _sk_byte_tables_avx:
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,16,63,0,0           // vbroadcastss  0x3f10(%rip),%ymm8        # 644c <_sk_callback_avx+0x265>
+  .byte  196,98,125,24,5,182,63,0,0          // vbroadcastss  0x3fb6(%rip),%ymm8        # 6568 <_sk_callback_avx+0x265>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
@@ -16292,7 +16326,7 @@ _sk_byte_tables_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,94,62,0,0          // vbroadcastss  0x3e5e(%rip),%ymm9        # 6450 <_sk_callback_avx+0x269>
+  .byte  196,98,125,24,13,4,63,0,0           // vbroadcastss  0x3f04(%rip),%ymm9        # 656c <_sk_callback_avx+0x269>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -16454,7 +16488,7 @@ _sk_byte_tables_rgb_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,132,59,0,0         // vbroadcastss  0x3b84(%rip),%ymm9        # 6454 <_sk_callback_avx+0x26d>
+  .byte  196,98,125,24,13,42,60,0,0          // vbroadcastss  0x3c2a(%rip),%ymm9        # 6570 <_sk_callback_avx+0x26d>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -16751,36 +16785,36 @@ _sk_parametric_r_avx:
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,216                      // vcvtdq2ps     %ymm0,%ymm11
-  .byte  196,98,125,24,37,226,54,0,0         // vbroadcastss  0x36e2(%rip),%ymm12        # 6458 <_sk_callback_avx+0x271>
+  .byte  196,98,125,24,37,136,55,0,0         // vbroadcastss  0x3788(%rip),%ymm12        # 6574 <_sk_callback_avx+0x271>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,216,54,0,0         // vbroadcastss  0x36d8(%rip),%ymm12        # 645c <_sk_callback_avx+0x275>
+  .byte  196,98,125,24,37,126,55,0,0         // vbroadcastss  0x377e(%rip),%ymm12        # 6578 <_sk_callback_avx+0x275>
   .byte  196,193,124,84,196                  // vandps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,206,54,0,0         // vbroadcastss  0x36ce(%rip),%ymm12        # 6460 <_sk_callback_avx+0x279>
+  .byte  196,98,125,24,37,116,55,0,0         // vbroadcastss  0x3774(%rip),%ymm12        # 657c <_sk_callback_avx+0x279>
   .byte  196,193,124,86,196                  // vorps         %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,196,54,0,0         // vbroadcastss  0x36c4(%rip),%ymm12        # 6464 <_sk_callback_avx+0x27d>
+  .byte  196,98,125,24,37,106,55,0,0         // vbroadcastss  0x376a(%rip),%ymm12        # 6580 <_sk_callback_avx+0x27d>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,186,54,0,0         // vbroadcastss  0x36ba(%rip),%ymm12        # 6468 <_sk_callback_avx+0x281>
+  .byte  196,98,125,24,37,96,55,0,0          // vbroadcastss  0x3760(%rip),%ymm12        # 6584 <_sk_callback_avx+0x281>
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,171,54,0,0         // vbroadcastss  0x36ab(%rip),%ymm12        # 646c <_sk_callback_avx+0x285>
+  .byte  196,98,125,24,37,81,55,0,0          // vbroadcastss  0x3751(%rip),%ymm12        # 6588 <_sk_callback_avx+0x285>
   .byte  196,193,124,88,196                  // vaddps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,161,54,0,0         // vbroadcastss  0x36a1(%rip),%ymm12        # 6470 <_sk_callback_avx+0x289>
+  .byte  196,98,125,24,37,71,55,0,0          // vbroadcastss  0x3747(%rip),%ymm12        # 658c <_sk_callback_avx+0x289>
   .byte  197,156,94,192                      // vdivps        %ymm0,%ymm12,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
   .byte  197,172,89,192                      // vmulps        %ymm0,%ymm10,%ymm0
   .byte  196,99,125,8,208,1                  // vroundps      $0x1,%ymm0,%ymm10
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,133,54,0,0         // vbroadcastss  0x3685(%rip),%ymm11        # 6474 <_sk_callback_avx+0x28d>
+  .byte  196,98,125,24,29,43,55,0,0          // vbroadcastss  0x372b(%rip),%ymm11        # 6590 <_sk_callback_avx+0x28d>
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,123,54,0,0         // vbroadcastss  0x367b(%rip),%ymm11        # 6478 <_sk_callback_avx+0x291>
+  .byte  196,98,125,24,29,33,55,0,0          // vbroadcastss  0x3721(%rip),%ymm11        # 6594 <_sk_callback_avx+0x291>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,124,92,195                  // vsubps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,108,54,0,0         // vbroadcastss  0x366c(%rip),%ymm11        # 647c <_sk_callback_avx+0x295>
+  .byte  196,98,125,24,29,18,55,0,0          // vbroadcastss  0x3712(%rip),%ymm11        # 6598 <_sk_callback_avx+0x295>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,98,54,0,0          // vbroadcastss  0x3662(%rip),%ymm11        # 6480 <_sk_callback_avx+0x299>
+  .byte  196,98,125,24,29,8,55,0,0           // vbroadcastss  0x3708(%rip),%ymm11        # 659c <_sk_callback_avx+0x299>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,124,88,194                  // vaddps        %ymm10,%ymm0,%ymm0
-  .byte  196,98,125,24,21,83,54,0,0          // vbroadcastss  0x3653(%rip),%ymm10        # 6484 <_sk_callback_avx+0x29d>
+  .byte  196,98,125,24,21,249,54,0,0         // vbroadcastss  0x36f9(%rip),%ymm10        # 65a0 <_sk_callback_avx+0x29d>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16788,7 +16822,7 @@ _sk_parametric_r_avx:
   .byte  196,195,125,74,193,128              // vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,42,54,0,0           // vbroadcastss  0x362a(%rip),%ymm8        # 6488 <_sk_callback_avx+0x2a1>
+  .byte  196,98,125,24,5,208,54,0,0          // vbroadcastss  0x36d0(%rip),%ymm8        # 65a4 <_sk_callback_avx+0x2a1>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16810,36 +16844,36 @@ _sk_parametric_g_avx:
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,217                      // vcvtdq2ps     %ymm1,%ymm11
-  .byte  196,98,125,24,37,219,53,0,0         // vbroadcastss  0x35db(%rip),%ymm12        # 648c <_sk_callback_avx+0x2a5>
+  .byte  196,98,125,24,37,129,54,0,0         // vbroadcastss  0x3681(%rip),%ymm12        # 65a8 <_sk_callback_avx+0x2a5>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,209,53,0,0         // vbroadcastss  0x35d1(%rip),%ymm12        # 6490 <_sk_callback_avx+0x2a9>
+  .byte  196,98,125,24,37,119,54,0,0         // vbroadcastss  0x3677(%rip),%ymm12        # 65ac <_sk_callback_avx+0x2a9>
   .byte  196,193,116,84,204                  // vandps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,199,53,0,0         // vbroadcastss  0x35c7(%rip),%ymm12        # 6494 <_sk_callback_avx+0x2ad>
+  .byte  196,98,125,24,37,109,54,0,0         // vbroadcastss  0x366d(%rip),%ymm12        # 65b0 <_sk_callback_avx+0x2ad>
   .byte  196,193,116,86,204                  // vorps         %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,189,53,0,0         // vbroadcastss  0x35bd(%rip),%ymm12        # 6498 <_sk_callback_avx+0x2b1>
+  .byte  196,98,125,24,37,99,54,0,0          // vbroadcastss  0x3663(%rip),%ymm12        # 65b4 <_sk_callback_avx+0x2b1>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,179,53,0,0         // vbroadcastss  0x35b3(%rip),%ymm12        # 649c <_sk_callback_avx+0x2b5>
+  .byte  196,98,125,24,37,89,54,0,0          // vbroadcastss  0x3659(%rip),%ymm12        # 65b8 <_sk_callback_avx+0x2b5>
   .byte  196,65,116,89,228                   // vmulps        %ymm12,%ymm1,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,164,53,0,0         // vbroadcastss  0x35a4(%rip),%ymm12        # 64a0 <_sk_callback_avx+0x2b9>
+  .byte  196,98,125,24,37,74,54,0,0          // vbroadcastss  0x364a(%rip),%ymm12        # 65bc <_sk_callback_avx+0x2b9>
   .byte  196,193,116,88,204                  // vaddps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,154,53,0,0         // vbroadcastss  0x359a(%rip),%ymm12        # 64a4 <_sk_callback_avx+0x2bd>
+  .byte  196,98,125,24,37,64,54,0,0          // vbroadcastss  0x3640(%rip),%ymm12        # 65c0 <_sk_callback_avx+0x2bd>
   .byte  197,156,94,201                      // vdivps        %ymm1,%ymm12,%ymm1
   .byte  197,164,92,201                      // vsubps        %ymm1,%ymm11,%ymm1
   .byte  197,172,89,201                      // vmulps        %ymm1,%ymm10,%ymm1
   .byte  196,99,125,8,209,1                  // vroundps      $0x1,%ymm1,%ymm10
   .byte  196,65,116,92,210                   // vsubps        %ymm10,%ymm1,%ymm10
-  .byte  196,98,125,24,29,126,53,0,0         // vbroadcastss  0x357e(%rip),%ymm11        # 64a8 <_sk_callback_avx+0x2c1>
+  .byte  196,98,125,24,29,36,54,0,0          // vbroadcastss  0x3624(%rip),%ymm11        # 65c4 <_sk_callback_avx+0x2c1>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,116,53,0,0         // vbroadcastss  0x3574(%rip),%ymm11        # 64ac <_sk_callback_avx+0x2c5>
+  .byte  196,98,125,24,29,26,54,0,0          // vbroadcastss  0x361a(%rip),%ymm11        # 65c8 <_sk_callback_avx+0x2c5>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,116,92,203                  // vsubps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,101,53,0,0         // vbroadcastss  0x3565(%rip),%ymm11        # 64b0 <_sk_callback_avx+0x2c9>
+  .byte  196,98,125,24,29,11,54,0,0          // vbroadcastss  0x360b(%rip),%ymm11        # 65cc <_sk_callback_avx+0x2c9>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,91,53,0,0          // vbroadcastss  0x355b(%rip),%ymm11        # 64b4 <_sk_callback_avx+0x2cd>
+  .byte  196,98,125,24,29,1,54,0,0           // vbroadcastss  0x3601(%rip),%ymm11        # 65d0 <_sk_callback_avx+0x2cd>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,116,88,202                  // vaddps        %ymm10,%ymm1,%ymm1
-  .byte  196,98,125,24,21,76,53,0,0          // vbroadcastss  0x354c(%rip),%ymm10        # 64b8 <_sk_callback_avx+0x2d1>
+  .byte  196,98,125,24,21,242,53,0,0         // vbroadcastss  0x35f2(%rip),%ymm10        # 65d4 <_sk_callback_avx+0x2d1>
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16847,7 +16881,7 @@ _sk_parametric_g_avx:
   .byte  196,195,117,74,201,128              // vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,116,95,200                  // vmaxps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,5,35,53,0,0           // vbroadcastss  0x3523(%rip),%ymm8        # 64bc <_sk_callback_avx+0x2d5>
+  .byte  196,98,125,24,5,201,53,0,0          // vbroadcastss  0x35c9(%rip),%ymm8        # 65d8 <_sk_callback_avx+0x2d5>
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16869,36 +16903,36 @@ _sk_parametric_b_avx:
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,218                      // vcvtdq2ps     %ymm2,%ymm11
-  .byte  196,98,125,24,37,212,52,0,0         // vbroadcastss  0x34d4(%rip),%ymm12        # 64c0 <_sk_callback_avx+0x2d9>
+  .byte  196,98,125,24,37,122,53,0,0         // vbroadcastss  0x357a(%rip),%ymm12        # 65dc <_sk_callback_avx+0x2d9>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,202,52,0,0         // vbroadcastss  0x34ca(%rip),%ymm12        # 64c4 <_sk_callback_avx+0x2dd>
+  .byte  196,98,125,24,37,112,53,0,0         // vbroadcastss  0x3570(%rip),%ymm12        # 65e0 <_sk_callback_avx+0x2dd>
   .byte  196,193,108,84,212                  // vandps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,192,52,0,0         // vbroadcastss  0x34c0(%rip),%ymm12        # 64c8 <_sk_callback_avx+0x2e1>
+  .byte  196,98,125,24,37,102,53,0,0         // vbroadcastss  0x3566(%rip),%ymm12        # 65e4 <_sk_callback_avx+0x2e1>
   .byte  196,193,108,86,212                  // vorps         %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,182,52,0,0         // vbroadcastss  0x34b6(%rip),%ymm12        # 64cc <_sk_callback_avx+0x2e5>
+  .byte  196,98,125,24,37,92,53,0,0          // vbroadcastss  0x355c(%rip),%ymm12        # 65e8 <_sk_callback_avx+0x2e5>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,172,52,0,0         // vbroadcastss  0x34ac(%rip),%ymm12        # 64d0 <_sk_callback_avx+0x2e9>
+  .byte  196,98,125,24,37,82,53,0,0          // vbroadcastss  0x3552(%rip),%ymm12        # 65ec <_sk_callback_avx+0x2e9>
   .byte  196,65,108,89,228                   // vmulps        %ymm12,%ymm2,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,157,52,0,0         // vbroadcastss  0x349d(%rip),%ymm12        # 64d4 <_sk_callback_avx+0x2ed>
+  .byte  196,98,125,24,37,67,53,0,0          // vbroadcastss  0x3543(%rip),%ymm12        # 65f0 <_sk_callback_avx+0x2ed>
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,147,52,0,0         // vbroadcastss  0x3493(%rip),%ymm12        # 64d8 <_sk_callback_avx+0x2f1>
+  .byte  196,98,125,24,37,57,53,0,0          // vbroadcastss  0x3539(%rip),%ymm12        # 65f4 <_sk_callback_avx+0x2f1>
   .byte  197,156,94,210                      // vdivps        %ymm2,%ymm12,%ymm2
   .byte  197,164,92,210                      // vsubps        %ymm2,%ymm11,%ymm2
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  196,99,125,8,210,1                  // vroundps      $0x1,%ymm2,%ymm10
   .byte  196,65,108,92,210                   // vsubps        %ymm10,%ymm2,%ymm10
-  .byte  196,98,125,24,29,119,52,0,0         // vbroadcastss  0x3477(%rip),%ymm11        # 64dc <_sk_callback_avx+0x2f5>
+  .byte  196,98,125,24,29,29,53,0,0          // vbroadcastss  0x351d(%rip),%ymm11        # 65f8 <_sk_callback_avx+0x2f5>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,109,52,0,0         // vbroadcastss  0x346d(%rip),%ymm11        # 64e0 <_sk_callback_avx+0x2f9>
+  .byte  196,98,125,24,29,19,53,0,0          // vbroadcastss  0x3513(%rip),%ymm11        # 65fc <_sk_callback_avx+0x2f9>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,108,92,211                  // vsubps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,94,52,0,0          // vbroadcastss  0x345e(%rip),%ymm11        # 64e4 <_sk_callback_avx+0x2fd>
+  .byte  196,98,125,24,29,4,53,0,0           // vbroadcastss  0x3504(%rip),%ymm11        # 6600 <_sk_callback_avx+0x2fd>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,84,52,0,0          // vbroadcastss  0x3454(%rip),%ymm11        # 64e8 <_sk_callback_avx+0x301>
+  .byte  196,98,125,24,29,250,52,0,0         // vbroadcastss  0x34fa(%rip),%ymm11        # 6604 <_sk_callback_avx+0x301>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,108,88,210                  // vaddps        %ymm10,%ymm2,%ymm2
-  .byte  196,98,125,24,21,69,52,0,0          // vbroadcastss  0x3445(%rip),%ymm10        # 64ec <_sk_callback_avx+0x305>
+  .byte  196,98,125,24,21,235,52,0,0         // vbroadcastss  0x34eb(%rip),%ymm10        # 6608 <_sk_callback_avx+0x305>
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  197,253,91,210                      // vcvtps2dq     %ymm2,%ymm2
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16906,7 +16940,7 @@ _sk_parametric_b_avx:
   .byte  196,195,109,74,209,128              // vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,28,52,0,0           // vbroadcastss  0x341c(%rip),%ymm8        # 64f0 <_sk_callback_avx+0x309>
+  .byte  196,98,125,24,5,194,52,0,0          // vbroadcastss  0x34c2(%rip),%ymm8        # 660c <_sk_callback_avx+0x309>
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16928,36 +16962,36 @@ _sk_parametric_a_avx:
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,219                      // vcvtdq2ps     %ymm3,%ymm11
-  .byte  196,98,125,24,37,205,51,0,0         // vbroadcastss  0x33cd(%rip),%ymm12        # 64f4 <_sk_callback_avx+0x30d>
+  .byte  196,98,125,24,37,115,52,0,0         // vbroadcastss  0x3473(%rip),%ymm12        # 6610 <_sk_callback_avx+0x30d>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,195,51,0,0         // vbroadcastss  0x33c3(%rip),%ymm12        # 64f8 <_sk_callback_avx+0x311>
+  .byte  196,98,125,24,37,105,52,0,0         // vbroadcastss  0x3469(%rip),%ymm12        # 6614 <_sk_callback_avx+0x311>
   .byte  196,193,100,84,220                  // vandps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,185,51,0,0         // vbroadcastss  0x33b9(%rip),%ymm12        # 64fc <_sk_callback_avx+0x315>
+  .byte  196,98,125,24,37,95,52,0,0          // vbroadcastss  0x345f(%rip),%ymm12        # 6618 <_sk_callback_avx+0x315>
   .byte  196,193,100,86,220                  // vorps         %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,175,51,0,0         // vbroadcastss  0x33af(%rip),%ymm12        # 6500 <_sk_callback_avx+0x319>
+  .byte  196,98,125,24,37,85,52,0,0          // vbroadcastss  0x3455(%rip),%ymm12        # 661c <_sk_callback_avx+0x319>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,165,51,0,0         // vbroadcastss  0x33a5(%rip),%ymm12        # 6504 <_sk_callback_avx+0x31d>
+  .byte  196,98,125,24,37,75,52,0,0          // vbroadcastss  0x344b(%rip),%ymm12        # 6620 <_sk_callback_avx+0x31d>
   .byte  196,65,100,89,228                   // vmulps        %ymm12,%ymm3,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,150,51,0,0         // vbroadcastss  0x3396(%rip),%ymm12        # 6508 <_sk_callback_avx+0x321>
+  .byte  196,98,125,24,37,60,52,0,0          // vbroadcastss  0x343c(%rip),%ymm12        # 6624 <_sk_callback_avx+0x321>
   .byte  196,193,100,88,220                  // vaddps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,140,51,0,0         // vbroadcastss  0x338c(%rip),%ymm12        # 650c <_sk_callback_avx+0x325>
+  .byte  196,98,125,24,37,50,52,0,0          // vbroadcastss  0x3432(%rip),%ymm12        # 6628 <_sk_callback_avx+0x325>
   .byte  197,156,94,219                      // vdivps        %ymm3,%ymm12,%ymm3
   .byte  197,164,92,219                      // vsubps        %ymm3,%ymm11,%ymm3
   .byte  197,172,89,219                      // vmulps        %ymm3,%ymm10,%ymm3
   .byte  196,99,125,8,211,1                  // vroundps      $0x1,%ymm3,%ymm10
   .byte  196,65,100,92,210                   // vsubps        %ymm10,%ymm3,%ymm10
-  .byte  196,98,125,24,29,112,51,0,0         // vbroadcastss  0x3370(%rip),%ymm11        # 6510 <_sk_callback_avx+0x329>
+  .byte  196,98,125,24,29,22,52,0,0          // vbroadcastss  0x3416(%rip),%ymm11        # 662c <_sk_callback_avx+0x329>
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,102,51,0,0         // vbroadcastss  0x3366(%rip),%ymm11        # 6514 <_sk_callback_avx+0x32d>
+  .byte  196,98,125,24,29,12,52,0,0          // vbroadcastss  0x340c(%rip),%ymm11        # 6630 <_sk_callback_avx+0x32d>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,100,92,219                  // vsubps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,87,51,0,0          // vbroadcastss  0x3357(%rip),%ymm11        # 6518 <_sk_callback_avx+0x331>
+  .byte  196,98,125,24,29,253,51,0,0         // vbroadcastss  0x33fd(%rip),%ymm11        # 6634 <_sk_callback_avx+0x331>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,77,51,0,0          // vbroadcastss  0x334d(%rip),%ymm11        # 651c <_sk_callback_avx+0x335>
+  .byte  196,98,125,24,29,243,51,0,0         // vbroadcastss  0x33f3(%rip),%ymm11        # 6638 <_sk_callback_avx+0x335>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,100,88,218                  // vaddps        %ymm10,%ymm3,%ymm3
-  .byte  196,98,125,24,21,62,51,0,0          // vbroadcastss  0x333e(%rip),%ymm10        # 6520 <_sk_callback_avx+0x339>
+  .byte  196,98,125,24,21,228,51,0,0         // vbroadcastss  0x33e4(%rip),%ymm10        # 663c <_sk_callback_avx+0x339>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,253,91,219                      // vcvtps2dq     %ymm3,%ymm3
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16965,7 +16999,7 @@ _sk_parametric_a_avx:
   .byte  196,195,101,74,217,128              // vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,100,95,216                  // vmaxps        %ymm8,%ymm3,%ymm3
-  .byte  196,98,125,24,5,21,51,0,0           // vbroadcastss  0x3315(%rip),%ymm8        # 6524 <_sk_callback_avx+0x33d>
+  .byte  196,98,125,24,5,187,51,0,0          // vbroadcastss  0x33bb(%rip),%ymm8        # 6640 <_sk_callback_avx+0x33d>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16974,31 +17008,31 @@ HIDDEN _sk_lab_to_xyz_avx
 .globl _sk_lab_to_xyz_avx
 FUNCTION(_sk_lab_to_xyz_avx)
 _sk_lab_to_xyz_avx:
-  .byte  196,98,125,24,5,7,51,0,0            // vbroadcastss  0x3307(%rip),%ymm8        # 6528 <_sk_callback_avx+0x341>
+  .byte  196,98,125,24,5,173,51,0,0          // vbroadcastss  0x33ad(%rip),%ymm8        # 6644 <_sk_callback_avx+0x341>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,253,50,0,0          // vbroadcastss  0x32fd(%rip),%ymm8        # 652c <_sk_callback_avx+0x345>
+  .byte  196,98,125,24,5,163,51,0,0          // vbroadcastss  0x33a3(%rip),%ymm8        # 6648 <_sk_callback_avx+0x345>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,13,243,50,0,0         // vbroadcastss  0x32f3(%rip),%ymm9        # 6530 <_sk_callback_avx+0x349>
+  .byte  196,98,125,24,13,153,51,0,0         // vbroadcastss  0x3399(%rip),%ymm9        # 664c <_sk_callback_avx+0x349>
   .byte  196,193,116,88,201                  // vaddps        %ymm9,%ymm1,%ymm1
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  196,193,108,88,209                  // vaddps        %ymm9,%ymm2,%ymm2
-  .byte  196,98,125,24,5,223,50,0,0          // vbroadcastss  0x32df(%rip),%ymm8        # 6534 <_sk_callback_avx+0x34d>
+  .byte  196,98,125,24,5,133,51,0,0          // vbroadcastss  0x3385(%rip),%ymm8        # 6650 <_sk_callback_avx+0x34d>
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,213,50,0,0          // vbroadcastss  0x32d5(%rip),%ymm8        # 6538 <_sk_callback_avx+0x351>
+  .byte  196,98,125,24,5,123,51,0,0          // vbroadcastss  0x337b(%rip),%ymm8        # 6654 <_sk_callback_avx+0x351>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,203,50,0,0          // vbroadcastss  0x32cb(%rip),%ymm8        # 653c <_sk_callback_avx+0x355>
+  .byte  196,98,125,24,5,113,51,0,0          // vbroadcastss  0x3371(%rip),%ymm8        # 6658 <_sk_callback_avx+0x355>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,252,88,201                      // vaddps        %ymm1,%ymm0,%ymm1
-  .byte  196,98,125,24,5,189,50,0,0          // vbroadcastss  0x32bd(%rip),%ymm8        # 6540 <_sk_callback_avx+0x359>
+  .byte  196,98,125,24,5,99,51,0,0           // vbroadcastss  0x3363(%rip),%ymm8        # 665c <_sk_callback_avx+0x359>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,252,92,210                      // vsubps        %ymm2,%ymm0,%ymm2
   .byte  197,116,89,193                      // vmulps        %ymm1,%ymm1,%ymm8
   .byte  196,65,116,89,192                   // vmulps        %ymm8,%ymm1,%ymm8
-  .byte  196,98,125,24,13,166,50,0,0         // vbroadcastss  0x32a6(%rip),%ymm9        # 6544 <_sk_callback_avx+0x35d>
+  .byte  196,98,125,24,13,76,51,0,0          // vbroadcastss  0x334c(%rip),%ymm9        # 6660 <_sk_callback_avx+0x35d>
   .byte  196,65,52,194,208,1                 // vcmpltps      %ymm8,%ymm9,%ymm10
-  .byte  196,98,125,24,29,155,50,0,0         // vbroadcastss  0x329b(%rip),%ymm11        # 6548 <_sk_callback_avx+0x361>
+  .byte  196,98,125,24,29,65,51,0,0          // vbroadcastss  0x3341(%rip),%ymm11        # 6664 <_sk_callback_avx+0x361>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,37,145,50,0,0         // vbroadcastss  0x3291(%rip),%ymm12        # 654c <_sk_callback_avx+0x365>
+  .byte  196,98,125,24,37,55,51,0,0          // vbroadcastss  0x3337(%rip),%ymm12        # 6668 <_sk_callback_avx+0x365>
   .byte  196,193,116,89,204                  // vmulps        %ymm12,%ymm1,%ymm1
   .byte  196,67,117,74,192,160               // vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   .byte  197,252,89,200                      // vmulps        %ymm0,%ymm0,%ymm1
@@ -17013,9 +17047,9 @@ _sk_lab_to_xyz_avx:
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,193,108,89,212                  // vmulps        %ymm12,%ymm2,%ymm2
   .byte  196,227,109,74,208,144              // vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  .byte  196,226,125,24,5,71,50,0,0          // vbroadcastss  0x3247(%rip),%ymm0        # 6550 <_sk_callback_avx+0x369>
+  .byte  196,226,125,24,5,237,50,0,0         // vbroadcastss  0x32ed(%rip),%ymm0        # 666c <_sk_callback_avx+0x369>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,62,50,0,0           // vbroadcastss  0x323e(%rip),%ymm8        # 6554 <_sk_callback_avx+0x36d>
+  .byte  196,98,125,24,5,228,50,0,0          // vbroadcastss  0x32e4(%rip),%ymm8        # 6670 <_sk_callback_avx+0x36d>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17029,14 +17063,14 @@ _sk_load_a8_avx:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,62                              // jne           336d <_sk_load_a8_avx+0x4e>
+  .byte  117,62                              // jne           33e3 <_sk_load_a8_avx+0x4e>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,121,49,200                  // vpmovzxbd     %xmm0,%xmm1
   .byte  196,227,121,4,192,229               // vpermilps     $0xe5,%xmm0,%xmm0
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,2,50,0,0          // vbroadcastss  0x3202(%rip),%ymm1        # 6558 <_sk_callback_avx+0x371>
+  .byte  196,226,125,24,13,168,50,0,0        // vbroadcastss  0x32a8(%rip),%ymm1        # 6674 <_sk_callback_avx+0x371>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -17053,9 +17087,9 @@ _sk_load_a8_avx:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           3375 <_sk_load_a8_avx+0x56>
+  .byte  117,234                             // jne           33eb <_sk_load_a8_avx+0x56>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,161                             // jmp           3333 <_sk_load_a8_avx+0x14>
+  .byte  235,161                             // jmp           33a9 <_sk_load_a8_avx+0x14>
 
 HIDDEN _sk_gather_a8_avx
 .globl _sk_gather_a8_avx
@@ -17105,7 +17139,7 @@ _sk_gather_a8_avx:
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,247,48,0,0        // vbroadcastss  0x30f7(%rip),%ymm1        # 655c <_sk_callback_avx+0x375>
+  .byte  196,226,125,24,13,157,49,0,0        // vbroadcastss  0x319d(%rip),%ymm1        # 6678 <_sk_callback_avx+0x375>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -17123,14 +17157,14 @@ FUNCTION(_sk_store_a8_avx)
 _sk_store_a8_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,210,48,0,0          // vbroadcastss  0x30d2(%rip),%ymm8        # 6560 <_sk_callback_avx+0x379>
+  .byte  196,98,125,24,5,120,49,0,0          // vbroadcastss  0x3178(%rip),%ymm8        # 667c <_sk_callback_avx+0x379>
   .byte  196,65,100,89,192                   // vmulps        %ymm8,%ymm3,%ymm8
   .byte  196,65,125,91,192                   // vcvtps2dq     %ymm8,%ymm8
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  196,65,57,103,192                   // vpackuswb     %xmm8,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           34b7 <_sk_store_a8_avx+0x37>
+  .byte  117,10                              // jne           352d <_sk_store_a8_avx+0x37>
   .byte  196,65,123,17,4,58                  // vmovsd        %xmm8,(%r10,%rdi,1)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17138,10 +17172,10 @@ _sk_store_a8_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            34b3 <_sk_store_a8_avx+0x33>
+  .byte  119,236                             // ja            3529 <_sk_store_a8_avx+0x33>
   .byte  196,66,121,48,192                   // vpmovzxbw     %xmm8,%xmm8
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,69,0,0,0                  // lea           0x45(%rip),%r9        # 351c <_sk_store_a8_avx+0x9c>
+  .byte  76,141,13,67,0,0,0                  // lea           0x43(%rip),%r9        # 3590 <_sk_store_a8_avx+0x9a>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17152,28 +17186,27 @@ _sk_store_a8_avx:
   .byte  196,67,121,20,68,58,2,4             // vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   .byte  196,67,121,20,68,58,1,2             // vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   .byte  196,67,121,20,4,58,0                // vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  .byte  235,154                             // jmp           34b3 <_sk_store_a8_avx+0x33>
-  .byte  15,31,0                             // nopl          (%rax)
-  .byte  244                                 // hlt
-  .byte  255                                 // (bad)
+  .byte  235,154                             // jmp           3529 <_sk_store_a8_avx+0x33>
+  .byte  144                                 // nop
+  .byte  246,255                             // idiv          %bh
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  236                                 // in            (%dx),%al
+  .byte  238                                 // out           %al,(%dx)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,228                             // jmpq          *%rsp
+  .byte  255,230                             // jmpq          *%rsi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  220,255                             // fdivr         %st,%st(7)
+  .byte  222,255                             // fdivrp        %st,%st(7)
   .byte  255                                 // (bad)
-  .byte  255,212                             // callq         *%rsp
+  .byte  255,214                             // callq         *%rsi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,204                             // dec           %esp
+  .byte  255,206                             // dec           %esi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,196                             // inc           %esp
+  .byte  255,198                             // inc           %esi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -17187,17 +17220,17 @@ _sk_load_g8_avx:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,1,248                            // add           %rdi,%rax
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,67                              // jne           358b <_sk_load_g8_avx+0x53>
+  .byte  117,67                              // jne           35ff <_sk_load_g8_avx+0x53>
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,121,49,200                  // vpmovzxbd     %xmm0,%xmm1
   .byte  196,227,121,4,192,229               // vpermilps     $0xe5,%xmm0,%xmm0
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,245,47,0,0        // vbroadcastss  0x2ff5(%rip),%ymm1        # 6564 <_sk_callback_avx+0x37d>
+  .byte  196,226,125,24,13,157,48,0,0        // vbroadcastss  0x309d(%rip),%ymm1        # 6680 <_sk_callback_avx+0x37d>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,234,47,0,0        // vbroadcastss  0x2fea(%rip),%ymm3        # 6568 <_sk_callback_avx+0x381>
+  .byte  196,226,125,24,29,146,48,0,0        // vbroadcastss  0x3092(%rip),%ymm3        # 6684 <_sk_callback_avx+0x381>
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
@@ -17211,9 +17244,9 @@ _sk_load_g8_avx:
   .byte  77,9,217                            // or            %r11,%r9
   .byte  72,131,193,8                        // add           $0x8,%rcx
   .byte  73,255,202                          // dec           %r10
-  .byte  117,234                             // jne           3593 <_sk_load_g8_avx+0x5b>
+  .byte  117,234                             // jne           3607 <_sk_load_g8_avx+0x5b>
   .byte  196,193,249,110,193                 // vmovq         %r9,%xmm0
-  .byte  235,156                             // jmp           354c <_sk_load_g8_avx+0x14>
+  .byte  235,156                             // jmp           35c0 <_sk_load_g8_avx+0x14>
 
 HIDDEN _sk_gather_g8_avx
 .globl _sk_gather_g8_avx
@@ -17263,10 +17296,10 @@ _sk_gather_g8_avx:
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,233,46,0,0        // vbroadcastss  0x2ee9(%rip),%ymm1        # 656c <_sk_callback_avx+0x385>
+  .byte  196,226,125,24,13,145,47,0,0        // vbroadcastss  0x2f91(%rip),%ymm1        # 6688 <_sk_callback_avx+0x385>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,222,46,0,0        // vbroadcastss  0x2ede(%rip),%ymm3        # 6570 <_sk_callback_avx+0x389>
+  .byte  196,226,125,24,29,134,47,0,0        // vbroadcastss  0x2f86(%rip),%ymm3        # 668c <_sk_callback_avx+0x389>
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  91                                  // pop           %rbx
@@ -17282,9 +17315,9 @@ _sk_gather_i8_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  73,137,192                          // mov           %rax,%r8
   .byte  77,133,192                          // test          %r8,%r8
-  .byte  116,5                               // je            36b2 <_sk_gather_i8_avx+0xf>
+  .byte  116,5                               // je            3726 <_sk_gather_i8_avx+0xf>
   .byte  76,137,192                          // mov           %r8,%rax
-  .byte  235,2                               // jmp           36b4 <_sk_gather_i8_avx+0x11>
+  .byte  235,2                               // jmp           3728 <_sk_gather_i8_avx+0x11>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,87                               // push          %r15
   .byte  65,86                               // push          %r14
@@ -17346,10 +17379,10 @@ _sk_gather_i8_avx:
   .byte  196,163,121,34,4,163,2              // vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   .byte  196,163,121,34,28,19,3              // vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,226,47,0,0            // vmovaps       0x2fe2(%rip),%ymm10        # 67c0 <_sk_callback_avx+0x5d9>
+  .byte  197,124,40,21,14,48,0,0             // vmovaps       0x300e(%rip),%ymm10        # 6860 <_sk_callback_avx+0x55d>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,132,45,0,0         // vbroadcastss  0x2d84(%rip),%ymm9        # 6574 <_sk_callback_avx+0x38d>
+  .byte  196,98,125,24,13,44,46,0,0          // vbroadcastss  0x2e2c(%rip),%ymm9        # 6690 <_sk_callback_avx+0x38d>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -17383,38 +17416,38 @@ _sk_load_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,128,0,0,0                    // jne           38e8 <_sk_load_565_avx+0x8e>
+  .byte  15,133,128,0,0,0                    // jne           395c <_sk_load_565_avx+0x8e>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  197,241,239,201                     // vpxor         %xmm1,%xmm1,%xmm1
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,238,44,0,0         // vbroadcastss  0x2cee(%rip),%ymm0        # 6578 <_sk_callback_avx+0x391>
+  .byte  196,226,125,24,5,150,45,0,0         // vbroadcastss  0x2d96(%rip),%ymm0        # 6694 <_sk_callback_avx+0x391>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,225,44,0,0        // vbroadcastss  0x2ce1(%rip),%ymm1        # 657c <_sk_callback_avx+0x395>
+  .byte  196,226,125,24,13,137,45,0,0        // vbroadcastss  0x2d89(%rip),%ymm1        # 6698 <_sk_callback_avx+0x395>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,216,44,0,0        // vbroadcastss  0x2cd8(%rip),%ymm1        # 6580 <_sk_callback_avx+0x399>
+  .byte  196,226,125,24,13,128,45,0,0        // vbroadcastss  0x2d80(%rip),%ymm1        # 669c <_sk_callback_avx+0x399>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,203,44,0,0        // vbroadcastss  0x2ccb(%rip),%ymm3        # 6584 <_sk_callback_avx+0x39d>
+  .byte  196,226,125,24,29,115,45,0,0        // vbroadcastss  0x2d73(%rip),%ymm3        # 66a0 <_sk_callback_avx+0x39d>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,194,44,0,0        // vbroadcastss  0x2cc2(%rip),%ymm3        # 6588 <_sk_callback_avx+0x3a1>
+  .byte  196,226,125,24,29,106,45,0,0        // vbroadcastss  0x2d6a(%rip),%ymm3        # 66a4 <_sk_callback_avx+0x3a1>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,181,44,0,0        // vbroadcastss  0x2cb5(%rip),%ymm3        # 658c <_sk_callback_avx+0x3a5>
+  .byte  196,226,125,24,29,93,45,0,0         // vbroadcastss  0x2d5d(%rip),%ymm3        # 66a8 <_sk_callback_avx+0x3a5>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,170,44,0,0        // vbroadcastss  0x2caa(%rip),%ymm3        # 6590 <_sk_callback_avx+0x3a9>
+  .byte  196,226,125,24,29,82,45,0,0         // vbroadcastss  0x2d52(%rip),%ymm3        # 66ac <_sk_callback_avx+0x3a9>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,110,255,255,255              // ja            386e <_sk_load_565_avx+0x14>
+  .byte  15,135,110,255,255,255              // ja            38e2 <_sk_load_565_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 3954 <_sk_load_565_avx+0xfa>
+  .byte  76,141,13,73,0,0,0                  // lea           0x49(%rip),%r9        # 39c8 <_sk_load_565_avx+0xfa>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17426,7 +17459,7 @@ _sk_load_565_avx:
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,26,255,255,255                  // jmpq          386e <_sk_load_565_avx+0x14>
+  .byte  233,26,255,255,255                  // jmpq          38e2 <_sk_load_565_avx+0x14>
   .byte  244                                 // hlt
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -17504,23 +17537,23 @@ _sk_gather_565_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,74,43,0,0          // vbroadcastss  0x2b4a(%rip),%ymm0        # 6594 <_sk_callback_avx+0x3ad>
+  .byte  196,226,125,24,5,242,43,0,0         // vbroadcastss  0x2bf2(%rip),%ymm0        # 66b0 <_sk_callback_avx+0x3ad>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,61,43,0,0         // vbroadcastss  0x2b3d(%rip),%ymm1        # 6598 <_sk_callback_avx+0x3b1>
+  .byte  196,226,125,24,13,229,43,0,0        // vbroadcastss  0x2be5(%rip),%ymm1        # 66b4 <_sk_callback_avx+0x3b1>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,52,43,0,0         // vbroadcastss  0x2b34(%rip),%ymm1        # 659c <_sk_callback_avx+0x3b5>
+  .byte  196,226,125,24,13,220,43,0,0        // vbroadcastss  0x2bdc(%rip),%ymm1        # 66b8 <_sk_callback_avx+0x3b5>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,39,43,0,0         // vbroadcastss  0x2b27(%rip),%ymm3        # 65a0 <_sk_callback_avx+0x3b9>
+  .byte  196,226,125,24,29,207,43,0,0        // vbroadcastss  0x2bcf(%rip),%ymm3        # 66bc <_sk_callback_avx+0x3b9>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,30,43,0,0         // vbroadcastss  0x2b1e(%rip),%ymm3        # 65a4 <_sk_callback_avx+0x3bd>
+  .byte  196,226,125,24,29,198,43,0,0        // vbroadcastss  0x2bc6(%rip),%ymm3        # 66c0 <_sk_callback_avx+0x3bd>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,17,43,0,0         // vbroadcastss  0x2b11(%rip),%ymm3        # 65a8 <_sk_callback_avx+0x3c1>
+  .byte  196,226,125,24,29,185,43,0,0        // vbroadcastss  0x2bb9(%rip),%ymm3        # 66c4 <_sk_callback_avx+0x3c1>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,6,43,0,0          // vbroadcastss  0x2b06(%rip),%ymm3        # 65ac <_sk_callback_avx+0x3c5>
+  .byte  196,226,125,24,29,174,43,0,0        // vbroadcastss  0x2bae(%rip),%ymm3        # 66c8 <_sk_callback_avx+0x3c5>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,94                               // pop           %r14
@@ -17534,14 +17567,14 @@ FUNCTION(_sk_store_565_avx)
 _sk_store_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,242,42,0,0          // vbroadcastss  0x2af2(%rip),%ymm8        # 65b0 <_sk_callback_avx+0x3c9>
+  .byte  196,98,125,24,5,154,43,0,0          // vbroadcastss  0x2b9a(%rip),%ymm8        # 66cc <_sk_callback_avx+0x3c9>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,11               // vpslld        $0xb,%xmm9,%xmm10
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
   .byte  196,193,49,114,241,11               // vpslld        $0xb,%xmm9,%xmm9
   .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  .byte  196,98,125,24,21,203,42,0,0         // vbroadcastss  0x2acb(%rip),%ymm10        # 65b4 <_sk_callback_avx+0x3cd>
+  .byte  196,98,125,24,21,115,43,0,0         // vbroadcastss  0x2b73(%rip),%ymm10        # 66d0 <_sk_callback_avx+0x3cd>
   .byte  196,65,116,89,210                   // vmulps        %ymm10,%ymm1,%ymm10
   .byte  196,65,125,91,210                   // vcvtps2dq     %ymm10,%ymm10
   .byte  196,193,33,114,242,5                // vpslld        $0x5,%xmm10,%xmm11
@@ -17555,7 +17588,7 @@ _sk_store_565_avx:
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           3b39 <_sk_store_565_avx+0x89>
+  .byte  117,10                              // jne           3bad <_sk_store_565_avx+0x89>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17563,9 +17596,9 @@ _sk_store_565_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            3b35 <_sk_store_565_avx+0x85>
+  .byte  119,236                             // ja            3ba9 <_sk_store_565_avx+0x85>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 3b98 <_sk_store_565_avx+0xe8>
+  .byte  76,141,13,68,0,0,0                  // lea           0x44(%rip),%r9        # 3c0c <_sk_store_565_avx+0xe8>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17576,7 +17609,7 @@ _sk_store_565_avx:
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           3b35 <_sk_store_565_avx+0x85>
+  .byte  235,159                             // jmp           3ba9 <_sk_store_565_avx+0x85>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  245                                 // cmc
   .byte  255                                 // (bad)
@@ -17609,31 +17642,31 @@ _sk_load_4444_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,152,0,0,0                    // jne           3c5a <_sk_load_4444_avx+0xa6>
+  .byte  15,133,152,0,0,0                    // jne           3cce <_sk_load_4444_avx+0xa6>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  197,241,239,201                     // vpxor         %xmm1,%xmm1,%xmm1
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,212,41,0,0         // vbroadcastss  0x29d4(%rip),%ymm0        # 65b8 <_sk_callback_avx+0x3d1>
+  .byte  196,226,125,24,5,124,42,0,0         // vbroadcastss  0x2a7c(%rip),%ymm0        # 66d4 <_sk_callback_avx+0x3d1>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,199,41,0,0        // vbroadcastss  0x29c7(%rip),%ymm1        # 65bc <_sk_callback_avx+0x3d5>
+  .byte  196,226,125,24,13,111,42,0,0        // vbroadcastss  0x2a6f(%rip),%ymm1        # 66d8 <_sk_callback_avx+0x3d5>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,190,41,0,0        // vbroadcastss  0x29be(%rip),%ymm1        # 65c0 <_sk_callback_avx+0x3d9>
+  .byte  196,226,125,24,13,102,42,0,0        // vbroadcastss  0x2a66(%rip),%ymm1        # 66dc <_sk_callback_avx+0x3d9>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,177,41,0,0        // vbroadcastss  0x29b1(%rip),%ymm2        # 65c4 <_sk_callback_avx+0x3dd>
+  .byte  196,226,125,24,21,89,42,0,0         // vbroadcastss  0x2a59(%rip),%ymm2        # 66e0 <_sk_callback_avx+0x3dd>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,168,41,0,0        // vbroadcastss  0x29a8(%rip),%ymm2        # 65c8 <_sk_callback_avx+0x3e1>
+  .byte  196,226,125,24,21,80,42,0,0         // vbroadcastss  0x2a50(%rip),%ymm2        # 66e4 <_sk_callback_avx+0x3e1>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,155,41,0,0          // vbroadcastss  0x299b(%rip),%ymm8        # 65cc <_sk_callback_avx+0x3e5>
+  .byte  196,98,125,24,5,67,42,0,0           // vbroadcastss  0x2a43(%rip),%ymm8        # 66e8 <_sk_callback_avx+0x3e5>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,145,41,0,0          // vbroadcastss  0x2991(%rip),%ymm8        # 65d0 <_sk_callback_avx+0x3e9>
+  .byte  196,98,125,24,5,57,42,0,0           // vbroadcastss  0x2a39(%rip),%ymm8        # 66ec <_sk_callback_avx+0x3e9>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,131,41,0,0          // vbroadcastss  0x2983(%rip),%ymm8        # 65d4 <_sk_callback_avx+0x3ed>
+  .byte  196,98,125,24,5,43,42,0,0           // vbroadcastss  0x2a2b(%rip),%ymm8        # 66f0 <_sk_callback_avx+0x3ed>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17642,9 +17675,9 @@ _sk_load_4444_avx:
   .byte  197,249,239,192                     // vpxor         %xmm0,%xmm0,%xmm0
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  15,135,86,255,255,255               // ja            3bc8 <_sk_load_4444_avx+0x14>
+  .byte  15,135,86,255,255,255               // ja            3c3c <_sk_load_4444_avx+0x14>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 3cc8 <_sk_load_4444_avx+0x114>
+  .byte  76,141,13,75,0,0,0                  // lea           0x4b(%rip),%r9        # 3d3c <_sk_load_4444_avx+0x114>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17656,7 +17689,7 @@ _sk_load_4444_avx:
   .byte  196,193,121,196,68,122,4,2          // vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,68,122,2,1          // vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   .byte  196,193,121,196,4,122,0             // vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  .byte  233,2,255,255,255                   // jmpq          3bc8 <_sk_load_4444_avx+0x14>
+  .byte  233,2,255,255,255                   // jmpq          3c3c <_sk_load_4444_avx+0x14>
   .byte  102,144                             // xchg          %ax,%ax
   .byte  242,255                             // repnz         (bad)
   .byte  255                                 // (bad)
@@ -17735,25 +17768,25 @@ _sk_gather_4444_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,26,40,0,0          // vbroadcastss  0x281a(%rip),%ymm0        # 65d8 <_sk_callback_avx+0x3f1>
+  .byte  196,226,125,24,5,194,40,0,0         // vbroadcastss  0x28c2(%rip),%ymm0        # 66f4 <_sk_callback_avx+0x3f1>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,13,40,0,0         // vbroadcastss  0x280d(%rip),%ymm1        # 65dc <_sk_callback_avx+0x3f5>
+  .byte  196,226,125,24,13,181,40,0,0        // vbroadcastss  0x28b5(%rip),%ymm1        # 66f8 <_sk_callback_avx+0x3f5>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,4,40,0,0          // vbroadcastss  0x2804(%rip),%ymm1        # 65e0 <_sk_callback_avx+0x3f9>
+  .byte  196,226,125,24,13,172,40,0,0        // vbroadcastss  0x28ac(%rip),%ymm1        # 66fc <_sk_callback_avx+0x3f9>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,247,39,0,0        // vbroadcastss  0x27f7(%rip),%ymm2        # 65e4 <_sk_callback_avx+0x3fd>
+  .byte  196,226,125,24,21,159,40,0,0        // vbroadcastss  0x289f(%rip),%ymm2        # 6700 <_sk_callback_avx+0x3fd>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,238,39,0,0        // vbroadcastss  0x27ee(%rip),%ymm2        # 65e8 <_sk_callback_avx+0x401>
+  .byte  196,226,125,24,21,150,40,0,0        // vbroadcastss  0x2896(%rip),%ymm2        # 6704 <_sk_callback_avx+0x401>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,225,39,0,0          // vbroadcastss  0x27e1(%rip),%ymm8        # 65ec <_sk_callback_avx+0x405>
+  .byte  196,98,125,24,5,137,40,0,0          // vbroadcastss  0x2889(%rip),%ymm8        # 6708 <_sk_callback_avx+0x405>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,215,39,0,0          // vbroadcastss  0x27d7(%rip),%ymm8        # 65f0 <_sk_callback_avx+0x409>
+  .byte  196,98,125,24,5,127,40,0,0          // vbroadcastss  0x287f(%rip),%ymm8        # 670c <_sk_callback_avx+0x409>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,201,39,0,0          // vbroadcastss  0x27c9(%rip),%ymm8        # 65f4 <_sk_callback_avx+0x40d>
+  .byte  196,98,125,24,5,113,40,0,0          // vbroadcastss  0x2871(%rip),%ymm8        # 6710 <_sk_callback_avx+0x40d>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -17769,7 +17802,7 @@ FUNCTION(_sk_store_4444_avx)
 _sk_store_4444_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,174,39,0,0          // vbroadcastss  0x27ae(%rip),%ymm8        # 65f8 <_sk_callback_avx+0x411>
+  .byte  196,98,125,24,5,86,40,0,0           // vbroadcastss  0x2856(%rip),%ymm8        # 6714 <_sk_callback_avx+0x411>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,12               // vpslld        $0xc,%xmm9,%xmm10
@@ -17796,7 +17829,7 @@ _sk_store_4444_avx:
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
   .byte  196,66,57,43,193                    // vpackusdw     %xmm9,%xmm8,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,10                              // jne           3ee3 <_sk_store_4444_avx+0xa7>
+  .byte  117,10                              // jne           3f57 <_sk_store_4444_avx+0xa7>
   .byte  196,65,122,127,4,122                // vmovdqu       %xmm8,(%r10,%rdi,2)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17804,9 +17837,9 @@ _sk_store_4444_avx:
   .byte  65,128,224,7                        // and           $0x7,%r8b
   .byte  65,254,200                          // dec           %r8b
   .byte  65,128,248,6                        // cmp           $0x6,%r8b
-  .byte  119,236                             // ja            3edf <_sk_store_4444_avx+0xa3>
+  .byte  119,236                             // ja            3f53 <_sk_store_4444_avx+0xa3>
   .byte  69,15,182,192                       // movzbl        %r8b,%r8d
-  .byte  76,141,13,66,0,0,0                  // lea           0x42(%rip),%r9        # 3f40 <_sk_store_4444_avx+0x104>
+  .byte  76,141,13,66,0,0,0                  // lea           0x42(%rip),%r9        # 3fb4 <_sk_store_4444_avx+0x104>
   .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
   .byte  76,1,200                            // add           %r9,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17817,7 +17850,7 @@ _sk_store_4444_avx:
   .byte  196,67,121,21,68,122,4,2            // vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   .byte  196,67,121,21,68,122,2,1            // vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   .byte  196,67,121,21,4,122,0               // vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  .byte  235,159                             // jmp           3edf <_sk_store_4444_avx+0xa3>
+  .byte  235,159                             // jmp           3f53 <_sk_store_4444_avx+0xa3>
   .byte  247,255                             // idiv          %edi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
@@ -17845,53 +17878,87 @@ HIDDEN _sk_load_8888_avx
 .globl _sk_load_8888_avx
 FUNCTION(_sk_load_8888_avx)
 _sk_load_8888_avx:
-  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,3,8                              // add           (%rax),%r9
-  .byte  77,133,192                          // test          %r8,%r8
-  .byte  15,133,137,0,0,0                    // jne           3ffe <_sk_load_8888_avx+0xa2>
-  .byte  196,193,124,16,25                   // vmovups       (%r9),%ymm3
-  .byte  197,124,40,21,94,40,0,0             // vmovaps       0x285e(%rip),%ymm10        # 67e0 <_sk_callback_avx+0x5f9>
-  .byte  196,193,100,84,194                  // vandps        %ymm10,%ymm3,%ymm0
+  .byte  76,139,16                           // mov           (%rax),%r10
+  .byte  72,133,201                          // test          %rcx,%rcx
+  .byte  15,133,135,0,0,0                    // jne           4065 <_sk_load_8888_avx+0x95>
+  .byte  196,65,124,16,12,186                // vmovups       (%r10,%rdi,4),%ymm9
+  .byte  197,124,40,21,148,40,0,0            // vmovaps       0x2894(%rip),%ymm10        # 6880 <_sk_callback_avx+0x57d>
+  .byte  196,193,52,84,194                   // vandps        %ymm10,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,104,38,0,0          // vbroadcastss  0x2668(%rip),%ymm8        # 65fc <_sk_callback_avx+0x415>
+  .byte  196,98,125,24,5,26,39,0,0           // vbroadcastss  0x271a(%rip),%ymm8        # 6718 <_sk_callback_avx+0x415>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  197,241,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm1
-  .byte  196,195,125,25,217,1                // vextractf128  $0x1,%ymm3,%xmm9
-  .byte  196,193,105,114,209,8               // vpsrld        $0x8,%xmm9,%xmm2
+  .byte  196,193,113,114,209,8               // vpsrld        $0x8,%xmm9,%xmm1
+  .byte  196,99,125,25,203,1                 // vextractf128  $0x1,%ymm9,%xmm3
+  .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  196,193,116,84,202                  // vandps        %ymm10,%ymm1,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  197,161,114,211,16                  // vpsrld        $0x10,%xmm3,%xmm11
-  .byte  196,193,105,114,209,16              // vpsrld        $0x10,%xmm9,%xmm2
+  .byte  196,193,33,114,209,16               // vpsrld        $0x10,%xmm9,%xmm11
+  .byte  197,233,114,211,16                  // vpsrld        $0x10,%xmm3,%xmm2
   .byte  196,227,37,24,210,1                 // vinsertf128   $0x1,%xmm2,%ymm11,%ymm2
   .byte  196,193,108,84,210                  // vandps        %ymm10,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  197,169,114,211,24                  // vpsrld        $0x18,%xmm3,%xmm10
-  .byte  196,193,97,114,209,24               // vpsrld        $0x18,%xmm9,%xmm3
-  .byte  196,227,45,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm10,%ymm3
+  .byte  196,193,49,114,209,24               // vpsrld        $0x18,%xmm9,%xmm9
+  .byte  197,225,114,211,24                  // vpsrld        $0x18,%xmm3,%xmm3
+  .byte  196,227,53,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
-  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
-  .byte  68,41,193                           // sub           %r8d,%ecx
-  .byte  192,225,3                           // shl           $0x3,%cl
-  .byte  72,199,192,255,255,255,255          // mov           $0xffffffffffffffff,%rax
-  .byte  72,211,232                          // shr           %cl,%rax
-  .byte  196,225,249,110,192                 // vmovq         %rax,%xmm0
-  .byte  196,226,121,48,192                  // vpmovzxbw     %xmm0,%xmm0
-  .byte  196,226,121,0,13,58,39,0,0          // vpshufb       0x273a(%rip),%xmm0,%xmm1        # 6760 <_sk_callback_avx+0x579>
-  .byte  196,226,121,33,201                  // vpmovsxbd     %xmm1,%xmm1
-  .byte  196,226,121,0,5,60,39,0,0           // vpshufb       0x273c(%rip),%xmm0,%xmm0        # 6770 <_sk_callback_avx+0x589>
-  .byte  196,226,121,33,192                  // vpmovsxbd     %xmm0,%xmm0
-  .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  .byte  196,194,125,44,25                   // vmaskmovps    (%r9),%ymm0,%ymm3
-  .byte  233,49,255,255,255                  // jmpq          3f7a <_sk_load_8888_avx+0x1e>
+  .byte  65,137,200                          // mov           %ecx,%r8d
+  .byte  65,128,224,7                        // and           $0x7,%r8b
+  .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
+  .byte  65,254,200                          // dec           %r8b
+  .byte  65,128,248,6                        // cmp           $0x6,%r8b
+  .byte  15,135,102,255,255,255              // ja            3fe4 <_sk_load_8888_avx+0x14>
+  .byte  69,15,182,192                       // movzbl        %r8b,%r8d
+  .byte  76,141,13,139,0,0,0                 // lea           0x8b(%rip),%r9        # 4114 <_sk_load_8888_avx+0x144>
+  .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
+  .byte  76,1,200                            // add           %r9,%rax
+  .byte  255,224                             // jmpq          *%rax
+  .byte  196,193,121,110,68,186,24           // vmovd         0x18(%r10,%rdi,4),%xmm0
+  .byte  197,249,112,192,68                  // vpshufd       $0x44,%xmm0,%xmm0
+  .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
+  .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
+  .byte  196,99,117,12,200,64                // vblendps      $0x40,%ymm0,%ymm1,%ymm9
+  .byte  196,99,125,25,200,1                 // vextractf128  $0x1,%ymm9,%xmm0
+  .byte  196,195,121,34,68,186,20,1          // vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
+  .byte  196,99,53,24,200,1                  // vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
+  .byte  196,99,125,25,200,1                 // vextractf128  $0x1,%ymm9,%xmm0
+  .byte  196,195,121,34,68,186,16,0          // vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
+  .byte  196,99,53,24,200,1                  // vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
+  .byte  196,195,49,34,68,186,12,3           // vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
+  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  .byte  196,195,49,34,68,186,8,2            // vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
+  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  .byte  196,195,49,34,68,186,4,1            // vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
+  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  .byte  196,195,49,34,4,186,0               // vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
+  .byte  196,99,53,12,200,15                 // vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  .byte  233,210,254,255,255                 // jmpq          3fe4 <_sk_load_8888_avx+0x14>
+  .byte  102,144                             // xchg          %ax,%ax
+  .byte  236                                 // in            (%dx),%al
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  222,255                             // fdivrp        %st,%st(7)
+  .byte  255                                 // (bad)
+  .byte  255,208                             // callq         *%rax
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,194                             // inc           %edx
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,174,255,255,255,154             // ljmp          *-0x65000001(%rsi)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  126,255                             // jle           412d <_sk_load_8888_avx+0x15d>
+  .byte  255                                 // (bad)
+  .byte  255                                 // .byte         0xff
 
 HIDDEN _sk_gather_8888_avx
 .globl _sk_gather_8888_avx
@@ -17934,10 +18001,10 @@ _sk_gather_8888_avx:
   .byte  196,131,121,34,4,152,2              // vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   .byte  196,131,121,34,28,144,3             // vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,5,39,0,0              // vmovaps       0x2705(%rip),%ymm10        # 6800 <_sk_callback_avx+0x619>
+  .byte  197,124,40,21,190,38,0,0            // vmovaps       0x26be(%rip),%ymm10        # 68a0 <_sk_callback_avx+0x59d>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,243,36,0,0         // vbroadcastss  0x24f3(%rip),%ymm9        # 6600 <_sk_callback_avx+0x419>
+  .byte  196,98,125,24,13,40,37,0,0          // vbroadcastss  0x2528(%rip),%ymm9        # 671c <_sk_callback_avx+0x419>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -17967,11 +18034,9 @@ HIDDEN _sk_store_8888_avx
 .globl _sk_store_8888_avx
 FUNCTION(_sk_store_8888_avx)
 _sk_store_8888_avx:
-  .byte  73,137,200                          // mov           %rcx,%r8
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,3,8                              // add           (%rax),%r9
-  .byte  196,98,125,24,5,118,36,0,0          // vbroadcastss  0x2476(%rip),%ymm8        # 6604 <_sk_callback_avx+0x41d>
+  .byte  76,139,16                           // mov           (%rax),%r10
+  .byte  196,98,125,24,5,182,36,0,0          // vbroadcastss  0x24b6(%rip),%ymm8        # 6720 <_sk_callback_avx+0x41d>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,65,116,89,208                   // vmulps        %ymm8,%ymm1,%ymm10
@@ -17995,26 +18060,56 @@ _sk_store_8888_avx:
   .byte  196,67,37,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm11,%ymm8
   .byte  196,65,45,86,192                    // vorpd         %ymm8,%ymm10,%ymm8
   .byte  196,65,53,86,192                    // vorpd         %ymm8,%ymm9,%ymm8
-  .byte  77,133,192                          // test          %r8,%r8
-  .byte  117,12                              // jne           421e <_sk_store_8888_avx+0xa9>
-  .byte  196,65,124,17,1                     // vmovups       %ymm8,(%r9)
+  .byte  72,133,201                          // test          %rcx,%rcx
+  .byte  117,10                              // jne           42f8 <_sk_store_8888_avx+0x9c>
+  .byte  196,65,124,17,4,186                 // vmovups       %ymm8,(%r10,%rdi,4)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  76,137,193                          // mov           %r8,%rcx
   .byte  255,224                             // jmpq          *%rax
-  .byte  185,8,0,0,0                         // mov           $0x8,%ecx
-  .byte  68,41,193                           // sub           %r8d,%ecx
-  .byte  192,225,3                           // shl           $0x3,%cl
-  .byte  72,199,192,255,255,255,255          // mov           $0xffffffffffffffff,%rax
-  .byte  72,211,232                          // shr           %cl,%rax
-  .byte  196,97,249,110,200                  // vmovq         %rax,%xmm9
-  .byte  196,66,121,48,201                   // vpmovzxbw     %xmm9,%xmm9
-  .byte  196,98,49,0,21,58,37,0,0            // vpshufb       0x253a(%rip),%xmm9,%xmm10        # 6780 <_sk_callback_avx+0x599>
-  .byte  196,66,121,33,210                   // vpmovsxbd     %xmm10,%xmm10
-  .byte  196,98,49,0,13,60,37,0,0            // vpshufb       0x253c(%rip),%xmm9,%xmm9        # 6790 <_sk_callback_avx+0x5a9>
-  .byte  196,66,121,33,201                   // vpmovsxbd     %xmm9,%xmm9
-  .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  .byte  196,66,53,46,1                      // vmaskmovps    %ymm8,%ymm9,(%r9)
-  .byte  235,177                             // jmp           4217 <_sk_store_8888_avx+0xa2>
+  .byte  65,137,200                          // mov           %ecx,%r8d
+  .byte  65,128,224,7                        // and           $0x7,%r8b
+  .byte  65,254,200                          // dec           %r8b
+  .byte  65,128,248,6                        // cmp           $0x6,%r8b
+  .byte  119,236                             // ja            42f4 <_sk_store_8888_avx+0x98>
+  .byte  69,15,182,192                       // movzbl        %r8b,%r8d
+  .byte  76,141,13,85,0,0,0                  // lea           0x55(%rip),%r9        # 4368 <_sk_store_8888_avx+0x10c>
+  .byte  75,99,4,129                         // movslq        (%r9,%r8,4),%rax
+  .byte  76,1,200                            // add           %r9,%rax
+  .byte  255,224                             // jmpq          *%rax
+  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
+  .byte  196,67,121,22,76,186,24,2           // vpextrd       $0x2,%xmm9,0x18(%r10,%rdi,4)
+  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
+  .byte  196,67,121,22,76,186,20,1           // vpextrd       $0x1,%xmm9,0x14(%r10,%rdi,4)
+  .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
+  .byte  196,65,122,17,76,186,16             // vmovss        %xmm9,0x10(%r10,%rdi,4)
+  .byte  196,67,121,22,68,186,12,3           // vpextrd       $0x3,%xmm8,0xc(%r10,%rdi,4)
+  .byte  196,67,121,22,68,186,8,2            // vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
+  .byte  196,67,121,22,68,186,4,1            // vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
+  .byte  196,65,121,126,4,186                // vmovd         %xmm8,(%r10,%rdi,4)
+  .byte  235,143                             // jmp           42f4 <_sk_store_8888_avx+0x98>
+  .byte  15,31,0                             // nopl          (%rax)
+  .byte  245                                 // cmc
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  237                                 // in            (%dx),%eax
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,229                             // jmpq          *%rbp
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  221,255                             // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,208                             // callq         *%rax
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255,194                             // inc           %edx
+  .byte  255                                 // (bad)
+  .byte  255                                 // (bad)
+  .byte  255                                 // .byte         0xff
+  .byte  180,255                             // mov           $0xff,%ah
+  .byte  255                                 // (bad)
+  .byte  255                                 // .byte         0xff
 
 HIDDEN _sk_load_f16_avx
 .globl _sk_load_f16_avx
@@ -18028,7 +18123,7 @@ _sk_load_f16_avx:
   .byte  197,252,17,116,36,192               // vmovups       %ymm6,-0x40(%rsp)
   .byte  197,252,17,108,36,160               // vmovups       %ymm5,-0x60(%rsp)
   .byte  197,254,127,100,36,128              // vmovdqu       %ymm4,-0x80(%rsp)
-  .byte  15,133,141,2,0,0                    // jne           451d <_sk_load_f16_avx+0x2b7>
+  .byte  15,133,141,2,0,0                    // jne           463b <_sk_load_f16_avx+0x2b7>
   .byte  197,121,16,4,248                    // vmovupd       (%rax,%rdi,8),%xmm8
   .byte  197,249,16,84,248,16                // vmovupd       0x10(%rax,%rdi,8),%xmm2
   .byte  197,249,16,76,248,32                // vmovupd       0x20(%rax,%rdi,8),%xmm1
@@ -18046,13 +18141,13 @@ _sk_load_f16_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  .byte  196,98,125,24,37,31,35,0,0          // vbroadcastss  0x231f(%rip),%ymm12        # 6608 <_sk_callback_avx+0x421>
+  .byte  196,98,125,24,37,29,35,0,0          // vbroadcastss  0x231d(%rip),%ymm12        # 6724 <_sk_callback_avx+0x421>
   .byte  196,193,124,84,204                  // vandps        %ymm12,%ymm0,%ymm1
   .byte  197,252,87,193                      // vxorps        %ymm1,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,11,35,0,0          // vbroadcastss  0x230b(%rip),%xmm11        # 660c <_sk_callback_avx+0x425>
+  .byte  196,98,121,24,29,9,35,0,0           // vbroadcastss  0x2309(%rip),%xmm11        # 6728 <_sk_callback_avx+0x425>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,1,35,0,0           // vbroadcastss  0x2301(%rip),%xmm13        # 6610 <_sk_callback_avx+0x429>
+  .byte  196,98,121,24,45,255,34,0,0         // vbroadcastss  0x22ff(%rip),%xmm13        # 672c <_sk_callback_avx+0x429>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -18066,7 +18161,7 @@ _sk_load_f16_avx:
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,193                      // vorps         %ymm1,%ymm0,%ymm0
   .byte  196,227,125,25,193,1                // vextractf128  $0x1,%ymm0,%xmm1
-  .byte  196,226,121,24,29,183,34,0,0        // vbroadcastss  0x22b7(%rip),%xmm3        # 6614 <_sk_callback_avx+0x42d>
+  .byte  196,226,121,24,29,181,34,0,0        // vbroadcastss  0x22b5(%rip),%xmm3        # 6730 <_sk_callback_avx+0x42d>
   .byte  197,241,254,203                     // vpaddd        %xmm3,%xmm1,%xmm1
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -18159,29 +18254,29 @@ _sk_load_f16_avx:
   .byte  197,123,16,4,248                    // vmovsd        (%rax,%rdi,8),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,79                              // je            457c <_sk_load_f16_avx+0x316>
+  .byte  116,79                              // je            469a <_sk_load_f16_avx+0x316>
   .byte  197,57,22,68,248,8                  // vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,67                              // jb            457c <_sk_load_f16_avx+0x316>
+  .byte  114,67                              // jb            469a <_sk_load_f16_avx+0x316>
   .byte  197,251,16,84,248,16                // vmovsd        0x10(%rax,%rdi,8),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,68                              // je            4589 <_sk_load_f16_avx+0x323>
+  .byte  116,68                              // je            46a7 <_sk_load_f16_avx+0x323>
   .byte  197,233,22,84,248,24                // vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,56                              // jb            4589 <_sk_load_f16_avx+0x323>
+  .byte  114,56                              // jb            46a7 <_sk_load_f16_avx+0x323>
   .byte  197,251,16,76,248,32                // vmovsd        0x20(%rax,%rdi,8),%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,70,253,255,255               // je            42a7 <_sk_load_f16_avx+0x41>
+  .byte  15,132,70,253,255,255               // je            43c5 <_sk_load_f16_avx+0x41>
   .byte  197,241,22,76,248,40                // vmovhpd       0x28(%rax,%rdi,8),%xmm1,%xmm1
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,54,253,255,255               // jb            42a7 <_sk_load_f16_avx+0x41>
+  .byte  15,130,54,253,255,255               // jb            43c5 <_sk_load_f16_avx+0x41>
   .byte  197,122,126,76,248,48               // vmovq         0x30(%rax,%rdi,8),%xmm9
-  .byte  233,43,253,255,255                  // jmpq          42a7 <_sk_load_f16_avx+0x41>
+  .byte  233,43,253,255,255                  // jmpq          43c5 <_sk_load_f16_avx+0x41>
   .byte  197,241,87,201                      // vxorpd        %xmm1,%xmm1,%xmm1
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,30,253,255,255                  // jmpq          42a7 <_sk_load_f16_avx+0x41>
+  .byte  233,30,253,255,255                  // jmpq          43c5 <_sk_load_f16_avx+0x41>
   .byte  197,241,87,201                      // vxorpd        %xmm1,%xmm1,%xmm1
-  .byte  233,21,253,255,255                  // jmpq          42a7 <_sk_load_f16_avx+0x41>
+  .byte  233,21,253,255,255                  // jmpq          43c5 <_sk_load_f16_avx+0x41>
 
 HIDDEN _sk_gather_f16_avx
 .globl _sk_gather_f16_avx
@@ -18245,13 +18340,13 @@ _sk_gather_f16_avx:
   .byte  197,249,105,210                     // vpunpckhwd    %xmm2,%xmm0,%xmm2
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  .byte  196,98,125,24,37,123,31,0,0         // vbroadcastss  0x1f7b(%rip),%ymm12        # 6618 <_sk_callback_avx+0x431>
+  .byte  196,98,125,24,37,121,31,0,0         // vbroadcastss  0x1f79(%rip),%ymm12        # 6734 <_sk_callback_avx+0x431>
   .byte  196,193,124,84,212                  // vandps        %ymm12,%ymm0,%ymm2
   .byte  197,252,87,194                      // vxorps        %ymm2,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,103,31,0,0         // vbroadcastss  0x1f67(%rip),%xmm11        # 661c <_sk_callback_avx+0x435>
+  .byte  196,98,121,24,29,101,31,0,0         // vbroadcastss  0x1f65(%rip),%xmm11        # 6738 <_sk_callback_avx+0x435>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,93,31,0,0          // vbroadcastss  0x1f5d(%rip),%xmm13        # 6620 <_sk_callback_avx+0x439>
+  .byte  196,98,121,24,45,91,31,0,0          // vbroadcastss  0x1f5b(%rip),%xmm13        # 673c <_sk_callback_avx+0x439>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -18265,7 +18360,7 @@ _sk_gather_f16_avx:
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,194                      // vorps         %ymm2,%ymm0,%ymm0
   .byte  196,227,125,25,194,1                // vextractf128  $0x1,%ymm0,%xmm2
-  .byte  196,226,121,24,29,19,31,0,0         // vbroadcastss  0x1f13(%rip),%xmm3        # 6624 <_sk_callback_avx+0x43d>
+  .byte  196,226,121,24,29,17,31,0,0         // vbroadcastss  0x1f11(%rip),%xmm3        # 6740 <_sk_callback_avx+0x43d>
   .byte  197,233,254,211                     // vpaddd        %xmm3,%xmm2,%xmm2
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -18369,12 +18464,12 @@ _sk_store_f16_avx:
   .byte  197,252,17,52,36                    // vmovups       %ymm6,(%rsp)
   .byte  197,252,17,108,36,224               // vmovups       %ymm5,-0x20(%rsp)
   .byte  197,252,17,100,36,192               // vmovups       %ymm4,-0x40(%rsp)
-  .byte  196,98,125,24,13,44,29,0,0          // vbroadcastss  0x1d2c(%rip),%ymm9        # 6628 <_sk_callback_avx+0x441>
+  .byte  196,98,125,24,13,42,29,0,0          // vbroadcastss  0x1d2a(%rip),%ymm9        # 6744 <_sk_callback_avx+0x441>
   .byte  196,65,124,84,209                   // vandps        %ymm9,%ymm0,%ymm10
   .byte  197,252,17,68,36,128                // vmovups       %ymm0,-0x80(%rsp)
   .byte  196,65,124,87,218                   // vxorps        %ymm10,%ymm0,%ymm11
   .byte  196,67,125,25,220,1                 // vextractf128  $0x1,%ymm11,%xmm12
-  .byte  196,98,121,24,5,17,29,0,0           // vbroadcastss  0x1d11(%rip),%xmm8        # 662c <_sk_callback_avx+0x445>
+  .byte  196,98,121,24,5,15,29,0,0           // vbroadcastss  0x1d0f(%rip),%xmm8        # 6748 <_sk_callback_avx+0x445>
   .byte  196,65,57,102,236                   // vpcmpgtd      %xmm12,%xmm8,%xmm13
   .byte  196,65,57,102,243                   // vpcmpgtd      %xmm11,%xmm8,%xmm14
   .byte  196,67,13,24,237,1                  // vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -18384,7 +18479,7 @@ _sk_store_f16_avx:
   .byte  196,67,13,24,242,1                  // vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   .byte  196,193,33,114,211,13               // vpsrld        $0xd,%xmm11,%xmm11
   .byte  196,193,25,114,212,13               // vpsrld        $0xd,%xmm12,%xmm12
-  .byte  196,98,125,24,21,216,28,0,0         // vbroadcastss  0x1cd8(%rip),%ymm10        # 6630 <_sk_callback_avx+0x449>
+  .byte  196,98,125,24,21,214,28,0,0         // vbroadcastss  0x1cd6(%rip),%ymm10        # 674c <_sk_callback_avx+0x449>
   .byte  196,65,12,86,242                    // vorps         %ymm10,%ymm14,%ymm14
   .byte  196,67,125,25,247,1                 // vextractf128  $0x1,%ymm14,%xmm15
   .byte  196,65,1,254,228                    // vpaddd        %xmm12,%xmm15,%xmm12
@@ -18466,7 +18561,7 @@ _sk_store_f16_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,66                              // jne           4b36 <_sk_store_f16_avx+0x25e>
+  .byte  117,66                              // jne           4c54 <_sk_store_f16_avx+0x25e>
   .byte  197,120,17,28,248                   // vmovups       %xmm11,(%rax,%rdi,8)
   .byte  197,120,17,84,248,16                // vmovups       %xmm10,0x10(%rax,%rdi,8)
   .byte  197,120,17,76,248,32                // vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -18482,22 +18577,22 @@ _sk_store_f16_avx:
   .byte  255,224                             // jmpq          *%rax
   .byte  197,121,214,28,248                  // vmovq         %xmm11,(%rax,%rdi,8)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,202                             // je            4b0b <_sk_store_f16_avx+0x233>
+  .byte  116,202                             // je            4c29 <_sk_store_f16_avx+0x233>
   .byte  197,121,23,92,248,8                 // vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,190                             // jb            4b0b <_sk_store_f16_avx+0x233>
+  .byte  114,190                             // jb            4c29 <_sk_store_f16_avx+0x233>
   .byte  197,121,214,84,248,16               // vmovq         %xmm10,0x10(%rax,%rdi,8)
-  .byte  116,182                             // je            4b0b <_sk_store_f16_avx+0x233>
+  .byte  116,182                             // je            4c29 <_sk_store_f16_avx+0x233>
   .byte  197,121,23,84,248,24                // vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,170                             // jb            4b0b <_sk_store_f16_avx+0x233>
+  .byte  114,170                             // jb            4c29 <_sk_store_f16_avx+0x233>
   .byte  197,121,214,76,248,32               // vmovq         %xmm9,0x20(%rax,%rdi,8)
-  .byte  116,162                             // je            4b0b <_sk_store_f16_avx+0x233>
+  .byte  116,162                             // je            4c29 <_sk_store_f16_avx+0x233>
   .byte  197,121,23,76,248,40                // vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,150                             // jb            4b0b <_sk_store_f16_avx+0x233>
+  .byte  114,150                             // jb            4c29 <_sk_store_f16_avx+0x233>
   .byte  197,121,214,68,248,48               // vmovq         %xmm8,0x30(%rax,%rdi,8)
-  .byte  235,142                             // jmp           4b0b <_sk_store_f16_avx+0x233>
+  .byte  235,142                             // jmp           4c29 <_sk_store_f16_avx+0x233>
 
 HIDDEN _sk_load_u16_be_avx
 .globl _sk_load_u16_be_avx
@@ -18507,7 +18602,7 @@ _sk_load_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,253,0,0,0                    // jne           4c90 <_sk_load_u16_be_avx+0x113>
+  .byte  15,133,253,0,0,0                    // jne           4dae <_sk_load_u16_be_avx+0x113>
   .byte  196,65,121,16,4,64                  // vmovupd       (%r8,%rax,2),%xmm8
   .byte  196,193,121,16,84,64,16             // vmovupd       0x10(%r8,%rax,2),%xmm2
   .byte  196,193,121,16,92,64,32             // vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -18529,7 +18624,7 @@ _sk_load_u16_be_avx:
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,48,26,0,0          // vbroadcastss  0x1a30(%rip),%ymm11        # 6634 <_sk_callback_avx+0x44d>
+  .byte  196,98,125,24,29,46,26,0,0          // vbroadcastss  0x1a2e(%rip),%ymm11        # 6750 <_sk_callback_avx+0x44d>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,177,109,202                     // vpunpckhqdq   %xmm2,%xmm9,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -18563,29 +18658,29 @@ _sk_load_u16_be_avx:
   .byte  196,65,123,16,4,64                  // vmovsd        (%r8,%rax,2),%xmm8
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,85                              // je            4cf6 <_sk_load_u16_be_avx+0x179>
+  .byte  116,85                              // je            4e14 <_sk_load_u16_be_avx+0x179>
   .byte  196,65,57,22,68,64,8                // vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,72                              // jb            4cf6 <_sk_load_u16_be_avx+0x179>
+  .byte  114,72                              // jb            4e14 <_sk_load_u16_be_avx+0x179>
   .byte  196,193,123,16,84,64,16             // vmovsd        0x10(%r8,%rax,2),%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  116,72                              // je            4d03 <_sk_load_u16_be_avx+0x186>
+  .byte  116,72                              // je            4e21 <_sk_load_u16_be_avx+0x186>
   .byte  196,193,105,22,84,64,24             // vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,59                              // jb            4d03 <_sk_load_u16_be_avx+0x186>
+  .byte  114,59                              // jb            4e21 <_sk_load_u16_be_avx+0x186>
   .byte  196,193,123,16,92,64,32             // vmovsd        0x20(%r8,%rax,2),%xmm3
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  15,132,213,254,255,255              // je            4bae <_sk_load_u16_be_avx+0x31>
+  .byte  15,132,213,254,255,255              // je            4ccc <_sk_load_u16_be_avx+0x31>
   .byte  196,193,97,22,92,64,40              // vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  15,130,196,254,255,255              // jb            4bae <_sk_load_u16_be_avx+0x31>
+  .byte  15,130,196,254,255,255              // jb            4ccc <_sk_load_u16_be_avx+0x31>
   .byte  196,65,122,126,76,64,48             // vmovq         0x30(%r8,%rax,2),%xmm9
-  .byte  233,184,254,255,255                 // jmpq          4bae <_sk_load_u16_be_avx+0x31>
+  .byte  233,184,254,255,255                 // jmpq          4ccc <_sk_load_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
   .byte  197,233,87,210                      // vxorpd        %xmm2,%xmm2,%xmm2
-  .byte  233,171,254,255,255                 // jmpq          4bae <_sk_load_u16_be_avx+0x31>
+  .byte  233,171,254,255,255                 // jmpq          4ccc <_sk_load_u16_be_avx+0x31>
   .byte  197,225,87,219                      // vxorpd        %xmm3,%xmm3,%xmm3
-  .byte  233,162,254,255,255                 // jmpq          4bae <_sk_load_u16_be_avx+0x31>
+  .byte  233,162,254,255,255                 // jmpq          4ccc <_sk_load_u16_be_avx+0x31>
 
 HIDDEN _sk_load_rgb_u16_be_avx
 .globl _sk_load_rgb_u16_be_avx
@@ -18595,7 +18690,7 @@ _sk_load_rgb_u16_be_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,127                        // lea           (%rdi,%rdi,2),%rax
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  15,133,243,0,0,0                    // jne           4e11 <_sk_load_rgb_u16_be_avx+0x105>
+  .byte  15,133,243,0,0,0                    // jne           4f2f <_sk_load_rgb_u16_be_avx+0x105>
   .byte  196,193,122,111,4,64                // vmovdqu       (%r8,%rax,2),%xmm0
   .byte  196,193,122,111,84,64,12            // vmovdqu       0xc(%r8,%rax,2),%xmm2
   .byte  196,193,122,111,76,64,24            // vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -18622,7 +18717,7 @@ _sk_load_rgb_u16_be_avx:
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,144,24,0,0         // vbroadcastss  0x1890(%rip),%ymm11        # 6638 <_sk_callback_avx+0x451>
+  .byte  196,98,125,24,29,142,24,0,0         // vbroadcastss  0x188e(%rip),%ymm11        # 6754 <_sk_callback_avx+0x451>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -18643,41 +18738,41 @@ _sk_load_rgb_u16_be_avx:
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,45,24,0,0         // vbroadcastss  0x182d(%rip),%ymm3        # 663c <_sk_callback_avx+0x455>
+  .byte  196,226,125,24,29,43,24,0,0         // vbroadcastss  0x182b(%rip),%ymm3        # 6758 <_sk_callback_avx+0x455>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,193,121,110,4,64                // vmovd         (%r8,%rax,2),%xmm0
   .byte  196,193,121,196,68,64,4,2           // vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  117,5                               // jne           4e2a <_sk_load_rgb_u16_be_avx+0x11e>
-  .byte  233,40,255,255,255                  // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,5                               // jne           4f48 <_sk_load_rgb_u16_be_avx+0x11e>
+  .byte  233,40,255,255,255                  // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,76,64,6             // vmovd         0x6(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,68,64,10,2           // vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,26                              // jb            4e59 <_sk_load_rgb_u16_be_avx+0x14d>
+  .byte  114,26                              // jb            4f77 <_sk_load_rgb_u16_be_avx+0x14d>
   .byte  196,193,121,110,76,64,12            // vmovd         0xc(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,84,64,16,2          // vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  117,10                              // jne           4e5e <_sk_load_rgb_u16_be_avx+0x152>
-  .byte  233,249,254,255,255                 // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,244,254,255,255                 // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           4f7c <_sk_load_rgb_u16_be_avx+0x152>
+  .byte  233,249,254,255,255                 // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,244,254,255,255                 // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,76,64,18            // vmovd         0x12(%r8,%rax,2),%xmm1
   .byte  196,65,113,196,76,64,22,2           // vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,26                              // jb            4e8d <_sk_load_rgb_u16_be_avx+0x181>
+  .byte  114,26                              // jb            4fab <_sk_load_rgb_u16_be_avx+0x181>
   .byte  196,193,121,110,76,64,24            // vmovd         0x18(%r8,%rax,2),%xmm1
   .byte  196,193,113,196,76,64,28,2          // vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  117,10                              // jne           4e92 <_sk_load_rgb_u16_be_avx+0x186>
-  .byte  233,197,254,255,255                 // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,192,254,255,255                 // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  117,10                              // jne           4fb0 <_sk_load_rgb_u16_be_avx+0x186>
+  .byte  233,197,254,255,255                 // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,192,254,255,255                 // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
   .byte  196,193,121,110,92,64,30            // vmovd         0x1e(%r8,%rax,2),%xmm3
   .byte  196,65,97,196,92,64,34,2            // vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,20                              // jb            4ebb <_sk_load_rgb_u16_be_avx+0x1af>
+  .byte  114,20                              // jb            4fd9 <_sk_load_rgb_u16_be_avx+0x1af>
   .byte  196,193,121,110,92,64,36            // vmovd         0x24(%r8,%rax,2),%xmm3
   .byte  196,193,97,196,92,64,40,2           // vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  .byte  233,151,254,255,255                 // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
-  .byte  233,146,254,255,255                 // jmpq          4d52 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,151,254,255,255                 // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
+  .byte  233,146,254,255,255                 // jmpq          4e70 <_sk_load_rgb_u16_be_avx+0x46>
 
 HIDDEN _sk_store_u16_be_avx
 .globl _sk_store_u16_be_avx
@@ -18686,7 +18781,7 @@ _sk_store_u16_be_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
-  .byte  196,98,125,24,5,106,23,0,0          // vbroadcastss  0x176a(%rip),%ymm8        # 6640 <_sk_callback_avx+0x459>
+  .byte  196,98,125,24,5,104,23,0,0          // vbroadcastss  0x1768(%rip),%ymm8        # 675c <_sk_callback_avx+0x459>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,67,125,25,202,1                 // vextractf128  $0x1,%ymm9,%xmm10
@@ -18724,7 +18819,7 @@ _sk_store_u16_be_avx:
   .byte  196,65,17,98,200                    // vpunpckldq    %xmm8,%xmm13,%xmm9
   .byte  196,65,17,106,192                   // vpunpckhdq    %xmm8,%xmm13,%xmm8
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,31                              // jne           4fba <_sk_store_u16_be_avx+0xfa>
+  .byte  117,31                              // jne           50d8 <_sk_store_u16_be_avx+0xfa>
   .byte  196,65,120,17,28,64                 // vmovups       %xmm11,(%r8,%rax,2)
   .byte  196,65,120,17,84,64,16              // vmovups       %xmm10,0x10(%r8,%rax,2)
   .byte  196,65,120,17,76,64,32              // vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -18733,22 +18828,22 @@ _sk_store_u16_be_avx:
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,214,28,64                // vmovq         %xmm11,(%r8,%rax,2)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,240                             // je            50d4 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,92,64,8               // vmovhpd       %xmm11,0x8(%r8,%rax,2)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,227                             // jb            50d4 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,84,64,16             // vmovq         %xmm10,0x10(%r8,%rax,2)
-  .byte  116,218                             // je            4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,218                             // je            50d4 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,84,64,24              // vmovhpd       %xmm10,0x18(%r8,%rax,2)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,205                             // jb            50d4 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,76,64,32             // vmovq         %xmm9,0x20(%r8,%rax,2)
-  .byte  116,196                             // je            4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  116,196                             // je            50d4 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,23,76,64,40              // vmovhpd       %xmm9,0x28(%r8,%rax,2)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,183                             // jb            4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  114,183                             // jb            50d4 <_sk_store_u16_be_avx+0xf6>
   .byte  196,65,121,214,68,64,48             // vmovq         %xmm8,0x30(%r8,%rax,2)
-  .byte  235,174                             // jmp           4fb6 <_sk_store_u16_be_avx+0xf6>
+  .byte  235,174                             // jmp           50d4 <_sk_store_u16_be_avx+0xf6>
 
 HIDDEN _sk_load_f32_avx
 .globl _sk_load_f32_avx
@@ -18756,10 +18851,10 @@ FUNCTION(_sk_load_f32_avx)
 _sk_load_f32_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  119,110                             // ja            507e <_sk_load_f32_avx+0x76>
+  .byte  119,110                             // ja            519c <_sk_load_f32_avx+0x76>
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
-  .byte  76,141,21,134,0,0,0                 // lea           0x86(%rip),%r10        # 50a8 <_sk_load_f32_avx+0xa0>
+  .byte  76,141,21,132,0,0,0                 // lea           0x84(%rip),%r10        # 51c4 <_sk_load_f32_avx+0x9e>
   .byte  73,99,4,138                         // movslq        (%r10,%rcx,4),%rax
   .byte  76,1,208                            // add           %r10,%rax
   .byte  255,224                             // jmpq          *%rax
@@ -18785,19 +18880,19 @@ _sk_load_f32_avx:
   .byte  196,193,101,21,216                  // vunpckhpd     %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
-  .byte  102,144                             // xchg          %ax,%ax
-  .byte  131,255,255                         // cmp           $0xffffffff,%edi
-  .byte  255,202                             // dec           %edx
+  .byte  133,255                             // test          %edi,%edi
+  .byte  255                                 // (bad)
+  .byte  255,204                             // dec           %esp
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  189,255,255,255,176                 // mov           $0xb0ffffff,%ebp
+  .byte  191,255,255,255,178                 // mov           $0xb2ffffff,%edi
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,163,255,255,255,155             // jmpq          *-0x64000001(%rbx)
+  .byte  255,165,255,255,255,157             // jmpq          *-0x62000001(%rbp)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,147,255,255,255,139             // callq         *-0x74000001(%rbx)
+  .byte  255,149,255,255,255,141             // callq         *-0x72000001(%rbp)
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255                                 // .byte         0xff
@@ -18818,7 +18913,7 @@ _sk_store_f32_avx:
   .byte  196,65,37,20,196                    // vunpcklpd     %ymm12,%ymm11,%ymm8
   .byte  196,65,37,21,220                    // vunpckhpd     %ymm12,%ymm11,%ymm11
   .byte  72,133,201                          // test          %rcx,%rcx
-  .byte  117,55                              // jne           5135 <_sk_store_f32_avx+0x6d>
+  .byte  117,55                              // jne           5251 <_sk_store_f32_avx+0x6d>
   .byte  196,67,45,24,225,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   .byte  196,67,61,24,235,1                  // vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   .byte  196,67,45,6,201,49                  // vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -18831,22 +18926,22 @@ _sk_store_f32_avx:
   .byte  255,224                             // jmpq          *%rax
   .byte  196,65,121,17,20,128                // vmovupd       %xmm10,(%r8,%rax,4)
   .byte  72,131,249,1                        // cmp           $0x1,%rcx
-  .byte  116,240                             // je            5131 <_sk_store_f32_avx+0x69>
+  .byte  116,240                             // je            524d <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,76,128,16             // vmovupd       %xmm9,0x10(%r8,%rax,4)
   .byte  72,131,249,3                        // cmp           $0x3,%rcx
-  .byte  114,227                             // jb            5131 <_sk_store_f32_avx+0x69>
+  .byte  114,227                             // jb            524d <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,68,128,32             // vmovupd       %xmm8,0x20(%r8,%rax,4)
-  .byte  116,218                             // je            5131 <_sk_store_f32_avx+0x69>
+  .byte  116,218                             // je            524d <_sk_store_f32_avx+0x69>
   .byte  196,65,121,17,92,128,48             // vmovupd       %xmm11,0x30(%r8,%rax,4)
   .byte  72,131,249,5                        // cmp           $0x5,%rcx
-  .byte  114,205                             // jb            5131 <_sk_store_f32_avx+0x69>
+  .byte  114,205                             // jb            524d <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,84,128,64,1           // vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  .byte  116,195                             // je            5131 <_sk_store_f32_avx+0x69>
+  .byte  116,195                             // je            524d <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,76,128,80,1           // vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   .byte  72,131,249,7                        // cmp           $0x7,%rcx
-  .byte  114,181                             // jb            5131 <_sk_store_f32_avx+0x69>
+  .byte  114,181                             // jb            524d <_sk_store_f32_avx+0x69>
   .byte  196,67,125,25,68,128,96,1           // vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  .byte  235,171                             // jmp           5131 <_sk_store_f32_avx+0x69>
+  .byte  235,171                             // jmp           524d <_sk_store_f32_avx+0x69>
 
 HIDDEN _sk_clamp_x_avx
 .globl _sk_clamp_x_avx
@@ -18952,12 +19047,12 @@ HIDDEN _sk_luminance_to_alpha_avx
 .globl _sk_luminance_to_alpha_avx
 FUNCTION(_sk_luminance_to_alpha_avx)
 _sk_luminance_to_alpha_avx:
-  .byte  196,226,125,24,29,143,19,0,0        // vbroadcastss  0x138f(%rip),%ymm3        # 6644 <_sk_callback_avx+0x45d>
+  .byte  196,226,125,24,29,143,19,0,0        // vbroadcastss  0x138f(%rip),%ymm3        # 6760 <_sk_callback_avx+0x45d>
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,29,134,19,0,0        // vbroadcastss  0x1386(%rip),%ymm3        # 6648 <_sk_callback_avx+0x461>
+  .byte  196,226,125,24,29,134,19,0,0        // vbroadcastss  0x1386(%rip),%ymm3        # 6764 <_sk_callback_avx+0x461>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,121,19,0,0        // vbroadcastss  0x1379(%rip),%ymm1        # 664c <_sk_callback_avx+0x465>
+  .byte  196,226,125,24,13,121,19,0,0        // vbroadcastss  0x1379(%rip),%ymm1        # 6768 <_sk_callback_avx+0x465>
   .byte  197,236,89,201                      // vmulps        %ymm1,%ymm2,%ymm1
   .byte  197,252,88,217                      // vaddps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19176,9 +19271,9 @@ _sk_evenly_spaced_gradient_avx:
   .byte  72,139,24                           // mov           (%rax),%rbx
   .byte  72,139,104,8                        // mov           0x8(%rax),%rbp
   .byte  72,255,203                          // dec           %rbx
-  .byte  120,7                               // js            5629 <_sk_evenly_spaced_gradient_avx+0x1f>
+  .byte  120,7                               // js            5745 <_sk_evenly_spaced_gradient_avx+0x1f>
   .byte  196,225,242,42,203                  // vcvtsi2ss     %rbx,%xmm1,%xmm1
-  .byte  235,21                              // jmp           563e <_sk_evenly_spaced_gradient_avx+0x34>
+  .byte  235,21                              // jmp           575a <_sk_evenly_spaced_gradient_avx+0x34>
   .byte  73,137,216                          // mov           %rbx,%r8
   .byte  73,209,232                          // shr           %r8
   .byte  131,227,1                           // and           $0x1,%ebx
@@ -19345,12 +19440,12 @@ _sk_gradient_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,80                              // jb            59cc <_sk_gradient_avx+0x69>
+  .byte  114,80                              // jb            5ae8 <_sk_gradient_avx+0x69>
   .byte  72,139,88,72                        // mov           0x48(%rax),%rbx
   .byte  73,255,200                          // dec           %r8
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
-  .byte  196,98,125,24,21,187,12,0,0         // vbroadcastss  0xcbb(%rip),%ymm10        # 6650 <_sk_callback_avx+0x469>
+  .byte  196,98,125,24,21,187,12,0,0         // vbroadcastss  0xcbb(%rip),%ymm10        # 676c <_sk_callback_avx+0x469>
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  196,98,125,24,3                     // vbroadcastss  (%rbx),%ymm8
   .byte  197,60,194,192,2                    // vcmpleps      %ymm0,%ymm8,%ymm8
@@ -19362,7 +19457,7 @@ _sk_gradient_avx:
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  73,255,200                          // dec           %r8
-  .byte  117,205                             // jne           5999 <_sk_gradient_avx+0x36>
+  .byte  117,205                             // jne           5ab5 <_sk_gradient_avx+0x36>
   .byte  196,195,249,22,200,1                // vpextrq       $0x1,%xmm1,%r8
   .byte  69,137,193                          // mov           %r8d,%r9d
   .byte  73,193,232,32                       // shr           $0x20,%r8
@@ -19544,27 +19639,27 @@ _sk_xy_to_unit_angle_avx:
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,223,8,0,0          // vbroadcastss  0x8df(%rip),%ymm13        # 6654 <_sk_callback_avx+0x46d>
+  .byte  196,98,125,24,45,223,8,0,0          // vbroadcastss  0x8df(%rip),%ymm13        # 6770 <_sk_callback_avx+0x46d>
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,213,8,0,0          // vbroadcastss  0x8d5(%rip),%ymm14        # 6658 <_sk_callback_avx+0x471>
+  .byte  196,98,125,24,53,213,8,0,0          // vbroadcastss  0x8d5(%rip),%ymm14        # 6774 <_sk_callback_avx+0x471>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,198,8,0,0          // vbroadcastss  0x8c6(%rip),%ymm14        # 665c <_sk_callback_avx+0x475>
+  .byte  196,98,125,24,53,198,8,0,0          // vbroadcastss  0x8c6(%rip),%ymm14        # 6778 <_sk_callback_avx+0x475>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,229                    // vmulps        %ymm13,%ymm12,%ymm12
-  .byte  196,98,125,24,45,183,8,0,0          // vbroadcastss  0x8b7(%rip),%ymm13        # 6660 <_sk_callback_avx+0x479>
+  .byte  196,98,125,24,45,183,8,0,0          // vbroadcastss  0x8b7(%rip),%ymm13        # 677c <_sk_callback_avx+0x479>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,162,8,0,0          // vbroadcastss  0x8a2(%rip),%ymm10        # 6664 <_sk_callback_avx+0x47d>
+  .byte  196,98,125,24,21,162,8,0,0          // vbroadcastss  0x8a2(%rip),%ymm10        # 6780 <_sk_callback_avx+0x47d>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,140,8,0,0          // vbroadcastss  0x88c(%rip),%ymm10        # 6668 <_sk_callback_avx+0x481>
+  .byte  196,98,125,24,21,140,8,0,0          // vbroadcastss  0x88c(%rip),%ymm10        # 6784 <_sk_callback_avx+0x481>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,118,8,0,0          // vbroadcastss  0x876(%rip),%ymm10        # 666c <_sk_callback_avx+0x485>
+  .byte  196,98,125,24,21,118,8,0,0          // vbroadcastss  0x876(%rip),%ymm10        # 6788 <_sk_callback_avx+0x485>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -19588,7 +19683,7 @@ HIDDEN _sk_save_xy_avx
 FUNCTION(_sk_save_xy_avx)
 _sk_save_xy_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,64,8,0,0            // vbroadcastss  0x840(%rip),%ymm8        # 6670 <_sk_callback_avx+0x489>
+  .byte  196,98,125,24,5,64,8,0,0            // vbroadcastss  0x840(%rip),%ymm8        # 678c <_sk_callback_avx+0x489>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -19625,9 +19720,9 @@ HIDDEN _sk_bilinear_nx_avx
 FUNCTION(_sk_bilinear_nx_avx)
 _sk_bilinear_nx_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,204,7,0,0          // vbroadcastss  0x7cc(%rip),%ymm0        # 6674 <_sk_callback_avx+0x48d>
+  .byte  196,226,125,24,5,204,7,0,0          // vbroadcastss  0x7cc(%rip),%ymm0        # 6790 <_sk_callback_avx+0x48d>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,195,7,0,0           // vbroadcastss  0x7c3(%rip),%ymm8        # 6678 <_sk_callback_avx+0x491>
+  .byte  196,98,125,24,5,195,7,0,0           // vbroadcastss  0x7c3(%rip),%ymm8        # 6794 <_sk_callback_avx+0x491>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19638,7 +19733,7 @@ HIDDEN _sk_bilinear_px_avx
 FUNCTION(_sk_bilinear_px_avx)
 _sk_bilinear_px_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,171,7,0,0          // vbroadcastss  0x7ab(%rip),%ymm0        # 667c <_sk_callback_avx+0x495>
+  .byte  196,226,125,24,5,171,7,0,0          // vbroadcastss  0x7ab(%rip),%ymm0        # 6798 <_sk_callback_avx+0x495>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -19650,9 +19745,9 @@ HIDDEN _sk_bilinear_ny_avx
 FUNCTION(_sk_bilinear_ny_avx)
 _sk_bilinear_ny_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,143,7,0,0         // vbroadcastss  0x78f(%rip),%ymm1        # 6680 <_sk_callback_avx+0x499>
+  .byte  196,226,125,24,13,143,7,0,0         // vbroadcastss  0x78f(%rip),%ymm1        # 679c <_sk_callback_avx+0x499>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,133,7,0,0           // vbroadcastss  0x785(%rip),%ymm8        # 6684 <_sk_callback_avx+0x49d>
+  .byte  196,98,125,24,5,133,7,0,0           // vbroadcastss  0x785(%rip),%ymm8        # 67a0 <_sk_callback_avx+0x49d>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19663,7 +19758,7 @@ HIDDEN _sk_bilinear_py_avx
 FUNCTION(_sk_bilinear_py_avx)
 _sk_bilinear_py_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,109,7,0,0         // vbroadcastss  0x76d(%rip),%ymm1        # 6688 <_sk_callback_avx+0x4a1>
+  .byte  196,226,125,24,13,109,7,0,0         // vbroadcastss  0x76d(%rip),%ymm1        # 67a4 <_sk_callback_avx+0x4a1>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -19675,14 +19770,14 @@ HIDDEN _sk_bicubic_n3x_avx
 FUNCTION(_sk_bicubic_n3x_avx)
 _sk_bicubic_n3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,80,7,0,0           // vbroadcastss  0x750(%rip),%ymm0        # 668c <_sk_callback_avx+0x4a5>
+  .byte  196,226,125,24,5,80,7,0,0           // vbroadcastss  0x750(%rip),%ymm0        # 67a8 <_sk_callback_avx+0x4a5>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,71,7,0,0            // vbroadcastss  0x747(%rip),%ymm8        # 6690 <_sk_callback_avx+0x4a9>
+  .byte  196,98,125,24,5,71,7,0,0            // vbroadcastss  0x747(%rip),%ymm8        # 67ac <_sk_callback_avx+0x4a9>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,56,7,0,0           // vbroadcastss  0x738(%rip),%ymm10        # 6694 <_sk_callback_avx+0x4ad>
+  .byte  196,98,125,24,21,56,7,0,0           // vbroadcastss  0x738(%rip),%ymm10        # 67b0 <_sk_callback_avx+0x4ad>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,46,7,0,0           // vbroadcastss  0x72e(%rip),%ymm10        # 6698 <_sk_callback_avx+0x4b1>
+  .byte  196,98,125,24,21,46,7,0,0           // vbroadcastss  0x72e(%rip),%ymm10        # 67b4 <_sk_callback_avx+0x4b1>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -19694,19 +19789,19 @@ HIDDEN _sk_bicubic_n1x_avx
 FUNCTION(_sk_bicubic_n1x_avx)
 _sk_bicubic_n1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,17,7,0,0           // vbroadcastss  0x711(%rip),%ymm0        # 669c <_sk_callback_avx+0x4b5>
+  .byte  196,226,125,24,5,17,7,0,0           // vbroadcastss  0x711(%rip),%ymm0        # 67b8 <_sk_callback_avx+0x4b5>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,8,7,0,0             // vbroadcastss  0x708(%rip),%ymm8        # 66a0 <_sk_callback_avx+0x4b9>
+  .byte  196,98,125,24,5,8,7,0,0             // vbroadcastss  0x708(%rip),%ymm8        # 67bc <_sk_callback_avx+0x4b9>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,254,6,0,0          // vbroadcastss  0x6fe(%rip),%ymm9        # 66a4 <_sk_callback_avx+0x4bd>
+  .byte  196,98,125,24,13,254,6,0,0          // vbroadcastss  0x6fe(%rip),%ymm9        # 67c0 <_sk_callback_avx+0x4bd>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,244,6,0,0          // vbroadcastss  0x6f4(%rip),%ymm10        # 66a8 <_sk_callback_avx+0x4c1>
+  .byte  196,98,125,24,21,244,6,0,0          // vbroadcastss  0x6f4(%rip),%ymm10        # 67c4 <_sk_callback_avx+0x4c1>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,229,6,0,0          // vbroadcastss  0x6e5(%rip),%ymm10        # 66ac <_sk_callback_avx+0x4c5>
+  .byte  196,98,125,24,21,229,6,0,0          // vbroadcastss  0x6e5(%rip),%ymm10        # 67c8 <_sk_callback_avx+0x4c5>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,214,6,0,0          // vbroadcastss  0x6d6(%rip),%ymm9        # 66b0 <_sk_callback_avx+0x4c9>
+  .byte  196,98,125,24,13,214,6,0,0          // vbroadcastss  0x6d6(%rip),%ymm9        # 67cc <_sk_callback_avx+0x4c9>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19717,17 +19812,17 @@ HIDDEN _sk_bicubic_p1x_avx
 FUNCTION(_sk_bicubic_p1x_avx)
 _sk_bicubic_p1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,190,6,0,0           // vbroadcastss  0x6be(%rip),%ymm8        # 66b4 <_sk_callback_avx+0x4cd>
+  .byte  196,98,125,24,5,190,6,0,0           // vbroadcastss  0x6be(%rip),%ymm8        # 67d0 <_sk_callback_avx+0x4cd>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,72,64                    // vmovups       0x40(%rax),%ymm9
-  .byte  196,98,125,24,21,176,6,0,0          // vbroadcastss  0x6b0(%rip),%ymm10        # 66b8 <_sk_callback_avx+0x4d1>
+  .byte  196,98,125,24,21,176,6,0,0          // vbroadcastss  0x6b0(%rip),%ymm10        # 67d4 <_sk_callback_avx+0x4d1>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,166,6,0,0          // vbroadcastss  0x6a6(%rip),%ymm11        # 66bc <_sk_callback_avx+0x4d5>
+  .byte  196,98,125,24,29,166,6,0,0          // vbroadcastss  0x6a6(%rip),%ymm11        # 67d8 <_sk_callback_avx+0x4d5>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,141,6,0,0          // vbroadcastss  0x68d(%rip),%ymm9        # 66c0 <_sk_callback_avx+0x4d9>
+  .byte  196,98,125,24,13,141,6,0,0          // vbroadcastss  0x68d(%rip),%ymm9        # 67dc <_sk_callback_avx+0x4d9>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19738,13 +19833,13 @@ HIDDEN _sk_bicubic_p3x_avx
 FUNCTION(_sk_bicubic_p3x_avx)
 _sk_bicubic_p3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,117,6,0,0          // vbroadcastss  0x675(%rip),%ymm0        # 66c4 <_sk_callback_avx+0x4dd>
+  .byte  196,226,125,24,5,117,6,0,0          // vbroadcastss  0x675(%rip),%ymm0        # 67e0 <_sk_callback_avx+0x4dd>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,98,6,0,0           // vbroadcastss  0x662(%rip),%ymm10        # 66c8 <_sk_callback_avx+0x4e1>
+  .byte  196,98,125,24,21,98,6,0,0           // vbroadcastss  0x662(%rip),%ymm10        # 67e4 <_sk_callback_avx+0x4e1>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,88,6,0,0           // vbroadcastss  0x658(%rip),%ymm10        # 66cc <_sk_callback_avx+0x4e5>
+  .byte  196,98,125,24,21,88,6,0,0           // vbroadcastss  0x658(%rip),%ymm10        # 67e8 <_sk_callback_avx+0x4e5>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -19756,14 +19851,14 @@ HIDDEN _sk_bicubic_n3y_avx
 FUNCTION(_sk_bicubic_n3y_avx)
 _sk_bicubic_n3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,59,6,0,0          // vbroadcastss  0x63b(%rip),%ymm1        # 66d0 <_sk_callback_avx+0x4e9>
+  .byte  196,226,125,24,13,59,6,0,0          // vbroadcastss  0x63b(%rip),%ymm1        # 67ec <_sk_callback_avx+0x4e9>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,49,6,0,0            // vbroadcastss  0x631(%rip),%ymm8        # 66d4 <_sk_callback_avx+0x4ed>
+  .byte  196,98,125,24,5,49,6,0,0            // vbroadcastss  0x631(%rip),%ymm8        # 67f0 <_sk_callback_avx+0x4ed>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,34,6,0,0           // vbroadcastss  0x622(%rip),%ymm10        # 66d8 <_sk_callback_avx+0x4f1>
+  .byte  196,98,125,24,21,34,6,0,0           // vbroadcastss  0x622(%rip),%ymm10        # 67f4 <_sk_callback_avx+0x4f1>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,24,6,0,0           // vbroadcastss  0x618(%rip),%ymm10        # 66dc <_sk_callback_avx+0x4f5>
+  .byte  196,98,125,24,21,24,6,0,0           // vbroadcastss  0x618(%rip),%ymm10        # 67f8 <_sk_callback_avx+0x4f5>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -19775,19 +19870,19 @@ HIDDEN _sk_bicubic_n1y_avx
 FUNCTION(_sk_bicubic_n1y_avx)
 _sk_bicubic_n1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,251,5,0,0         // vbroadcastss  0x5fb(%rip),%ymm1        # 66e0 <_sk_callback_avx+0x4f9>
+  .byte  196,226,125,24,13,251,5,0,0         // vbroadcastss  0x5fb(%rip),%ymm1        # 67fc <_sk_callback_avx+0x4f9>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,241,5,0,0           // vbroadcastss  0x5f1(%rip),%ymm8        # 66e4 <_sk_callback_avx+0x4fd>
+  .byte  196,98,125,24,5,241,5,0,0           // vbroadcastss  0x5f1(%rip),%ymm8        # 6800 <_sk_callback_avx+0x4fd>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,231,5,0,0          // vbroadcastss  0x5e7(%rip),%ymm9        # 66e8 <_sk_callback_avx+0x501>
+  .byte  196,98,125,24,13,231,5,0,0          // vbroadcastss  0x5e7(%rip),%ymm9        # 6804 <_sk_callback_avx+0x501>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,221,5,0,0          // vbroadcastss  0x5dd(%rip),%ymm10        # 66ec <_sk_callback_avx+0x505>
+  .byte  196,98,125,24,21,221,5,0,0          // vbroadcastss  0x5dd(%rip),%ymm10        # 6808 <_sk_callback_avx+0x505>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,206,5,0,0          // vbroadcastss  0x5ce(%rip),%ymm10        # 66f0 <_sk_callback_avx+0x509>
+  .byte  196,98,125,24,21,206,5,0,0          // vbroadcastss  0x5ce(%rip),%ymm10        # 680c <_sk_callback_avx+0x509>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,191,5,0,0          // vbroadcastss  0x5bf(%rip),%ymm9        # 66f4 <_sk_callback_avx+0x50d>
+  .byte  196,98,125,24,13,191,5,0,0          // vbroadcastss  0x5bf(%rip),%ymm9        # 6810 <_sk_callback_avx+0x50d>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19798,17 +19893,17 @@ HIDDEN _sk_bicubic_p1y_avx
 FUNCTION(_sk_bicubic_p1y_avx)
 _sk_bicubic_p1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,167,5,0,0           // vbroadcastss  0x5a7(%rip),%ymm8        # 66f8 <_sk_callback_avx+0x511>
+  .byte  196,98,125,24,5,167,5,0,0           // vbroadcastss  0x5a7(%rip),%ymm8        # 6814 <_sk_callback_avx+0x511>
   .byte  197,188,88,72,32                    // vaddps        0x20(%rax),%ymm8,%ymm1
   .byte  197,124,16,72,96                    // vmovups       0x60(%rax),%ymm9
-  .byte  196,98,125,24,21,152,5,0,0          // vbroadcastss  0x598(%rip),%ymm10        # 66fc <_sk_callback_avx+0x515>
+  .byte  196,98,125,24,21,152,5,0,0          // vbroadcastss  0x598(%rip),%ymm10        # 6818 <_sk_callback_avx+0x515>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,142,5,0,0          // vbroadcastss  0x58e(%rip),%ymm11        # 6700 <_sk_callback_avx+0x519>
+  .byte  196,98,125,24,29,142,5,0,0          // vbroadcastss  0x58e(%rip),%ymm11        # 681c <_sk_callback_avx+0x519>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,117,5,0,0          // vbroadcastss  0x575(%rip),%ymm9        # 6704 <_sk_callback_avx+0x51d>
+  .byte  196,98,125,24,13,117,5,0,0          // vbroadcastss  0x575(%rip),%ymm9        # 6820 <_sk_callback_avx+0x51d>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19819,13 +19914,13 @@ HIDDEN _sk_bicubic_p3y_avx
 FUNCTION(_sk_bicubic_p3y_avx)
 _sk_bicubic_p3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,93,5,0,0          // vbroadcastss  0x55d(%rip),%ymm1        # 6708 <_sk_callback_avx+0x521>
+  .byte  196,226,125,24,13,93,5,0,0          // vbroadcastss  0x55d(%rip),%ymm1        # 6824 <_sk_callback_avx+0x521>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,73,5,0,0           // vbroadcastss  0x549(%rip),%ymm10        # 670c <_sk_callback_avx+0x525>
+  .byte  196,98,125,24,21,73,5,0,0           // vbroadcastss  0x549(%rip),%ymm10        # 6828 <_sk_callback_avx+0x525>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,63,5,0,0           // vbroadcastss  0x53f(%rip),%ymm10        # 6710 <_sk_callback_avx+0x529>
+  .byte  196,98,125,24,21,63,5,0,0           // vbroadcastss  0x53f(%rip),%ymm10        # 682c <_sk_callback_avx+0x529>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -19949,25 +20044,25 @@ BALIGN4
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 63bd <.literal4+0xb1>
+  .byte  71,225,61                           // rex.RXB       loope 64d9 <.literal4+0xb1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 63cd <.literal4+0xc1>
+  .byte  71,225,61                           // rex.RXB       loope 64e9 <.literal4+0xc1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 63dd <.literal4+0xd1>
+  .byte  71,225,61                           // rex.RXB       loope 64f9 <.literal4+0xd1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 63ed <.literal4+0xe1>
+  .byte  71,225,61                           // rex.RXB       loope 6509 <.literal4+0xe1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -20015,7 +20110,7 @@ BALIGN4
   .byte  190,129,128,128,59                  // mov           $0x3b808081,%esi
   .byte  129,128,128,59,0,248,0,0,8,33       // addl          $0x21080000,-0x7ffc480(%rax)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        6439 <.literal4+0x12d>
+  .byte  224,7                               // loopne        6555 <.literal4+0x12d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -20031,10 +20126,10 @@ BALIGN4
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
   .byte  0,52,255                            // add           %dh,(%rdi,%rdi,8)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6460 <.literal4+0x154>
+  .byte  127,0                               // jg            657c <.literal4+0x154>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            64d9 <.literal4+0x1cd>
+  .byte  119,115                             // ja            65f5 <.literal4+0x1cd>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20048,10 +20143,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6494 <.literal4+0x188>
+  .byte  127,0                               // jg            65b0 <.literal4+0x188>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            650d <.literal4+0x201>
+  .byte  119,115                             // ja            6629 <.literal4+0x201>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20065,10 +20160,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            64c8 <.literal4+0x1bc>
+  .byte  127,0                               // jg            65e4 <.literal4+0x1bc>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            6541 <.literal4+0x235>
+  .byte  119,115                             // ja            665d <.literal4+0x235>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20082,10 +20177,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            64fc <.literal4+0x1f0>
+  .byte  127,0                               // jg            6618 <.literal4+0x1f0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            6575 <.literal4+0x269>
+  .byte  119,115                             // ja            6691 <.literal4+0x269>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20098,7 +20193,7 @@ BALIGN4
   .byte  0,75,0                              // add           %cl,0x0(%rbx)
   .byte  0,128,63,0,0,200                    // add           %al,-0x37ffffc1(%rax)
   .byte  66,0,0                              // rex.X         add %al,(%rax)
-  .byte  127,67                              // jg            6573 <.literal4+0x267>
+  .byte  127,67                              // jg            668f <.literal4+0x267>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -20110,10 +20205,10 @@ BALIGN4
   .byte  190,80,128,3,62                     // mov           $0x3e038050,%esi
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           6593 <.literal4+0x287>
+  .byte  118,63                              // jbe           66af <.literal4+0x287>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            65a7 <.literal4+0x29b>
+  .byte  127,67                              // jg            66c3 <.literal4+0x29b>
   .byte  129,128,128,59,0,0,128,63,129,128   // addl          $0x80813f80,0x3b80(%rax)
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,128,63,129,128,128                // add           %al,-0x7f7f7ec1(%rax)
@@ -20122,7 +20217,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        6589 <.literal4+0x27d>
+  .byte  224,7                               // loopne        66a5 <.literal4+0x27d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -20134,7 +20229,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        65a5 <.literal4+0x299>
+  .byte  224,7                               // loopne        66c1 <.literal4+0x299>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -20145,7 +20240,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            65fa <.literal4+0x2ee>
+  .byte  124,66                              // jl            6716 <.literal4+0x2ee>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,55,0,15                 // mov           %ecx,0xf003788(%rax)
@@ -20163,9 +20258,9 @@ BALIGN4
   .byte  137,136,136,59,15,0                 // mov           %ecx,0xf3b88(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,61,0,0                  // mov           %ecx,0x3d88(%rax)
-  .byte  112,65                              // jo            663d <.literal4+0x331>
+  .byte  112,65                              // jo            6759 <.literal4+0x331>
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            664b <.literal4+0x33f>
+  .byte  127,67                              // jg            6767 <.literal4+0x33f>
   .byte  0,128,0,0,0,0                       // add           %al,0x0(%rax)
   .byte  0,128,0,4,0,128                     // add           %al,-0x7ffffc00(%rax)
   .byte  0,0                                 // add           %al,(%rax)
@@ -20181,7 +20276,7 @@ BALIGN4
   .byte  0,128,55,0,0,128                    // add           %al,-0x7fffffc9(%rax)
   .byte  63                                  // (bad)
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            668b <.literal4+0x37f>
+  .byte  127,71                              // jg            67a7 <.literal4+0x37f>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,89                               // ds            pop %rcx
@@ -20268,72 +20363,6 @@ BALIGN4
   .byte  170                                 // stos          %al,%es:(%rdi)
   .byte  190                                 // .byte         0xbe
 
-BALIGN16
-  .byte  0,2                                 // add           %al,(%rdx)
-  .byte  4,6                                 // add           $0x6,%al
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  8,10                                // or            %cl,(%rdx)
-  .byte  12,14                               // or            $0xe,%al
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  255,0                               // incl          (%rax)
-  .byte  0,2                                 // add           %al,(%rdx)
-  .byte  4,6                                 // add           $0x6,%al
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  8,10                                // or            %cl,(%rdx)
-  .byte  12,14                               // or            $0xe,%al
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,2                                 // add           %al,(%rdx)
-  .byte  4,6                                 // add           $0x6,%al
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  8,10                                // or            %cl,(%rdx)
-  .byte  12,14                               // or            $0xe,%al
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-  .byte  0,0                                 // add           %al,(%rax)
-
 BALIGN32
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
@@ -20399,6 +20428,24 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
+
+BALIGN16
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
+  .byte  255,0                               // incl          (%rax)
 BALIGN32
 
 HIDDEN _sk_start_pipeline_sse41
index 0177a13..268cd26 100644 (file)
@@ -1647,8 +1647,8 @@ _sk_load_tables_hsw LABEL PROC
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,105                             ; jne           1b0a <_sk_load_tables_hsw+0x7e>
-  DB  196,193,124,16,25                   ; vmovups       (%r9),%ymm3
-  DB  197,228,84,13,18,50,0,0             ; vandps        0x3212(%rip),%ymm3,%ymm1        # 4cc0 <_sk_callback_hsw+0x513>
+  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
+  DB  197,229,219,13,18,50,0,0            ; vpand         0x3212(%rip),%ymm3,%ymm1        # 4cc0 <_sk_callback_hsw+0x513>
   DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
   DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
   DB  76,139,72,16                        ; mov           0x10(%rax),%r9
@@ -1674,7 +1674,7 @@ _sk_load_tables_hsw LABEL PROC
   DB  73,211,234                          ; shr           %cl,%r10
   DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
   DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
-  DB  196,194,125,44,25                   ; vmaskmovps    (%r9),%ymm0,%ymm3
+  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
   DB  233,115,255,255,255                 ; jmpq          1aa6 <_sk_load_tables_hsw+0x1a>
 
 PUBLIC _sk_load_tables_u16_be_hsw
@@ -3147,8 +3147,8 @@ _sk_load_8888_hsw LABEL PROC
   DB  76,3,8                              ; add           (%rax),%r9
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,88                              ; jne           342d <_sk_load_8888_hsw+0x6d>
-  DB  196,193,124,16,25                   ; vmovups       (%r9),%ymm3
-  DB  197,228,84,5,158,25,0,0             ; vandps        0x199e(%rip),%ymm3,%ymm0        # 4d80 <_sk_callback_hsw+0x5d3>
+  DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
+  DB  197,229,219,5,158,25,0,0            ; vpand         0x199e(%rip),%ymm3,%ymm0        # 4d80 <_sk_callback_hsw+0x5d3>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
   DB  196,98,125,24,5,205,23,0,0          ; vbroadcastss  0x17cd(%rip),%ymm8        # 4bbc <_sk_callback_hsw+0x40f>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -3171,7 +3171,7 @@ _sk_load_8888_hsw LABEL PROC
   DB  72,211,232                          ; shr           %cl,%rax
   DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
   DB  196,226,125,33,192                  ; vpmovsxbd     %xmm0,%ymm0
-  DB  196,194,125,44,25                   ; vmaskmovps    (%r9),%ymm0,%ymm3
+  DB  196,194,125,140,25                  ; vpmaskmovd    (%r9),%ymm0,%ymm3
   DB  235,135                             ; jmp           33da <_sk_load_8888_hsw+0x1a>
 
 PUBLIC _sk_gather_8888_hsw
@@ -3224,7 +3224,7 @@ _sk_store_8888_hsw LABEL PROC
   DB  196,65,53,235,192                   ; vpor          %ymm8,%ymm9,%ymm8
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,12                              ; jne           353c <_sk_store_8888_hsw+0x73>
-  DB  196,65,124,17,1                     ; vmovups       %ymm8,(%r9)
+  DB  196,65,126,127,1                    ; vmovdqu       %ymm8,(%r9)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  255,224                             ; jmpq          *%rax
@@ -3235,7 +3235,7 @@ _sk_store_8888_hsw LABEL PROC
   DB  72,211,232                          ; shr           %cl,%rax
   DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
   DB  196,66,125,33,201                   ; vpmovsxbd     %xmm9,%ymm9
-  DB  196,66,53,46,1                      ; vmaskmovps    %ymm8,%ymm9,(%r9)
+  DB  196,66,53,142,1                     ; vpmaskmovd    %ymm8,%ymm9,(%r9)
   DB  235,211                             ; jmp           3535 <_sk_store_8888_hsw+0x6c>
 
 PUBLIC _sk_load_f16_hsw
@@ -5076,14 +5076,14 @@ _sk_seed_shader_avx LABEL PROC
   DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,132,98,0,0        ; vbroadcastss  0x6284(%rip),%ymm1        # 63e4 <_sk_callback_avx+0x119>
+  DB  196,226,125,24,13,152,99,0,0        ; vbroadcastss  0x6398(%rip),%ymm1        # 64f8 <_sk_callback_avx+0x119>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,21,104,98,0,0        ; vbroadcastss  0x6268(%rip),%ymm2        # 63e8 <_sk_callback_avx+0x11d>
+  DB  196,226,125,24,21,124,99,0,0        ; vbroadcastss  0x637c(%rip),%ymm2        # 64fc <_sk_callback_avx+0x11d>
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
   DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
@@ -5104,7 +5104,7 @@ _sk_dither_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  196,66,125,24,8                     ; vbroadcastss  (%r8),%ymm9
   DB  196,65,60,87,209                    ; vxorps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,25,98,0,0          ; vbroadcastss  0x6219(%rip),%ymm11        # 63ec <_sk_callback_avx+0x121>
+  DB  196,98,125,24,29,45,99,0,0          ; vbroadcastss  0x632d(%rip),%ymm11        # 6500 <_sk_callback_avx+0x121>
   DB  196,65,44,84,203                    ; vandps        %ymm11,%ymm10,%ymm9
   DB  196,193,25,114,241,5                ; vpslld        $0x5,%xmm9,%xmm12
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
@@ -5115,8 +5115,8 @@ _sk_dither_avx LABEL PROC
   DB  196,67,125,25,219,1                 ; vextractf128  $0x1,%ymm11,%xmm11
   DB  196,193,33,114,243,4                ; vpslld        $0x4,%xmm11,%xmm11
   DB  196,67,29,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm12,%ymm11
-  DB  196,98,125,24,37,218,97,0,0         ; vbroadcastss  0x61da(%rip),%ymm12        # 63f0 <_sk_callback_avx+0x125>
-  DB  196,98,125,24,45,213,97,0,0         ; vbroadcastss  0x61d5(%rip),%ymm13        # 63f4 <_sk_callback_avx+0x129>
+  DB  196,98,125,24,37,238,98,0,0         ; vbroadcastss  0x62ee(%rip),%ymm12        # 6504 <_sk_callback_avx+0x125>
+  DB  196,98,125,24,45,233,98,0,0         ; vbroadcastss  0x62e9(%rip),%ymm13        # 6508 <_sk_callback_avx+0x129>
   DB  196,65,44,84,245                    ; vandps        %ymm13,%ymm10,%ymm14
   DB  196,193,1,114,246,2                 ; vpslld        $0x2,%xmm14,%xmm15
   DB  196,67,125,25,246,1                 ; vextractf128  $0x1,%ymm14,%xmm14
@@ -5143,9 +5143,9 @@ _sk_dither_avx LABEL PROC
   DB  196,65,12,86,202                    ; vorps         %ymm10,%ymm14,%ymm9
   DB  196,65,60,86,193                    ; vorps         %ymm9,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,64,97,0,0          ; vbroadcastss  0x6140(%rip),%ymm9        # 63f8 <_sk_callback_avx+0x12d>
+  DB  196,98,125,24,13,84,98,0,0          ; vbroadcastss  0x6254(%rip),%ymm9        # 650c <_sk_callback_avx+0x12d>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,54,97,0,0          ; vbroadcastss  0x6136(%rip),%ymm9        # 63fc <_sk_callback_avx+0x131>
+  DB  196,98,125,24,13,74,98,0,0          ; vbroadcastss  0x624a(%rip),%ymm9        # 6510 <_sk_callback_avx+0x131>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
@@ -5204,7 +5204,7 @@ _sk_clear_avx LABEL PROC
 PUBLIC _sk_srcatop_avx
 _sk_srcatop_avx LABEL PROC
   DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
-  DB  196,98,125,24,5,141,96,0,0          ; vbroadcastss  0x608d(%rip),%ymm8        # 6400 <_sk_callback_avx+0x135>
+  DB  196,98,125,24,5,161,97,0,0          ; vbroadcastss  0x61a1(%rip),%ymm8        # 6514 <_sk_callback_avx+0x135>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5223,7 +5223,7 @@ _sk_srcatop_avx LABEL PROC
 PUBLIC _sk_dstatop_avx
 _sk_dstatop_avx LABEL PROC
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
-  DB  196,98,125,24,13,79,96,0,0          ; vbroadcastss  0x604f(%rip),%ymm9        # 6404 <_sk_callback_avx+0x139>
+  DB  196,98,125,24,13,99,97,0,0          ; vbroadcastss  0x6163(%rip),%ymm9        # 6518 <_sk_callback_avx+0x139>
   DB  197,52,92,207                       ; vsubps        %ymm7,%ymm9,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
@@ -5259,7 +5259,7 @@ _sk_dstin_avx LABEL PROC
 
 PUBLIC _sk_srcout_avx
 _sk_srcout_avx LABEL PROC
-  DB  196,98,125,24,5,238,95,0,0          ; vbroadcastss  0x5fee(%rip),%ymm8        # 6408 <_sk_callback_avx+0x13d>
+  DB  196,98,125,24,5,2,97,0,0            ; vbroadcastss  0x6102(%rip),%ymm8        # 651c <_sk_callback_avx+0x13d>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -5270,7 +5270,7 @@ _sk_srcout_avx LABEL PROC
 
 PUBLIC _sk_dstout_avx
 _sk_dstout_avx LABEL PROC
-  DB  196,226,125,24,5,209,95,0,0         ; vbroadcastss  0x5fd1(%rip),%ymm0        # 640c <_sk_callback_avx+0x141>
+  DB  196,226,125,24,5,229,96,0,0         ; vbroadcastss  0x60e5(%rip),%ymm0        # 6520 <_sk_callback_avx+0x141>
   DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
   DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
   DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
@@ -5281,7 +5281,7 @@ _sk_dstout_avx LABEL PROC
 
 PUBLIC _sk_srcover_avx
 _sk_srcover_avx LABEL PROC
-  DB  196,98,125,24,5,180,95,0,0          ; vbroadcastss  0x5fb4(%rip),%ymm8        # 6410 <_sk_callback_avx+0x145>
+  DB  196,98,125,24,5,200,96,0,0          ; vbroadcastss  0x60c8(%rip),%ymm8        # 6524 <_sk_callback_avx+0x145>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5296,7 +5296,7 @@ _sk_srcover_avx LABEL PROC
 
 PUBLIC _sk_dstover_avx
 _sk_dstover_avx LABEL PROC
-  DB  196,98,125,24,5,135,95,0,0          ; vbroadcastss  0x5f87(%rip),%ymm8        # 6414 <_sk_callback_avx+0x149>
+  DB  196,98,125,24,5,155,96,0,0          ; vbroadcastss  0x609b(%rip),%ymm8        # 6528 <_sk_callback_avx+0x149>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
@@ -5320,7 +5320,7 @@ _sk_modulate_avx LABEL PROC
 
 PUBLIC _sk_multiply_avx
 _sk_multiply_avx LABEL PROC
-  DB  196,98,125,24,5,70,95,0,0           ; vbroadcastss  0x5f46(%rip),%ymm8        # 6418 <_sk_callback_avx+0x14d>
+  DB  196,98,125,24,5,90,96,0,0           ; vbroadcastss  0x605a(%rip),%ymm8        # 652c <_sk_callback_avx+0x14d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5374,7 +5374,7 @@ _sk_screen_avx LABEL PROC
 
 PUBLIC _sk_xor__avx
 _sk_xor__avx LABEL PROC
-  DB  196,98,125,24,5,149,94,0,0          ; vbroadcastss  0x5e95(%rip),%ymm8        # 641c <_sk_callback_avx+0x151>
+  DB  196,98,125,24,5,169,95,0,0          ; vbroadcastss  0x5fa9(%rip),%ymm8        # 6530 <_sk_callback_avx+0x151>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5409,7 +5409,7 @@ _sk_darken_avx LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,21,94,0,0           ; vbroadcastss  0x5e15(%rip),%ymm8        # 6420 <_sk_callback_avx+0x155>
+  DB  196,98,125,24,5,41,95,0,0           ; vbroadcastss  0x5f29(%rip),%ymm8        # 6534 <_sk_callback_avx+0x155>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5433,7 +5433,7 @@ _sk_lighten_avx LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,193,93,0,0          ; vbroadcastss  0x5dc1(%rip),%ymm8        # 6424 <_sk_callback_avx+0x159>
+  DB  196,98,125,24,5,213,94,0,0          ; vbroadcastss  0x5ed5(%rip),%ymm8        # 6538 <_sk_callback_avx+0x159>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5460,7 +5460,7 @@ _sk_difference_avx LABEL PROC
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,97,93,0,0           ; vbroadcastss  0x5d61(%rip),%ymm8        # 6428 <_sk_callback_avx+0x15d>
+  DB  196,98,125,24,5,117,94,0,0          ; vbroadcastss  0x5e75(%rip),%ymm8        # 653c <_sk_callback_avx+0x15d>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5481,7 +5481,7 @@ _sk_exclusion_avx LABEL PROC
   DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,28,93,0,0           ; vbroadcastss  0x5d1c(%rip),%ymm8        # 642c <_sk_callback_avx+0x161>
+  DB  196,98,125,24,5,48,94,0,0           ; vbroadcastss  0x5e30(%rip),%ymm8        # 6540 <_sk_callback_avx+0x161>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5490,7 +5490,7 @@ _sk_exclusion_avx LABEL PROC
 
 PUBLIC _sk_colorburn_avx
 _sk_colorburn_avx LABEL PROC
-  DB  196,98,125,24,5,7,93,0,0            ; vbroadcastss  0x5d07(%rip),%ymm8        # 6430 <_sk_callback_avx+0x165>
+  DB  196,98,125,24,5,27,94,0,0           ; vbroadcastss  0x5e1b(%rip),%ymm8        # 6544 <_sk_callback_avx+0x165>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
@@ -5550,7 +5550,7 @@ _sk_colorburn_avx LABEL PROC
 PUBLIC _sk_colordodge_avx
 _sk_colordodge_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,13,3,92,0,0           ; vbroadcastss  0x5c03(%rip),%ymm9        # 6434 <_sk_callback_avx+0x169>
+  DB  196,98,125,24,13,23,93,0,0          ; vbroadcastss  0x5d17(%rip),%ymm9        # 6548 <_sk_callback_avx+0x169>
   DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
@@ -5605,7 +5605,7 @@ _sk_colordodge_avx LABEL PROC
 
 PUBLIC _sk_hardlight_avx
 _sk_hardlight_avx LABEL PROC
-  DB  196,98,125,24,5,21,91,0,0           ; vbroadcastss  0x5b15(%rip),%ymm8        # 6438 <_sk_callback_avx+0x16d>
+  DB  196,98,125,24,5,41,92,0,0           ; vbroadcastss  0x5c29(%rip),%ymm8        # 654c <_sk_callback_avx+0x16d>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5658,7 +5658,7 @@ _sk_hardlight_avx LABEL PROC
 
 PUBLIC _sk_overlay_avx
 _sk_overlay_avx LABEL PROC
-  DB  196,98,125,24,5,62,90,0,0           ; vbroadcastss  0x5a3e(%rip),%ymm8        # 643c <_sk_callback_avx+0x171>
+  DB  196,98,125,24,5,82,91,0,0           ; vbroadcastss  0x5b52(%rip),%ymm8        # 6550 <_sk_callback_avx+0x171>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5723,10 +5723,10 @@ _sk_softlight_avx LABEL PROC
   DB  196,65,60,88,192                    ; vaddps        %ymm8,%ymm8,%ymm8
   DB  196,65,60,89,216                    ; vmulps        %ymm8,%ymm8,%ymm11
   DB  196,65,60,88,195                    ; vaddps        %ymm11,%ymm8,%ymm8
-  DB  196,98,125,24,29,49,89,0,0          ; vbroadcastss  0x5931(%rip),%ymm11        # 6444 <_sk_callback_avx+0x179>
+  DB  196,98,125,24,29,69,90,0,0          ; vbroadcastss  0x5a45(%rip),%ymm11        # 6558 <_sk_callback_avx+0x179>
   DB  196,65,28,88,235                    ; vaddps        %ymm11,%ymm12,%ymm13
   DB  196,65,20,89,192                    ; vmulps        %ymm8,%ymm13,%ymm8
-  DB  196,98,125,24,45,34,89,0,0          ; vbroadcastss  0x5922(%rip),%ymm13        # 6448 <_sk_callback_avx+0x17d>
+  DB  196,98,125,24,45,54,90,0,0          ; vbroadcastss  0x5a36(%rip),%ymm13        # 655c <_sk_callback_avx+0x17d>
   DB  196,65,28,89,245                    ; vmulps        %ymm13,%ymm12,%ymm14
   DB  196,65,12,88,192                    ; vaddps        %ymm8,%ymm14,%ymm8
   DB  196,65,124,82,244                   ; vrsqrtps      %ymm12,%ymm14
@@ -5737,7 +5737,7 @@ _sk_softlight_avx LABEL PROC
   DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
   DB  196,67,13,74,240,240                ; vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   DB  197,116,88,249                      ; vaddps        %ymm1,%ymm1,%ymm15
-  DB  196,98,125,24,5,224,88,0,0          ; vbroadcastss  0x58e0(%rip),%ymm8        # 6440 <_sk_callback_avx+0x175>
+  DB  196,98,125,24,5,244,89,0,0          ; vbroadcastss  0x59f4(%rip),%ymm8        # 6554 <_sk_callback_avx+0x175>
   DB  196,65,60,92,228                    ; vsubps        %ymm12,%ymm8,%ymm12
   DB  197,132,92,195                      ; vsubps        %ymm3,%ymm15,%ymm0
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
@@ -5864,12 +5864,12 @@ _sk_hue_avx LABEL PROC
   DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
   DB  196,65,36,94,222                    ; vdivps        %ymm14,%ymm11,%ymm11
   DB  196,67,37,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  DB  196,98,125,24,53,170,86,0,0         ; vbroadcastss  0x56aa(%rip),%ymm14        # 644c <_sk_callback_avx+0x181>
+  DB  196,98,125,24,53,190,87,0,0         ; vbroadcastss  0x57be(%rip),%ymm14        # 6560 <_sk_callback_avx+0x181>
   DB  196,65,92,89,222                    ; vmulps        %ymm14,%ymm4,%ymm11
-  DB  196,98,125,24,61,160,86,0,0         ; vbroadcastss  0x56a0(%rip),%ymm15        # 6450 <_sk_callback_avx+0x185>
+  DB  196,98,125,24,61,180,87,0,0         ; vbroadcastss  0x57b4(%rip),%ymm15        # 6564 <_sk_callback_avx+0x185>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
-  DB  196,226,125,24,5,145,86,0,0         ; vbroadcastss  0x5691(%rip),%ymm0        # 6454 <_sk_callback_avx+0x189>
+  DB  196,226,125,24,5,165,87,0,0         ; vbroadcastss  0x57a5(%rip),%ymm0        # 6568 <_sk_callback_avx+0x189>
   DB  197,76,89,232                       ; vmulps        %ymm0,%ymm6,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
   DB  196,65,52,89,238                    ; vmulps        %ymm14,%ymm9,%ymm13
@@ -5930,7 +5930,7 @@ _sk_hue_avx LABEL PROC
   DB  196,65,36,95,208                    ; vmaxps        %ymm8,%ymm11,%ymm10
   DB  196,195,109,74,209,240              ; vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,106,85,0,0          ; vbroadcastss  0x556a(%rip),%ymm8        # 6458 <_sk_callback_avx+0x18d>
+  DB  196,98,125,24,5,126,86,0,0          ; vbroadcastss  0x567e(%rip),%ymm8        # 656c <_sk_callback_avx+0x18d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5987,12 +5987,12 @@ _sk_saturation_avx LABEL PROC
   DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
   DB  196,65,36,94,222                    ; vdivps        %ymm14,%ymm11,%ymm11
   DB  196,67,37,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  DB  196,98,125,24,53,114,84,0,0         ; vbroadcastss  0x5472(%rip),%ymm14        # 645c <_sk_callback_avx+0x191>
+  DB  196,98,125,24,53,134,85,0,0         ; vbroadcastss  0x5586(%rip),%ymm14        # 6570 <_sk_callback_avx+0x191>
   DB  196,65,92,89,222                    ; vmulps        %ymm14,%ymm4,%ymm11
-  DB  196,98,125,24,61,104,84,0,0         ; vbroadcastss  0x5468(%rip),%ymm15        # 6460 <_sk_callback_avx+0x195>
+  DB  196,98,125,24,61,124,85,0,0         ; vbroadcastss  0x557c(%rip),%ymm15        # 6574 <_sk_callback_avx+0x195>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
-  DB  196,226,125,24,5,89,84,0,0          ; vbroadcastss  0x5459(%rip),%ymm0        # 6464 <_sk_callback_avx+0x199>
+  DB  196,226,125,24,5,109,85,0,0         ; vbroadcastss  0x556d(%rip),%ymm0        # 6578 <_sk_callback_avx+0x199>
   DB  197,76,89,232                       ; vmulps        %ymm0,%ymm6,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
   DB  196,65,52,89,238                    ; vmulps        %ymm14,%ymm9,%ymm13
@@ -6053,7 +6053,7 @@ _sk_saturation_avx LABEL PROC
   DB  196,65,36,95,208                    ; vmaxps        %ymm8,%ymm11,%ymm10
   DB  196,195,109,74,209,240              ; vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,50,83,0,0           ; vbroadcastss  0x5332(%rip),%ymm8        # 6468 <_sk_callback_avx+0x19d>
+  DB  196,98,125,24,5,70,84,0,0           ; vbroadcastss  0x5446(%rip),%ymm8        # 657c <_sk_callback_avx+0x19d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -6082,12 +6082,12 @@ _sk_color_avx LABEL PROC
   DB  197,252,17,68,36,32                 ; vmovups       %ymm0,0x20(%rsp)
   DB  197,124,89,199                      ; vmulps        %ymm7,%ymm0,%ymm8
   DB  197,116,89,207                      ; vmulps        %ymm7,%ymm1,%ymm9
-  DB  196,98,125,24,45,194,82,0,0         ; vbroadcastss  0x52c2(%rip),%ymm13        # 646c <_sk_callback_avx+0x1a1>
+  DB  196,98,125,24,45,214,83,0,0         ; vbroadcastss  0x53d6(%rip),%ymm13        # 6580 <_sk_callback_avx+0x1a1>
   DB  196,65,92,89,213                    ; vmulps        %ymm13,%ymm4,%ymm10
-  DB  196,98,125,24,53,184,82,0,0         ; vbroadcastss  0x52b8(%rip),%ymm14        # 6470 <_sk_callback_avx+0x1a5>
+  DB  196,98,125,24,53,204,83,0,0         ; vbroadcastss  0x53cc(%rip),%ymm14        # 6584 <_sk_callback_avx+0x1a5>
   DB  196,65,84,89,222                    ; vmulps        %ymm14,%ymm5,%ymm11
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,61,169,82,0,0         ; vbroadcastss  0x52a9(%rip),%ymm15        # 6474 <_sk_callback_avx+0x1a9>
+  DB  196,98,125,24,61,189,83,0,0         ; vbroadcastss  0x53bd(%rip),%ymm15        # 6588 <_sk_callback_avx+0x1a9>
   DB  196,65,76,89,223                    ; vmulps        %ymm15,%ymm6,%ymm11
   DB  196,193,44,88,195                   ; vaddps        %ymm11,%ymm10,%ymm0
   DB  196,65,60,89,221                    ; vmulps        %ymm13,%ymm8,%ymm11
@@ -6150,7 +6150,7 @@ _sk_color_avx LABEL PROC
   DB  196,65,44,95,207                    ; vmaxps        %ymm15,%ymm10,%ymm9
   DB  196,195,37,74,192,0                 ; vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   DB  196,65,124,95,199                   ; vmaxps        %ymm15,%ymm0,%ymm8
-  DB  196,226,125,24,5,112,81,0,0         ; vbroadcastss  0x5170(%rip),%ymm0        # 6478 <_sk_callback_avx+0x1ad>
+  DB  196,226,125,24,5,132,82,0,0         ; vbroadcastss  0x5284(%rip),%ymm0        # 658c <_sk_callback_avx+0x1ad>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,84,36,32                 ; vmulps        0x20(%rsp),%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -6180,12 +6180,12 @@ _sk_luminosity_avx LABEL PROC
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
   DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
-  DB  196,98,125,24,45,252,80,0,0         ; vbroadcastss  0x50fc(%rip),%ymm13        # 647c <_sk_callback_avx+0x1b1>
+  DB  196,98,125,24,45,16,82,0,0          ; vbroadcastss  0x5210(%rip),%ymm13        # 6590 <_sk_callback_avx+0x1b1>
   DB  196,65,108,89,213                   ; vmulps        %ymm13,%ymm2,%ymm10
-  DB  196,98,125,24,53,242,80,0,0         ; vbroadcastss  0x50f2(%rip),%ymm14        # 6480 <_sk_callback_avx+0x1b5>
+  DB  196,98,125,24,53,6,82,0,0           ; vbroadcastss  0x5206(%rip),%ymm14        # 6594 <_sk_callback_avx+0x1b5>
   DB  196,65,116,89,222                   ; vmulps        %ymm14,%ymm1,%ymm11
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,61,227,80,0,0         ; vbroadcastss  0x50e3(%rip),%ymm15        # 6484 <_sk_callback_avx+0x1b9>
+  DB  196,98,125,24,61,247,81,0,0         ; vbroadcastss  0x51f7(%rip),%ymm15        # 6598 <_sk_callback_avx+0x1b9>
   DB  196,65,28,89,223                    ; vmulps        %ymm15,%ymm12,%ymm11
   DB  196,193,44,88,195                   ; vaddps        %ymm11,%ymm10,%ymm0
   DB  196,65,60,89,221                    ; vmulps        %ymm13,%ymm8,%ymm11
@@ -6248,7 +6248,7 @@ _sk_luminosity_avx LABEL PROC
   DB  196,65,44,95,207                    ; vmaxps        %ymm15,%ymm10,%ymm9
   DB  196,195,37,74,192,0                 ; vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   DB  196,65,124,95,199                   ; vmaxps        %ymm15,%ymm0,%ymm8
-  DB  196,226,125,24,5,170,79,0,0         ; vbroadcastss  0x4faa(%rip),%ymm0        # 6488 <_sk_callback_avx+0x1bd>
+  DB  196,226,125,24,5,190,80,0,0         ; vbroadcastss  0x50be(%rip),%ymm0        # 659c <_sk_callback_avx+0x1bd>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -6281,7 +6281,7 @@ _sk_clamp_0_avx LABEL PROC
 
 PUBLIC _sk_clamp_1_avx
 _sk_clamp_1_avx LABEL PROC
-  DB  196,98,125,24,5,58,79,0,0           ; vbroadcastss  0x4f3a(%rip),%ymm8        # 648c <_sk_callback_avx+0x1c1>
+  DB  196,98,125,24,5,78,80,0,0           ; vbroadcastss  0x504e(%rip),%ymm8        # 65a0 <_sk_callback_avx+0x1c1>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
@@ -6291,7 +6291,7 @@ _sk_clamp_1_avx LABEL PROC
 
 PUBLIC _sk_clamp_a_avx
 _sk_clamp_a_avx LABEL PROC
-  DB  196,98,125,24,5,29,79,0,0           ; vbroadcastss  0x4f1d(%rip),%ymm8        # 6490 <_sk_callback_avx+0x1c5>
+  DB  196,98,125,24,5,49,80,0,0           ; vbroadcastss  0x5031(%rip),%ymm8        # 65a4 <_sk_callback_avx+0x1c5>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
   DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
@@ -6363,7 +6363,7 @@ PUBLIC _sk_unpremul_avx
 _sk_unpremul_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
-  DB  196,98,125,24,21,101,78,0,0         ; vbroadcastss  0x4e65(%rip),%ymm10        # 6494 <_sk_callback_avx+0x1c9>
+  DB  196,98,125,24,21,121,79,0,0         ; vbroadcastss  0x4f79(%rip),%ymm10        # 65a8 <_sk_callback_avx+0x1c9>
   DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
   DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
@@ -6374,17 +6374,17 @@ _sk_unpremul_avx LABEL PROC
 
 PUBLIC _sk_from_srgb_avx
 _sk_from_srgb_avx LABEL PROC
-  DB  196,98,125,24,5,70,78,0,0           ; vbroadcastss  0x4e46(%rip),%ymm8        # 6498 <_sk_callback_avx+0x1cd>
+  DB  196,98,125,24,5,90,79,0,0           ; vbroadcastss  0x4f5a(%rip),%ymm8        # 65ac <_sk_callback_avx+0x1cd>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
-  DB  196,98,125,24,29,56,78,0,0          ; vbroadcastss  0x4e38(%rip),%ymm11        # 649c <_sk_callback_avx+0x1d1>
+  DB  196,98,125,24,29,76,79,0,0          ; vbroadcastss  0x4f4c(%rip),%ymm11        # 65b0 <_sk_callback_avx+0x1d1>
   DB  196,65,124,89,227                   ; vmulps        %ymm11,%ymm0,%ymm12
-  DB  196,98,125,24,45,46,78,0,0          ; vbroadcastss  0x4e2e(%rip),%ymm13        # 64a0 <_sk_callback_avx+0x1d5>
+  DB  196,98,125,24,45,66,79,0,0          ; vbroadcastss  0x4f42(%rip),%ymm13        # 65b4 <_sk_callback_avx+0x1d5>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,44,89,212                    ; vmulps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,31,78,0,0          ; vbroadcastss  0x4e1f(%rip),%ymm12        # 64a4 <_sk_callback_avx+0x1d9>
+  DB  196,98,125,24,37,51,79,0,0          ; vbroadcastss  0x4f33(%rip),%ymm12        # 65b8 <_sk_callback_avx+0x1d9>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,53,21,78,0,0          ; vbroadcastss  0x4e15(%rip),%ymm14        # 64a8 <_sk_callback_avx+0x1dd>
+  DB  196,98,125,24,53,41,79,0,0          ; vbroadcastss  0x4f29(%rip),%ymm14        # 65bc <_sk_callback_avx+0x1dd>
   DB  196,193,124,194,198,1               ; vcmpltps      %ymm14,%ymm0,%ymm0
   DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   DB  196,65,116,89,200                   ; vmulps        %ymm8,%ymm1,%ymm9
@@ -6409,20 +6409,20 @@ _sk_from_srgb_avx LABEL PROC
 PUBLIC _sk_to_srgb_avx
 _sk_to_srgb_avx LABEL PROC
   DB  197,124,82,200                      ; vrsqrtps      %ymm0,%ymm9
-  DB  196,98,125,24,5,170,77,0,0          ; vbroadcastss  0x4daa(%rip),%ymm8        # 64ac <_sk_callback_avx+0x1e1>
+  DB  196,98,125,24,5,190,78,0,0          ; vbroadcastss  0x4ebe(%rip),%ymm8        # 65c0 <_sk_callback_avx+0x1e1>
   DB  196,65,124,89,208                   ; vmulps        %ymm8,%ymm0,%ymm10
-  DB  196,98,125,24,29,160,77,0,0         ; vbroadcastss  0x4da0(%rip),%ymm11        # 64b0 <_sk_callback_avx+0x1e5>
+  DB  196,98,125,24,29,180,78,0,0         ; vbroadcastss  0x4eb4(%rip),%ymm11        # 65c4 <_sk_callback_avx+0x1e5>
   DB  196,65,52,89,227                    ; vmulps        %ymm11,%ymm9,%ymm12
-  DB  196,98,125,24,45,150,77,0,0         ; vbroadcastss  0x4d96(%rip),%ymm13        # 64b4 <_sk_callback_avx+0x1e9>
+  DB  196,98,125,24,45,170,78,0,0         ; vbroadcastss  0x4eaa(%rip),%ymm13        # 65c8 <_sk_callback_avx+0x1e9>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,52,89,228                    ; vmulps        %ymm12,%ymm9,%ymm12
-  DB  196,98,125,24,53,135,77,0,0         ; vbroadcastss  0x4d87(%rip),%ymm14        # 64b8 <_sk_callback_avx+0x1ed>
+  DB  196,98,125,24,53,155,78,0,0         ; vbroadcastss  0x4e9b(%rip),%ymm14        # 65cc <_sk_callback_avx+0x1ed>
   DB  196,65,28,88,230                    ; vaddps        %ymm14,%ymm12,%ymm12
-  DB  196,98,125,24,61,125,77,0,0         ; vbroadcastss  0x4d7d(%rip),%ymm15        # 64bc <_sk_callback_avx+0x1f1>
+  DB  196,98,125,24,61,145,78,0,0         ; vbroadcastss  0x4e91(%rip),%ymm15        # 65d0 <_sk_callback_avx+0x1f1>
   DB  196,65,52,88,207                    ; vaddps        %ymm15,%ymm9,%ymm9
   DB  196,65,124,83,201                   ; vrcpps        %ymm9,%ymm9
   DB  196,65,52,89,204                    ; vmulps        %ymm12,%ymm9,%ymm9
-  DB  196,98,125,24,37,105,77,0,0         ; vbroadcastss  0x4d69(%rip),%ymm12        # 64c0 <_sk_callback_avx+0x1f5>
+  DB  196,98,125,24,37,125,78,0,0         ; vbroadcastss  0x4e7d(%rip),%ymm12        # 65d4 <_sk_callback_avx+0x1f5>
   DB  196,193,124,194,196,1               ; vcmpltps      %ymm12,%ymm0,%ymm0
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
@@ -6457,7 +6457,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  197,124,93,201                      ; vminps        %ymm1,%ymm0,%ymm9
   DB  197,52,93,202                       ; vminps        %ymm2,%ymm9,%ymm9
   DB  196,65,60,92,209                    ; vsubps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,207,76,0,0         ; vbroadcastss  0x4ccf(%rip),%ymm11        # 64c4 <_sk_callback_avx+0x1f9>
+  DB  196,98,125,24,29,227,77,0,0         ; vbroadcastss  0x4de3(%rip),%ymm11        # 65d8 <_sk_callback_avx+0x1f9>
   DB  196,65,36,94,218                    ; vdivps        %ymm10,%ymm11,%ymm11
   DB  197,116,92,226                      ; vsubps        %ymm2,%ymm1,%ymm12
   DB  196,65,28,89,227                    ; vmulps        %ymm11,%ymm12,%ymm12
@@ -6467,19 +6467,19 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  197,252,92,201                      ; vsubps        %ymm1,%ymm0,%ymm1
   DB  196,193,116,89,203                  ; vmulps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,168,76,0,0         ; vbroadcastss  0x4ca8(%rip),%ymm11        # 64d0 <_sk_callback_avx+0x205>
+  DB  196,98,125,24,29,188,77,0,0         ; vbroadcastss  0x4dbc(%rip),%ymm11        # 65e4 <_sk_callback_avx+0x205>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,150,76,0,0         ; vbroadcastss  0x4c96(%rip),%ymm11        # 64cc <_sk_callback_avx+0x201>
+  DB  196,98,125,24,29,170,77,0,0         ; vbroadcastss  0x4daa(%rip),%ymm11        # 65e0 <_sk_callback_avx+0x201>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,227,117,74,202,224              ; vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,126,76,0,0        ; vbroadcastss  0x4c7e(%rip),%ymm2        # 64c8 <_sk_callback_avx+0x1fd>
+  DB  196,226,125,24,21,146,77,0,0        ; vbroadcastss  0x4d92(%rip),%ymm2        # 65dc <_sk_callback_avx+0x1fd>
   DB  196,65,12,87,246                    ; vxorps        %ymm14,%ymm14,%ymm14
   DB  196,227,13,74,210,208               ; vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   DB  197,188,194,192,0                   ; vcmpeqps      %ymm0,%ymm8,%ymm0
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
   DB  196,227,117,74,194,0                ; vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   DB  196,193,60,88,201                   ; vaddps        %ymm9,%ymm8,%ymm1
-  DB  196,98,125,24,37,101,76,0,0         ; vbroadcastss  0x4c65(%rip),%ymm12        # 64d8 <_sk_callback_avx+0x20d>
+  DB  196,98,125,24,37,121,77,0,0         ; vbroadcastss  0x4d79(%rip),%ymm12        # 65ec <_sk_callback_avx+0x20d>
   DB  196,193,116,89,212                  ; vmulps        %ymm12,%ymm1,%ymm2
   DB  197,28,194,226,1                    ; vcmpltps      %ymm2,%ymm12,%ymm12
   DB  196,65,36,92,216                    ; vsubps        %ymm8,%ymm11,%ymm11
@@ -6489,7 +6489,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  196,195,125,74,198,128              ; vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   DB  196,195,117,74,206,128              ; vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  DB  196,98,125,24,5,40,76,0,0           ; vbroadcastss  0x4c28(%rip),%ymm8        # 64d4 <_sk_callback_avx+0x209>
+  DB  196,98,125,24,5,60,77,0,0           ; vbroadcastss  0x4d3c(%rip),%ymm8        # 65e8 <_sk_callback_avx+0x209>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6504,7 +6504,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,252,17,28,36                    ; vmovups       %ymm3,(%rsp)
   DB  197,252,40,225                      ; vmovaps       %ymm1,%ymm4
   DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
-  DB  196,98,125,24,5,239,75,0,0          ; vbroadcastss  0x4bef(%rip),%ymm8        # 64dc <_sk_callback_avx+0x211>
+  DB  196,98,125,24,5,3,77,0,0            ; vbroadcastss  0x4d03(%rip),%ymm8        # 65f0 <_sk_callback_avx+0x211>
   DB  197,60,194,202,2                    ; vcmpleps      %ymm2,%ymm8,%ymm9
   DB  197,92,89,210                       ; vmulps        %ymm2,%ymm4,%ymm10
   DB  196,65,92,92,218                    ; vsubps        %ymm10,%ymm4,%ymm11
@@ -6512,23 +6512,23 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,52,88,210                       ; vaddps        %ymm2,%ymm9,%ymm10
   DB  197,108,88,202                      ; vaddps        %ymm2,%ymm2,%ymm9
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,29,201,75,0,0         ; vbroadcastss  0x4bc9(%rip),%ymm11        # 64e0 <_sk_callback_avx+0x215>
+  DB  196,98,125,24,29,221,76,0,0         ; vbroadcastss  0x4cdd(%rip),%ymm11        # 65f4 <_sk_callback_avx+0x215>
   DB  196,65,100,88,219                   ; vaddps        %ymm11,%ymm3,%ymm11
   DB  196,67,125,8,227,1                  ; vroundps      $0x1,%ymm11,%ymm12
   DB  196,65,36,92,252                    ; vsubps        %ymm12,%ymm11,%ymm15
   DB  196,65,44,92,217                    ; vsubps        %ymm9,%ymm10,%ymm11
-  DB  196,98,125,24,37,179,75,0,0         ; vbroadcastss  0x4bb3(%rip),%ymm12        # 64e8 <_sk_callback_avx+0x21d>
+  DB  196,98,125,24,37,199,76,0,0         ; vbroadcastss  0x4cc7(%rip),%ymm12        # 65fc <_sk_callback_avx+0x21d>
   DB  196,193,4,89,196                    ; vmulps        %ymm12,%ymm15,%ymm0
-  DB  196,98,125,24,45,169,75,0,0         ; vbroadcastss  0x4ba9(%rip),%ymm13        # 64ec <_sk_callback_avx+0x221>
+  DB  196,98,125,24,45,189,76,0,0         ; vbroadcastss  0x4cbd(%rip),%ymm13        # 6600 <_sk_callback_avx+0x221>
   DB  197,20,92,240                       ; vsubps        %ymm0,%ymm13,%ymm14
   DB  196,65,36,89,246                    ; vmulps        %ymm14,%ymm11,%ymm14
   DB  196,65,52,88,246                    ; vaddps        %ymm14,%ymm9,%ymm14
-  DB  196,226,125,24,13,138,75,0,0        ; vbroadcastss  0x4b8a(%rip),%ymm1        # 64e4 <_sk_callback_avx+0x219>
+  DB  196,226,125,24,13,158,76,0,0        ; vbroadcastss  0x4c9e(%rip),%ymm1        # 65f8 <_sk_callback_avx+0x219>
   DB  196,193,116,194,255,2               ; vcmpleps      %ymm15,%ymm1,%ymm7
   DB  196,195,13,74,249,112               ; vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   DB  196,65,60,194,247,2                 ; vcmpleps      %ymm15,%ymm8,%ymm14
   DB  196,227,45,74,255,224               ; vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  DB  196,98,125,24,53,117,75,0,0         ; vbroadcastss  0x4b75(%rip),%ymm14        # 64f0 <_sk_callback_avx+0x225>
+  DB  196,98,125,24,53,137,76,0,0         ; vbroadcastss  0x4c89(%rip),%ymm14        # 6604 <_sk_callback_avx+0x225>
   DB  196,65,12,194,255,2                 ; vcmpleps      %ymm15,%ymm14,%ymm15
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -6547,7 +6547,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,164,89,247                      ; vmulps        %ymm7,%ymm11,%ymm6
   DB  197,180,88,246                      ; vaddps        %ymm6,%ymm9,%ymm6
   DB  196,227,77,74,237,0                 ; vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  DB  196,226,125,24,5,23,75,0,0          ; vbroadcastss  0x4b17(%rip),%ymm0        # 64f4 <_sk_callback_avx+0x229>
+  DB  196,226,125,24,5,43,76,0,0          ; vbroadcastss  0x4c2b(%rip),%ymm0        # 6608 <_sk_callback_avx+0x229>
   DB  197,228,88,192                      ; vaddps        %ymm0,%ymm3,%ymm0
   DB  196,227,125,8,216,1                 ; vroundps      $0x1,%ymm0,%ymm3
   DB  197,252,92,195                      ; vsubps        %ymm3,%ymm0,%ymm0
@@ -6602,7 +6602,7 @@ _sk_scale_u8_avx LABEL PROC
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,58,74,0,0          ; vbroadcastss  0x4a3a(%rip),%ymm9        # 64f8 <_sk_callback_avx+0x22d>
+  DB  196,98,125,24,13,78,75,0,0          ; vbroadcastss  0x4b4e(%rip),%ymm9        # 660c <_sk_callback_avx+0x22d>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -6657,7 +6657,7 @@ _sk_lerp_u8_avx LABEL PROC
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,134,73,0,0         ; vbroadcastss  0x4986(%rip),%ymm9        # 64fc <_sk_callback_avx+0x231>
+  DB  196,98,125,24,13,154,74,0,0         ; vbroadcastss  0x4a9a(%rip),%ymm9        # 6610 <_sk_callback_avx+0x231>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -6698,20 +6698,20 @@ _sk_lerp_565_avx LABEL PROC
   DB  196,65,57,105,201                   ; vpunpckhwd    %xmm9,%xmm8,%xmm9
   DB  196,66,121,51,192                   ; vpmovzxwd     %xmm8,%xmm8
   DB  196,67,61,24,193,1                  ; vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,240,72,0,0         ; vbroadcastss  0x48f0(%rip),%ymm9        # 6500 <_sk_callback_avx+0x235>
+  DB  196,98,125,24,13,4,74,0,0           ; vbroadcastss  0x4a04(%rip),%ymm9        # 6614 <_sk_callback_avx+0x235>
   DB  196,65,60,84,201                    ; vandps        %ymm9,%ymm8,%ymm9
   DB  196,65,124,91,201                   ; vcvtdq2ps     %ymm9,%ymm9
-  DB  196,98,125,24,21,225,72,0,0         ; vbroadcastss  0x48e1(%rip),%ymm10        # 6504 <_sk_callback_avx+0x239>
+  DB  196,98,125,24,21,245,73,0,0         ; vbroadcastss  0x49f5(%rip),%ymm10        # 6618 <_sk_callback_avx+0x239>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,215,72,0,0         ; vbroadcastss  0x48d7(%rip),%ymm10        # 6508 <_sk_callback_avx+0x23d>
+  DB  196,98,125,24,21,235,73,0,0         ; vbroadcastss  0x49eb(%rip),%ymm10        # 661c <_sk_callback_avx+0x23d>
   DB  196,65,60,84,210                    ; vandps        %ymm10,%ymm8,%ymm10
   DB  196,65,124,91,210                   ; vcvtdq2ps     %ymm10,%ymm10
-  DB  196,98,125,24,29,200,72,0,0         ; vbroadcastss  0x48c8(%rip),%ymm11        # 650c <_sk_callback_avx+0x241>
+  DB  196,98,125,24,29,220,73,0,0         ; vbroadcastss  0x49dc(%rip),%ymm11        # 6620 <_sk_callback_avx+0x241>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,29,190,72,0,0         ; vbroadcastss  0x48be(%rip),%ymm11        # 6510 <_sk_callback_avx+0x245>
+  DB  196,98,125,24,29,210,73,0,0         ; vbroadcastss  0x49d2(%rip),%ymm11        # 6624 <_sk_callback_avx+0x245>
   DB  196,65,60,84,195                    ; vandps        %ymm11,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,29,175,72,0,0         ; vbroadcastss  0x48af(%rip),%ymm11        # 6514 <_sk_callback_avx+0x249>
+  DB  196,98,125,24,29,195,73,0,0         ; vbroadcastss  0x49c3(%rip),%ymm11        # 6628 <_sk_callback_avx+0x249>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
@@ -6780,22 +6780,19 @@ _sk_lerp_565_avx LABEL PROC
 
 PUBLIC _sk_load_tables_avx
 _sk_load_tables_avx LABEL PROC
-  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,3,8                              ; add           (%rax),%r9
-  DB  77,133,192                          ; test          %r8,%r8
-  DB  15,133,31,2,0,0                     ; jne           1f80 <_sk_load_tables_avx+0x238>
-  DB  196,65,124,16,17                    ; vmovups       (%r9),%ymm10
+  DB  76,139,0                            ; mov           (%rax),%r8
+  DB  72,133,201                          ; test          %rcx,%rcx
+  DB  15,133,26,2,0,0                     ; jne           1f70 <_sk_load_tables_avx+0x228>
+  DB  196,65,124,16,4,184                 ; vmovups       (%r8,%rdi,4),%ymm8
   DB  85                                  ; push          %rbp
   DB  65,87                               ; push          %r15
   DB  65,86                               ; push          %r14
   DB  65,85                               ; push          %r13
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
-  DB  80                                  ; push          %rax
-  DB  197,124,40,13,7,75,0,0              ; vmovaps       0x4b07(%rip),%ymm9        # 6880 <_sk_callback_avx+0x5b5>
-  DB  196,193,44,84,193                   ; vandps        %ymm9,%ymm10,%ymm0
+  DB  197,124,40,13,146,75,0,0            ; vmovaps       0x4b92(%rip),%ymm9        # 6900 <_sk_callback_avx+0x521>
+  DB  196,193,60,84,193                   ; vandps        %ymm9,%ymm8,%ymm0
   DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
   DB  69,137,203                          ; mov           %r9d,%r11d
   DB  196,195,249,22,194,1                ; vpextrq       $0x1,%xmm0,%r10
@@ -6803,26 +6800,26 @@ _sk_load_tables_avx LABEL PROC
   DB  73,193,234,32                       ; shr           $0x20,%r10
   DB  73,193,233,32                       ; shr           $0x20,%r9
   DB  196,227,125,25,192,1                ; vextractf128  $0x1,%ymm0,%xmm0
-  DB  196,225,249,126,195                 ; vmovq         %xmm0,%rbx
-  DB  65,137,223                          ; mov           %ebx,%r15d
-  DB  196,227,249,22,193,1                ; vpextrq       $0x1,%xmm0,%rcx
-  DB  65,137,205                          ; mov           %ecx,%r13d
-  DB  72,193,233,32                       ; shr           $0x20,%rcx
+  DB  196,193,249,126,196                 ; vmovq         %xmm0,%r12
+  DB  69,137,231                          ; mov           %r12d,%r15d
+  DB  196,227,249,22,195,1                ; vpextrq       $0x1,%xmm0,%rbx
+  DB  65,137,221                          ; mov           %ebx,%r13d
   DB  72,193,235,32                       ; shr           $0x20,%rbx
+  DB  73,193,236,32                       ; shr           $0x20,%r12
   DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
-  DB  76,139,96,16                        ; mov           0x10(%rax),%r12
+  DB  76,139,64,16                        ; mov           0x10(%rax),%r8
   DB  196,161,122,16,68,189,0             ; vmovss        0x0(%rbp,%r15,4),%xmm0
-  DB  196,227,121,33,68,157,0,16          ; vinsertps     $0x10,0x0(%rbp,%rbx,4),%xmm0,%xmm0
+  DB  196,163,121,33,68,165,0,16          ; vinsertps     $0x10,0x0(%rbp,%r12,4),%xmm0,%xmm0
   DB  196,163,121,33,68,173,0,32          ; vinsertps     $0x20,0x0(%rbp,%r13,4),%xmm0,%xmm0
-  DB  196,227,121,33,68,141,0,48          ; vinsertps     $0x30,0x0(%rbp,%rcx,4),%xmm0,%xmm0
+  DB  196,227,121,33,68,157,0,48          ; vinsertps     $0x30,0x0(%rbp,%rbx,4),%xmm0,%xmm0
   DB  196,161,122,16,76,157,0             ; vmovss        0x0(%rbp,%r11,4),%xmm1
   DB  196,163,113,33,76,141,0,16          ; vinsertps     $0x10,0x0(%rbp,%r9,4),%xmm1,%xmm1
   DB  196,163,113,33,76,181,0,32          ; vinsertps     $0x20,0x0(%rbp,%r14,4),%xmm1,%xmm1
   DB  196,163,113,33,76,149,0,48          ; vinsertps     $0x30,0x0(%rbp,%r10,4),%xmm1,%xmm1
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  DB  196,193,113,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm1
-  DB  196,67,125,25,208,1                 ; vextractf128  $0x1,%ymm10,%xmm8
-  DB  196,193,105,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm2
+  DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
+  DB  196,67,125,25,194,1                 ; vextractf128  $0x1,%ymm8,%xmm10
+  DB  196,193,105,114,210,8               ; vpsrld        $0x8,%xmm10,%xmm2
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  196,193,116,84,201                  ; vandps        %ymm9,%ymm1,%ymm1
   DB  196,193,249,126,201                 ; vmovq         %xmm1,%r9
@@ -6832,36 +6829,36 @@ _sk_load_tables_avx LABEL PROC
   DB  73,193,234,32                       ; shr           $0x20,%r10
   DB  73,193,233,32                       ; shr           $0x20,%r9
   DB  196,227,125,25,201,1                ; vextractf128  $0x1,%ymm1,%xmm1
-  DB  196,225,249,126,203                 ; vmovq         %xmm1,%rbx
-  DB  65,137,223                          ; mov           %ebx,%r15d
-  DB  196,227,249,22,205,1                ; vpextrq       $0x1,%xmm1,%rbp
-  DB  137,233                             ; mov           %ebp,%ecx
-  DB  72,193,237,32                       ; shr           $0x20,%rbp
+  DB  196,225,249,126,205                 ; vmovq         %xmm1,%rbp
+  DB  65,137,239                          ; mov           %ebp,%r15d
+  DB  196,227,249,22,203,1                ; vpextrq       $0x1,%xmm1,%rbx
+  DB  65,137,220                          ; mov           %ebx,%r12d
   DB  72,193,235,32                       ; shr           $0x20,%rbx
-  DB  196,129,122,16,12,188               ; vmovss        (%r12,%r15,4),%xmm1
-  DB  196,195,113,33,12,156,16            ; vinsertps     $0x10,(%r12,%rbx,4),%xmm1,%xmm1
-  DB  196,193,122,16,20,140               ; vmovss        (%r12,%rcx,4),%xmm2
+  DB  72,193,237,32                       ; shr           $0x20,%rbp
+  DB  196,129,122,16,12,184               ; vmovss        (%r8,%r15,4),%xmm1
+  DB  196,195,113,33,12,168,16            ; vinsertps     $0x10,(%r8,%rbp,4),%xmm1,%xmm1
+  DB  196,129,122,16,20,160               ; vmovss        (%r8,%r12,4),%xmm2
   DB  196,227,113,33,202,32               ; vinsertps     $0x20,%xmm2,%xmm1,%xmm1
-  DB  196,193,122,16,20,172               ; vmovss        (%r12,%rbp,4),%xmm2
+  DB  196,193,122,16,20,152               ; vmovss        (%r8,%rbx,4),%xmm2
   DB  196,227,113,33,202,48               ; vinsertps     $0x30,%xmm2,%xmm1,%xmm1
-  DB  196,129,122,16,20,156               ; vmovss        (%r12,%r11,4),%xmm2
-  DB  196,131,105,33,20,140,16            ; vinsertps     $0x10,(%r12,%r9,4),%xmm2,%xmm2
-  DB  196,129,122,16,28,180               ; vmovss        (%r12,%r14,4),%xmm3
+  DB  196,129,122,16,20,152               ; vmovss        (%r8,%r11,4),%xmm2
+  DB  196,131,105,33,20,136,16            ; vinsertps     $0x10,(%r8,%r9,4),%xmm2,%xmm2
+  DB  196,129,122,16,28,176               ; vmovss        (%r8,%r14,4),%xmm3
   DB  196,227,105,33,211,32               ; vinsertps     $0x20,%xmm3,%xmm2,%xmm2
-  DB  196,129,122,16,28,148               ; vmovss        (%r12,%r10,4),%xmm3
+  DB  196,129,122,16,28,144               ; vmovss        (%r8,%r10,4),%xmm3
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,227,109,24,201,1                ; vinsertf128   $0x1,%xmm1,%ymm2,%ymm1
   DB  72,139,64,24                        ; mov           0x18(%rax),%rax
-  DB  196,193,105,114,210,16              ; vpsrld        $0x10,%xmm10,%xmm2
-  DB  196,193,97,114,208,16               ; vpsrld        $0x10,%xmm8,%xmm3
+  DB  196,193,105,114,208,16              ; vpsrld        $0x10,%xmm8,%xmm2
+  DB  196,193,97,114,210,16               ; vpsrld        $0x10,%xmm10,%xmm3
   DB  196,227,109,24,211,1                ; vinsertf128   $0x1,%xmm3,%ymm2,%ymm2
   DB  196,193,108,84,209                  ; vandps        %ymm9,%ymm2,%ymm2
-  DB  196,193,249,126,209                 ; vmovq         %xmm2,%r9
-  DB  69,137,202                          ; mov           %r9d,%r10d
-  DB  196,227,249,22,209,1                ; vpextrq       $0x1,%xmm2,%rcx
-  DB  65,137,203                          ; mov           %ecx,%r11d
-  DB  72,193,233,32                       ; shr           $0x20,%rcx
+  DB  196,193,249,126,208                 ; vmovq         %xmm2,%r8
+  DB  69,137,194                          ; mov           %r8d,%r10d
+  DB  196,195,249,22,209,1                ; vpextrq       $0x1,%xmm2,%r9
+  DB  69,137,203                          ; mov           %r9d,%r11d
   DB  73,193,233,32                       ; shr           $0x20,%r9
+  DB  73,193,232,32                       ; shr           $0x20,%r8
   DB  196,227,125,25,210,1                ; vextractf128  $0x1,%ymm2,%xmm2
   DB  196,225,249,126,213                 ; vmovq         %xmm2,%rbp
   DB  65,137,238                          ; mov           %ebp,%r14d
@@ -6876,21 +6873,19 @@ _sk_load_tables_avx LABEL PROC
   DB  197,250,16,28,152                   ; vmovss        (%rax,%rbx,4),%xmm3
   DB  196,99,105,33,203,48                ; vinsertps     $0x30,%xmm3,%xmm2,%xmm9
   DB  196,161,122,16,28,144               ; vmovss        (%rax,%r10,4),%xmm3
-  DB  196,163,97,33,28,136,16             ; vinsertps     $0x10,(%rax,%r9,4),%xmm3,%xmm3
+  DB  196,163,97,33,28,128,16             ; vinsertps     $0x10,(%rax,%r8,4),%xmm3,%xmm3
   DB  196,161,122,16,20,152               ; vmovss        (%rax,%r11,4),%xmm2
   DB  196,227,97,33,210,32                ; vinsertps     $0x20,%xmm2,%xmm3,%xmm2
-  DB  197,250,16,28,136                   ; vmovss        (%rax,%rcx,4),%xmm3
+  DB  196,161,122,16,28,136               ; vmovss        (%rax,%r9,4),%xmm3
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,195,109,24,209,1                ; vinsertf128   $0x1,%xmm9,%ymm2,%ymm2
-  DB  196,193,49,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm9
-  DB  196,193,97,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm3
-  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
+  DB  196,193,57,114,208,24               ; vpsrld        $0x18,%xmm8,%xmm8
+  DB  196,193,97,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm3
+  DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,178,69,0,0          ; vbroadcastss  0x45b2(%rip),%ymm8        # 6518 <_sk_callback_avx+0x24d>
+  DB  196,98,125,24,5,207,70,0,0          ; vbroadcastss  0x46cf(%rip),%ymm8        # 662c <_sk_callback_avx+0x24d>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,137,193                          ; mov           %r8,%rcx
-  DB  72,131,196,8                        ; add           $0x8,%rsp
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,93                               ; pop           %r13
@@ -6898,20 +6893,57 @@ _sk_load_tables_avx LABEL PROC
   DB  65,95                               ; pop           %r15
   DB  93                                  ; pop           %rbp
   DB  255,224                             ; jmpq          *%rax
-  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
-  DB  68,41,193                           ; sub           %r8d,%ecx
-  DB  192,225,3                           ; shl           $0x3,%cl
-  DB  73,199,194,255,255,255,255          ; mov           $0xffffffffffffffff,%r10
-  DB  73,211,234                          ; shr           %cl,%r10
-  DB  196,193,249,110,194                 ; vmovq         %r10,%xmm0
-  DB  196,226,121,48,192                  ; vpmovzxbw     %xmm0,%xmm0
-  DB  196,226,121,0,13,72,72,0,0          ; vpshufb       0x4848(%rip),%xmm0,%xmm1        # 67f0 <_sk_callback_avx+0x525>
-  DB  196,226,121,33,201                  ; vpmovsxbd     %xmm1,%xmm1
-  DB  196,226,121,0,5,74,72,0,0           ; vpshufb       0x484a(%rip),%xmm0,%xmm0        # 6800 <_sk_callback_avx+0x535>
-  DB  196,226,121,33,192                  ; vpmovsxbd     %xmm0,%xmm0
-  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  DB  196,66,125,44,17                    ; vmaskmovps    (%r9),%ymm0,%ymm10
-  DB  233,155,253,255,255                 ; jmpq          1d66 <_sk_load_tables_avx+0x1e>
+  DB  65,137,201                          ; mov           %ecx,%r9d
+  DB  65,128,225,7                        ; and           $0x7,%r9b
+  DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
+  DB  65,254,201                          ; dec           %r9b
+  DB  65,128,249,6                        ; cmp           $0x6,%r9b
+  DB  15,135,211,253,255,255              ; ja            1d5c <_sk_load_tables_avx+0x14>
+  DB  69,15,182,201                       ; movzbl        %r9b,%r9d
+  DB  76,141,21,140,0,0,0                 ; lea           0x8c(%rip),%r10        # 2020 <_sk_load_tables_avx+0x2d8>
+  DB  79,99,12,138                        ; movslq        (%r10,%r9,4),%r9
+  DB  77,1,209                            ; add           %r10,%r9
+  DB  65,255,225                          ; jmpq          *%r9
+  DB  196,193,121,110,68,184,24           ; vmovd         0x18(%r8,%rdi,4),%xmm0
+  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
+  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
+  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
+  DB  196,99,117,12,192,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm8
+  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
+  DB  196,195,121,34,68,184,20,1          ; vpinsrd       $0x1,0x14(%r8,%rdi,4),%xmm0,%xmm0
+  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
+  DB  196,99,125,25,192,1                 ; vextractf128  $0x1,%ymm8,%xmm0
+  DB  196,195,121,34,68,184,16,0          ; vpinsrd       $0x0,0x10(%r8,%rdi,4),%xmm0,%xmm0
+  DB  196,99,61,24,192,1                  ; vinsertf128   $0x1,%xmm0,%ymm8,%ymm8
+  DB  196,195,57,34,68,184,12,3           ; vpinsrd       $0x3,0xc(%r8,%rdi,4),%xmm8,%xmm0
+  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  DB  196,195,57,34,68,184,8,2            ; vpinsrd       $0x2,0x8(%r8,%rdi,4),%xmm8,%xmm0
+  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  DB  196,195,57,34,68,184,4,1            ; vpinsrd       $0x1,0x4(%r8,%rdi,4),%xmm8,%xmm0
+  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  DB  196,195,57,34,4,184,0               ; vpinsrd       $0x0,(%r8,%rdi,4),%xmm8,%xmm0
+  DB  196,99,61,12,192,15                 ; vblendps      $0xf,%ymm0,%ymm8,%ymm8
+  DB  233,62,253,255,255                  ; jmpq          1d5c <_sk_load_tables_avx+0x14>
+  DB  102,144                             ; xchg          %ax,%ax
+  DB  236                                 ; in            (%dx),%al
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  222,255                             ; fdivrp        %st,%st(7)
+  DB  255                                 ; (bad)
+  DB  255,208                             ; callq         *%rax
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,194                             ; inc           %edx
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,174,255,255,255,154             ; ljmp          *-0x65000001(%rsi)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  126,255                             ; jle           2039 <_sk_load_tables_avx+0x2f1>
+  DB  255                                 ; (bad)
+  DB  255                                 ; .byte         0xff
 
 PUBLIC _sk_load_tables_u16_be_avx
 _sk_load_tables_u16_be_avx LABEL PROC
@@ -6919,7 +6951,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,113,2,0,0                    ; jne           2252 <_sk_load_tables_u16_be_avx+0x287>
+  DB  15,133,113,2,0,0                    ; jne           22c3 <_sk_load_tables_u16_be_avx+0x287>
   DB  196,1,121,16,4,72                   ; vmovupd       (%r8,%r9,2),%xmm8
   DB  196,129,121,16,84,72,16             ; vmovupd       0x10(%r8,%r9,2),%xmm2
   DB  196,129,121,16,92,72,32             ; vmovupd       0x20(%r8,%r9,2),%xmm3
@@ -6941,7 +6973,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  197,177,108,208                     ; vpunpcklqdq   %xmm0,%xmm9,%xmm2
   DB  197,177,109,200                     ; vpunpckhqdq   %xmm0,%xmm9,%xmm1
   DB  196,65,57,108,212                   ; vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  DB  197,121,111,29,211,71,0,0           ; vmovdqa       0x47d3(%rip),%xmm11        # 6810 <_sk_callback_avx+0x545>
+  DB  197,121,111,29,210,72,0,0           ; vmovdqa       0x48d2(%rip),%xmm11        # 6980 <_sk_callback_avx+0x5a1>
   DB  196,193,105,219,195                 ; vpand         %xmm11,%xmm2,%xmm0
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  196,193,121,105,209                 ; vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -7040,7 +7072,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  196,226,121,51,219                  ; vpmovzxwd     %xmm3,%xmm3
   DB  196,195,101,24,216,1                ; vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,221,66,0,0          ; vbroadcastss  0x42dd(%rip),%ymm8        # 651c <_sk_callback_avx+0x251>
+  DB  196,98,125,24,5,128,67,0,0          ; vbroadcastss  0x4380(%rip),%ymm8        # 6630 <_sk_callback_avx+0x251>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -7053,29 +7085,29 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  196,1,123,16,4,72                   ; vmovsd        (%r8,%r9,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            22b8 <_sk_load_tables_u16_be_avx+0x2ed>
+  DB  116,85                              ; je            2329 <_sk_load_tables_u16_be_avx+0x2ed>
   DB  196,1,57,22,68,72,8                 ; vmovhpd       0x8(%r8,%r9,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            22b8 <_sk_load_tables_u16_be_avx+0x2ed>
+  DB  114,72                              ; jb            2329 <_sk_load_tables_u16_be_avx+0x2ed>
   DB  196,129,123,16,84,72,16             ; vmovsd        0x10(%r8,%r9,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            22c5 <_sk_load_tables_u16_be_avx+0x2fa>
+  DB  116,72                              ; je            2336 <_sk_load_tables_u16_be_avx+0x2fa>
   DB  196,129,105,22,84,72,24             ; vmovhpd       0x18(%r8,%r9,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            22c5 <_sk_load_tables_u16_be_avx+0x2fa>
+  DB  114,59                              ; jb            2336 <_sk_load_tables_u16_be_avx+0x2fa>
   DB  196,129,123,16,92,72,32             ; vmovsd        0x20(%r8,%r9,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,97,253,255,255               ; je            1ffc <_sk_load_tables_u16_be_avx+0x31>
+  DB  15,132,97,253,255,255               ; je            206d <_sk_load_tables_u16_be_avx+0x31>
   DB  196,129,97,22,92,72,40              ; vmovhpd       0x28(%r8,%r9,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,80,253,255,255               ; jb            1ffc <_sk_load_tables_u16_be_avx+0x31>
+  DB  15,130,80,253,255,255               ; jb            206d <_sk_load_tables_u16_be_avx+0x31>
   DB  196,1,122,126,76,72,48              ; vmovq         0x30(%r8,%r9,2),%xmm9
-  DB  233,68,253,255,255                  ; jmpq          1ffc <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,68,253,255,255                  ; jmpq          206d <_sk_load_tables_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,55,253,255,255                  ; jmpq          1ffc <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,55,253,255,255                  ; jmpq          206d <_sk_load_tables_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,46,253,255,255                  ; jmpq          1ffc <_sk_load_tables_u16_be_avx+0x31>
+  DB  233,46,253,255,255                  ; jmpq          206d <_sk_load_tables_u16_be_avx+0x31>
 
 PUBLIC _sk_load_tables_rgb_u16_be_avx
 _sk_load_tables_rgb_u16_be_avx LABEL PROC
@@ -7083,7 +7115,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,127                       ; lea           (%rdi,%rdi,2),%r9
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,93,2,0,0                     ; jne           253d <_sk_load_tables_rgb_u16_be_avx+0x26f>
+  DB  15,133,93,2,0,0                     ; jne           25ae <_sk_load_tables_rgb_u16_be_avx+0x26f>
   DB  196,129,122,111,4,72                ; vmovdqu       (%r8,%r9,2),%xmm0
   DB  196,129,122,111,84,72,12            ; vmovdqu       0xc(%r8,%r9,2),%xmm2
   DB  196,129,122,111,76,72,24            ; vmovdqu       0x18(%r8,%r9,2),%xmm1
@@ -7110,7 +7142,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  197,185,108,202                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm1
   DB  197,185,109,210                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm2
   DB  197,121,108,195                     ; vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  DB  197,121,111,13,204,68,0,0           ; vmovdqa       0x44cc(%rip),%xmm9        # 6820 <_sk_callback_avx+0x555>
+  DB  197,121,111,13,203,69,0,0           ; vmovdqa       0x45cb(%rip),%xmm9        # 6990 <_sk_callback_avx+0x5b1>
   DB  196,193,113,219,193                 ; vpand         %xmm9,%xmm1,%xmm0
   DB  196,65,41,239,210                   ; vpxor         %xmm10,%xmm10,%xmm10
   DB  196,193,121,105,202                 ; vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -7202,7 +7234,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,195,109,24,208,1                ; vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,239,63,0,0        ; vbroadcastss  0x3fef(%rip),%ymm3        # 6520 <_sk_callback_avx+0x255>
+  DB  196,226,125,24,29,146,64,0,0        ; vbroadcastss  0x4092(%rip),%ymm3        # 6634 <_sk_callback_avx+0x255>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,93                               ; pop           %r13
@@ -7213,36 +7245,36 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  196,129,121,110,4,72                ; vmovd         (%r8,%r9,2),%xmm0
   DB  196,129,121,196,68,72,4,2           ; vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           2556 <_sk_load_tables_rgb_u16_be_avx+0x288>
-  DB  233,190,253,255,255                 ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,5                               ; jne           25c7 <_sk_load_tables_rgb_u16_be_avx+0x288>
+  DB  233,190,253,255,255                 ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,76,72,6             ; vmovd         0x6(%r8,%r9,2),%xmm1
   DB  196,1,113,196,68,72,10,2            ; vpinsrw       $0x2,0xa(%r8,%r9,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            2585 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
+  DB  114,26                              ; jb            25f6 <_sk_load_tables_rgb_u16_be_avx+0x2b7>
   DB  196,129,121,110,76,72,12            ; vmovd         0xc(%r8,%r9,2),%xmm1
   DB  196,129,113,196,84,72,16,2          ; vpinsrw       $0x2,0x10(%r8,%r9,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           258a <_sk_load_tables_rgb_u16_be_avx+0x2bc>
-  DB  233,143,253,255,255                 ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,138,253,255,255                 ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           25fb <_sk_load_tables_rgb_u16_be_avx+0x2bc>
+  DB  233,143,253,255,255                 ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,138,253,255,255                 ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,76,72,18            ; vmovd         0x12(%r8,%r9,2),%xmm1
   DB  196,1,113,196,76,72,22,2            ; vpinsrw       $0x2,0x16(%r8,%r9,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            25b9 <_sk_load_tables_rgb_u16_be_avx+0x2eb>
+  DB  114,26                              ; jb            262a <_sk_load_tables_rgb_u16_be_avx+0x2eb>
   DB  196,129,121,110,76,72,24            ; vmovd         0x18(%r8,%r9,2),%xmm1
   DB  196,129,113,196,76,72,28,2          ; vpinsrw       $0x2,0x1c(%r8,%r9,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           25be <_sk_load_tables_rgb_u16_be_avx+0x2f0>
-  DB  233,91,253,255,255                  ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,86,253,255,255                  ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           262f <_sk_load_tables_rgb_u16_be_avx+0x2f0>
+  DB  233,91,253,255,255                  ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,86,253,255,255                  ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
   DB  196,129,121,110,92,72,30            ; vmovd         0x1e(%r8,%r9,2),%xmm3
   DB  196,1,97,196,92,72,34,2             ; vpinsrw       $0x2,0x22(%r8,%r9,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            25e7 <_sk_load_tables_rgb_u16_be_avx+0x319>
+  DB  114,20                              ; jb            2658 <_sk_load_tables_rgb_u16_be_avx+0x319>
   DB  196,129,121,110,92,72,36            ; vmovd         0x24(%r8,%r9,2),%xmm3
   DB  196,129,97,196,92,72,40,2           ; vpinsrw       $0x2,0x28(%r8,%r9,2),%xmm3,%xmm3
-  DB  233,45,253,255,255                  ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
-  DB  233,40,253,255,255                  ; jmpq          2314 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,45,253,255,255                  ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
+  DB  233,40,253,255,255                  ; jmpq          2385 <_sk_load_tables_rgb_u16_be_avx+0x46>
 
 PUBLIC _sk_byte_tables_avx
 _sk_byte_tables_avx LABEL PROC
@@ -7253,7 +7285,7 @@ _sk_byte_tables_avx LABEL PROC
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,35,63,0,0           ; vbroadcastss  0x3f23(%rip),%ymm8        # 6524 <_sk_callback_avx+0x259>
+  DB  196,98,125,24,5,198,63,0,0          ; vbroadcastss  0x3fc6(%rip),%ymm8        # 6638 <_sk_callback_avx+0x259>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
@@ -7290,7 +7322,7 @@ _sk_byte_tables_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,113,62,0,0         ; vbroadcastss  0x3e71(%rip),%ymm9        # 6528 <_sk_callback_avx+0x25d>
+  DB  196,98,125,24,13,20,63,0,0          ; vbroadcastss  0x3f14(%rip),%ymm9        # 663c <_sk_callback_avx+0x25d>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -7450,7 +7482,7 @@ _sk_byte_tables_rgb_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,151,59,0,0         ; vbroadcastss  0x3b97(%rip),%ymm9        # 652c <_sk_callback_avx+0x261>
+  DB  196,98,125,24,13,58,60,0,0          ; vbroadcastss  0x3c3a(%rip),%ymm9        # 6640 <_sk_callback_avx+0x261>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -7737,36 +7769,36 @@ _sk_parametric_r_avx LABEL PROC
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,216                      ; vcvtdq2ps     %ymm0,%ymm11
-  DB  196,98,125,24,37,245,54,0,0         ; vbroadcastss  0x36f5(%rip),%ymm12        # 6530 <_sk_callback_avx+0x265>
+  DB  196,98,125,24,37,152,55,0,0         ; vbroadcastss  0x3798(%rip),%ymm12        # 6644 <_sk_callback_avx+0x265>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,235,54,0,0         ; vbroadcastss  0x36eb(%rip),%ymm12        # 6534 <_sk_callback_avx+0x269>
+  DB  196,98,125,24,37,142,55,0,0         ; vbroadcastss  0x378e(%rip),%ymm12        # 6648 <_sk_callback_avx+0x269>
   DB  196,193,124,84,196                  ; vandps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,225,54,0,0         ; vbroadcastss  0x36e1(%rip),%ymm12        # 6538 <_sk_callback_avx+0x26d>
+  DB  196,98,125,24,37,132,55,0,0         ; vbroadcastss  0x3784(%rip),%ymm12        # 664c <_sk_callback_avx+0x26d>
   DB  196,193,124,86,196                  ; vorps         %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,215,54,0,0         ; vbroadcastss  0x36d7(%rip),%ymm12        # 653c <_sk_callback_avx+0x271>
+  DB  196,98,125,24,37,122,55,0,0         ; vbroadcastss  0x377a(%rip),%ymm12        # 6650 <_sk_callback_avx+0x271>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,205,54,0,0         ; vbroadcastss  0x36cd(%rip),%ymm12        # 6540 <_sk_callback_avx+0x275>
+  DB  196,98,125,24,37,112,55,0,0         ; vbroadcastss  0x3770(%rip),%ymm12        # 6654 <_sk_callback_avx+0x275>
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,190,54,0,0         ; vbroadcastss  0x36be(%rip),%ymm12        # 6544 <_sk_callback_avx+0x279>
+  DB  196,98,125,24,37,97,55,0,0          ; vbroadcastss  0x3761(%rip),%ymm12        # 6658 <_sk_callback_avx+0x279>
   DB  196,193,124,88,196                  ; vaddps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,180,54,0,0         ; vbroadcastss  0x36b4(%rip),%ymm12        # 6548 <_sk_callback_avx+0x27d>
+  DB  196,98,125,24,37,87,55,0,0          ; vbroadcastss  0x3757(%rip),%ymm12        # 665c <_sk_callback_avx+0x27d>
   DB  197,156,94,192                      ; vdivps        %ymm0,%ymm12,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
   DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
   DB  196,99,125,8,208,1                  ; vroundps      $0x1,%ymm0,%ymm10
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,152,54,0,0         ; vbroadcastss  0x3698(%rip),%ymm11        # 654c <_sk_callback_avx+0x281>
+  DB  196,98,125,24,29,59,55,0,0          ; vbroadcastss  0x373b(%rip),%ymm11        # 6660 <_sk_callback_avx+0x281>
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,142,54,0,0         ; vbroadcastss  0x368e(%rip),%ymm11        # 6550 <_sk_callback_avx+0x285>
+  DB  196,98,125,24,29,49,55,0,0          ; vbroadcastss  0x3731(%rip),%ymm11        # 6664 <_sk_callback_avx+0x285>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,124,92,195                  ; vsubps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,127,54,0,0         ; vbroadcastss  0x367f(%rip),%ymm11        # 6554 <_sk_callback_avx+0x289>
+  DB  196,98,125,24,29,34,55,0,0          ; vbroadcastss  0x3722(%rip),%ymm11        # 6668 <_sk_callback_avx+0x289>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,117,54,0,0         ; vbroadcastss  0x3675(%rip),%ymm11        # 6558 <_sk_callback_avx+0x28d>
+  DB  196,98,125,24,29,24,55,0,0          ; vbroadcastss  0x3718(%rip),%ymm11        # 666c <_sk_callback_avx+0x28d>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
-  DB  196,98,125,24,21,102,54,0,0         ; vbroadcastss  0x3666(%rip),%ymm10        # 655c <_sk_callback_avx+0x291>
+  DB  196,98,125,24,21,9,55,0,0           ; vbroadcastss  0x3709(%rip),%ymm10        # 6670 <_sk_callback_avx+0x291>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7774,7 +7806,7 @@ _sk_parametric_r_avx LABEL PROC
   DB  196,195,125,74,193,128              ; vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,61,54,0,0           ; vbroadcastss  0x363d(%rip),%ymm8        # 6560 <_sk_callback_avx+0x295>
+  DB  196,98,125,24,5,224,54,0,0          ; vbroadcastss  0x36e0(%rip),%ymm8        # 6674 <_sk_callback_avx+0x295>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7794,36 +7826,36 @@ _sk_parametric_g_avx LABEL PROC
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,217                      ; vcvtdq2ps     %ymm1,%ymm11
-  DB  196,98,125,24,37,238,53,0,0         ; vbroadcastss  0x35ee(%rip),%ymm12        # 6564 <_sk_callback_avx+0x299>
+  DB  196,98,125,24,37,145,54,0,0         ; vbroadcastss  0x3691(%rip),%ymm12        # 6678 <_sk_callback_avx+0x299>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,228,53,0,0         ; vbroadcastss  0x35e4(%rip),%ymm12        # 6568 <_sk_callback_avx+0x29d>
+  DB  196,98,125,24,37,135,54,0,0         ; vbroadcastss  0x3687(%rip),%ymm12        # 667c <_sk_callback_avx+0x29d>
   DB  196,193,116,84,204                  ; vandps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,218,53,0,0         ; vbroadcastss  0x35da(%rip),%ymm12        # 656c <_sk_callback_avx+0x2a1>
+  DB  196,98,125,24,37,125,54,0,0         ; vbroadcastss  0x367d(%rip),%ymm12        # 6680 <_sk_callback_avx+0x2a1>
   DB  196,193,116,86,204                  ; vorps         %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,208,53,0,0         ; vbroadcastss  0x35d0(%rip),%ymm12        # 6570 <_sk_callback_avx+0x2a5>
+  DB  196,98,125,24,37,115,54,0,0         ; vbroadcastss  0x3673(%rip),%ymm12        # 6684 <_sk_callback_avx+0x2a5>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,198,53,0,0         ; vbroadcastss  0x35c6(%rip),%ymm12        # 6574 <_sk_callback_avx+0x2a9>
+  DB  196,98,125,24,37,105,54,0,0         ; vbroadcastss  0x3669(%rip),%ymm12        # 6688 <_sk_callback_avx+0x2a9>
   DB  196,65,116,89,228                   ; vmulps        %ymm12,%ymm1,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,183,53,0,0         ; vbroadcastss  0x35b7(%rip),%ymm12        # 6578 <_sk_callback_avx+0x2ad>
+  DB  196,98,125,24,37,90,54,0,0          ; vbroadcastss  0x365a(%rip),%ymm12        # 668c <_sk_callback_avx+0x2ad>
   DB  196,193,116,88,204                  ; vaddps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,173,53,0,0         ; vbroadcastss  0x35ad(%rip),%ymm12        # 657c <_sk_callback_avx+0x2b1>
+  DB  196,98,125,24,37,80,54,0,0          ; vbroadcastss  0x3650(%rip),%ymm12        # 6690 <_sk_callback_avx+0x2b1>
   DB  197,156,94,201                      ; vdivps        %ymm1,%ymm12,%ymm1
   DB  197,164,92,201                      ; vsubps        %ymm1,%ymm11,%ymm1
   DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
   DB  196,99,125,8,209,1                  ; vroundps      $0x1,%ymm1,%ymm10
   DB  196,65,116,92,210                   ; vsubps        %ymm10,%ymm1,%ymm10
-  DB  196,98,125,24,29,145,53,0,0         ; vbroadcastss  0x3591(%rip),%ymm11        # 6580 <_sk_callback_avx+0x2b5>
+  DB  196,98,125,24,29,52,54,0,0          ; vbroadcastss  0x3634(%rip),%ymm11        # 6694 <_sk_callback_avx+0x2b5>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,135,53,0,0         ; vbroadcastss  0x3587(%rip),%ymm11        # 6584 <_sk_callback_avx+0x2b9>
+  DB  196,98,125,24,29,42,54,0,0          ; vbroadcastss  0x362a(%rip),%ymm11        # 6698 <_sk_callback_avx+0x2b9>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,116,92,203                  ; vsubps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,120,53,0,0         ; vbroadcastss  0x3578(%rip),%ymm11        # 6588 <_sk_callback_avx+0x2bd>
+  DB  196,98,125,24,29,27,54,0,0          ; vbroadcastss  0x361b(%rip),%ymm11        # 669c <_sk_callback_avx+0x2bd>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,110,53,0,0         ; vbroadcastss  0x356e(%rip),%ymm11        # 658c <_sk_callback_avx+0x2c1>
+  DB  196,98,125,24,29,17,54,0,0          ; vbroadcastss  0x3611(%rip),%ymm11        # 66a0 <_sk_callback_avx+0x2c1>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,116,88,202                  ; vaddps        %ymm10,%ymm1,%ymm1
-  DB  196,98,125,24,21,95,53,0,0          ; vbroadcastss  0x355f(%rip),%ymm10        # 6590 <_sk_callback_avx+0x2c5>
+  DB  196,98,125,24,21,2,54,0,0           ; vbroadcastss  0x3602(%rip),%ymm10        # 66a4 <_sk_callback_avx+0x2c5>
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7831,7 +7863,7 @@ _sk_parametric_g_avx LABEL PROC
   DB  196,195,117,74,201,128              ; vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,5,54,53,0,0           ; vbroadcastss  0x3536(%rip),%ymm8        # 6594 <_sk_callback_avx+0x2c9>
+  DB  196,98,125,24,5,217,53,0,0          ; vbroadcastss  0x35d9(%rip),%ymm8        # 66a8 <_sk_callback_avx+0x2c9>
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7851,36 +7883,36 @@ _sk_parametric_b_avx LABEL PROC
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,218                      ; vcvtdq2ps     %ymm2,%ymm11
-  DB  196,98,125,24,37,231,52,0,0         ; vbroadcastss  0x34e7(%rip),%ymm12        # 6598 <_sk_callback_avx+0x2cd>
+  DB  196,98,125,24,37,138,53,0,0         ; vbroadcastss  0x358a(%rip),%ymm12        # 66ac <_sk_callback_avx+0x2cd>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,221,52,0,0         ; vbroadcastss  0x34dd(%rip),%ymm12        # 659c <_sk_callback_avx+0x2d1>
+  DB  196,98,125,24,37,128,53,0,0         ; vbroadcastss  0x3580(%rip),%ymm12        # 66b0 <_sk_callback_avx+0x2d1>
   DB  196,193,108,84,212                  ; vandps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,211,52,0,0         ; vbroadcastss  0x34d3(%rip),%ymm12        # 65a0 <_sk_callback_avx+0x2d5>
+  DB  196,98,125,24,37,118,53,0,0         ; vbroadcastss  0x3576(%rip),%ymm12        # 66b4 <_sk_callback_avx+0x2d5>
   DB  196,193,108,86,212                  ; vorps         %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,201,52,0,0         ; vbroadcastss  0x34c9(%rip),%ymm12        # 65a4 <_sk_callback_avx+0x2d9>
+  DB  196,98,125,24,37,108,53,0,0         ; vbroadcastss  0x356c(%rip),%ymm12        # 66b8 <_sk_callback_avx+0x2d9>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,191,52,0,0         ; vbroadcastss  0x34bf(%rip),%ymm12        # 65a8 <_sk_callback_avx+0x2dd>
+  DB  196,98,125,24,37,98,53,0,0          ; vbroadcastss  0x3562(%rip),%ymm12        # 66bc <_sk_callback_avx+0x2dd>
   DB  196,65,108,89,228                   ; vmulps        %ymm12,%ymm2,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,176,52,0,0         ; vbroadcastss  0x34b0(%rip),%ymm12        # 65ac <_sk_callback_avx+0x2e1>
+  DB  196,98,125,24,37,83,53,0,0          ; vbroadcastss  0x3553(%rip),%ymm12        # 66c0 <_sk_callback_avx+0x2e1>
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,166,52,0,0         ; vbroadcastss  0x34a6(%rip),%ymm12        # 65b0 <_sk_callback_avx+0x2e5>
+  DB  196,98,125,24,37,73,53,0,0          ; vbroadcastss  0x3549(%rip),%ymm12        # 66c4 <_sk_callback_avx+0x2e5>
   DB  197,156,94,210                      ; vdivps        %ymm2,%ymm12,%ymm2
   DB  197,164,92,210                      ; vsubps        %ymm2,%ymm11,%ymm2
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  196,99,125,8,210,1                  ; vroundps      $0x1,%ymm2,%ymm10
   DB  196,65,108,92,210                   ; vsubps        %ymm10,%ymm2,%ymm10
-  DB  196,98,125,24,29,138,52,0,0         ; vbroadcastss  0x348a(%rip),%ymm11        # 65b4 <_sk_callback_avx+0x2e9>
+  DB  196,98,125,24,29,45,53,0,0          ; vbroadcastss  0x352d(%rip),%ymm11        # 66c8 <_sk_callback_avx+0x2e9>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,128,52,0,0         ; vbroadcastss  0x3480(%rip),%ymm11        # 65b8 <_sk_callback_avx+0x2ed>
+  DB  196,98,125,24,29,35,53,0,0          ; vbroadcastss  0x3523(%rip),%ymm11        # 66cc <_sk_callback_avx+0x2ed>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,108,92,211                  ; vsubps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,113,52,0,0         ; vbroadcastss  0x3471(%rip),%ymm11        # 65bc <_sk_callback_avx+0x2f1>
+  DB  196,98,125,24,29,20,53,0,0          ; vbroadcastss  0x3514(%rip),%ymm11        # 66d0 <_sk_callback_avx+0x2f1>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,103,52,0,0         ; vbroadcastss  0x3467(%rip),%ymm11        # 65c0 <_sk_callback_avx+0x2f5>
+  DB  196,98,125,24,29,10,53,0,0          ; vbroadcastss  0x350a(%rip),%ymm11        # 66d4 <_sk_callback_avx+0x2f5>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
-  DB  196,98,125,24,21,88,52,0,0          ; vbroadcastss  0x3458(%rip),%ymm10        # 65c4 <_sk_callback_avx+0x2f9>
+  DB  196,98,125,24,21,251,52,0,0         ; vbroadcastss  0x34fb(%rip),%ymm10        # 66d8 <_sk_callback_avx+0x2f9>
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  197,253,91,210                      ; vcvtps2dq     %ymm2,%ymm2
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7888,7 +7920,7 @@ _sk_parametric_b_avx LABEL PROC
   DB  196,195,109,74,209,128              ; vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,47,52,0,0           ; vbroadcastss  0x342f(%rip),%ymm8        # 65c8 <_sk_callback_avx+0x2fd>
+  DB  196,98,125,24,5,210,52,0,0          ; vbroadcastss  0x34d2(%rip),%ymm8        # 66dc <_sk_callback_avx+0x2fd>
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7908,36 +7940,36 @@ _sk_parametric_a_avx LABEL PROC
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,219                      ; vcvtdq2ps     %ymm3,%ymm11
-  DB  196,98,125,24,37,224,51,0,0         ; vbroadcastss  0x33e0(%rip),%ymm12        # 65cc <_sk_callback_avx+0x301>
+  DB  196,98,125,24,37,131,52,0,0         ; vbroadcastss  0x3483(%rip),%ymm12        # 66e0 <_sk_callback_avx+0x301>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,214,51,0,0         ; vbroadcastss  0x33d6(%rip),%ymm12        # 65d0 <_sk_callback_avx+0x305>
+  DB  196,98,125,24,37,121,52,0,0         ; vbroadcastss  0x3479(%rip),%ymm12        # 66e4 <_sk_callback_avx+0x305>
   DB  196,193,100,84,220                  ; vandps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,204,51,0,0         ; vbroadcastss  0x33cc(%rip),%ymm12        # 65d4 <_sk_callback_avx+0x309>
+  DB  196,98,125,24,37,111,52,0,0         ; vbroadcastss  0x346f(%rip),%ymm12        # 66e8 <_sk_callback_avx+0x309>
   DB  196,193,100,86,220                  ; vorps         %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,194,51,0,0         ; vbroadcastss  0x33c2(%rip),%ymm12        # 65d8 <_sk_callback_avx+0x30d>
+  DB  196,98,125,24,37,101,52,0,0         ; vbroadcastss  0x3465(%rip),%ymm12        # 66ec <_sk_callback_avx+0x30d>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,184,51,0,0         ; vbroadcastss  0x33b8(%rip),%ymm12        # 65dc <_sk_callback_avx+0x311>
+  DB  196,98,125,24,37,91,52,0,0          ; vbroadcastss  0x345b(%rip),%ymm12        # 66f0 <_sk_callback_avx+0x311>
   DB  196,65,100,89,228                   ; vmulps        %ymm12,%ymm3,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,169,51,0,0         ; vbroadcastss  0x33a9(%rip),%ymm12        # 65e0 <_sk_callback_avx+0x315>
+  DB  196,98,125,24,37,76,52,0,0          ; vbroadcastss  0x344c(%rip),%ymm12        # 66f4 <_sk_callback_avx+0x315>
   DB  196,193,100,88,220                  ; vaddps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,159,51,0,0         ; vbroadcastss  0x339f(%rip),%ymm12        # 65e4 <_sk_callback_avx+0x319>
+  DB  196,98,125,24,37,66,52,0,0          ; vbroadcastss  0x3442(%rip),%ymm12        # 66f8 <_sk_callback_avx+0x319>
   DB  197,156,94,219                      ; vdivps        %ymm3,%ymm12,%ymm3
   DB  197,164,92,219                      ; vsubps        %ymm3,%ymm11,%ymm3
   DB  197,172,89,219                      ; vmulps        %ymm3,%ymm10,%ymm3
   DB  196,99,125,8,211,1                  ; vroundps      $0x1,%ymm3,%ymm10
   DB  196,65,100,92,210                   ; vsubps        %ymm10,%ymm3,%ymm10
-  DB  196,98,125,24,29,131,51,0,0         ; vbroadcastss  0x3383(%rip),%ymm11        # 65e8 <_sk_callback_avx+0x31d>
+  DB  196,98,125,24,29,38,52,0,0          ; vbroadcastss  0x3426(%rip),%ymm11        # 66fc <_sk_callback_avx+0x31d>
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,121,51,0,0         ; vbroadcastss  0x3379(%rip),%ymm11        # 65ec <_sk_callback_avx+0x321>
+  DB  196,98,125,24,29,28,52,0,0          ; vbroadcastss  0x341c(%rip),%ymm11        # 6700 <_sk_callback_avx+0x321>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,100,92,219                  ; vsubps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,106,51,0,0         ; vbroadcastss  0x336a(%rip),%ymm11        # 65f0 <_sk_callback_avx+0x325>
+  DB  196,98,125,24,29,13,52,0,0          ; vbroadcastss  0x340d(%rip),%ymm11        # 6704 <_sk_callback_avx+0x325>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,96,51,0,0          ; vbroadcastss  0x3360(%rip),%ymm11        # 65f4 <_sk_callback_avx+0x329>
+  DB  196,98,125,24,29,3,52,0,0           ; vbroadcastss  0x3403(%rip),%ymm11        # 6708 <_sk_callback_avx+0x329>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,100,88,218                  ; vaddps        %ymm10,%ymm3,%ymm3
-  DB  196,98,125,24,21,81,51,0,0          ; vbroadcastss  0x3351(%rip),%ymm10        # 65f8 <_sk_callback_avx+0x32d>
+  DB  196,98,125,24,21,244,51,0,0         ; vbroadcastss  0x33f4(%rip),%ymm10        # 670c <_sk_callback_avx+0x32d>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,253,91,219                      ; vcvtps2dq     %ymm3,%ymm3
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7945,38 +7977,38 @@ _sk_parametric_a_avx LABEL PROC
   DB  196,195,101,74,217,128              ; vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
-  DB  196,98,125,24,5,40,51,0,0           ; vbroadcastss  0x3328(%rip),%ymm8        # 65fc <_sk_callback_avx+0x331>
+  DB  196,98,125,24,5,203,51,0,0          ; vbroadcastss  0x33cb(%rip),%ymm8        # 6710 <_sk_callback_avx+0x331>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_lab_to_xyz_avx
 _sk_lab_to_xyz_avx LABEL PROC
-  DB  196,98,125,24,5,26,51,0,0           ; vbroadcastss  0x331a(%rip),%ymm8        # 6600 <_sk_callback_avx+0x335>
+  DB  196,98,125,24,5,189,51,0,0          ; vbroadcastss  0x33bd(%rip),%ymm8        # 6714 <_sk_callback_avx+0x335>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,16,51,0,0           ; vbroadcastss  0x3310(%rip),%ymm8        # 6604 <_sk_callback_avx+0x339>
+  DB  196,98,125,24,5,179,51,0,0          ; vbroadcastss  0x33b3(%rip),%ymm8        # 6718 <_sk_callback_avx+0x339>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,13,6,51,0,0           ; vbroadcastss  0x3306(%rip),%ymm9        # 6608 <_sk_callback_avx+0x33d>
+  DB  196,98,125,24,13,169,51,0,0         ; vbroadcastss  0x33a9(%rip),%ymm9        # 671c <_sk_callback_avx+0x33d>
   DB  196,193,116,88,201                  ; vaddps        %ymm9,%ymm1,%ymm1
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  196,193,108,88,209                  ; vaddps        %ymm9,%ymm2,%ymm2
-  DB  196,98,125,24,5,242,50,0,0          ; vbroadcastss  0x32f2(%rip),%ymm8        # 660c <_sk_callback_avx+0x341>
+  DB  196,98,125,24,5,149,51,0,0          ; vbroadcastss  0x3395(%rip),%ymm8        # 6720 <_sk_callback_avx+0x341>
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,232,50,0,0          ; vbroadcastss  0x32e8(%rip),%ymm8        # 6610 <_sk_callback_avx+0x345>
+  DB  196,98,125,24,5,139,51,0,0          ; vbroadcastss  0x338b(%rip),%ymm8        # 6724 <_sk_callback_avx+0x345>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,222,50,0,0          ; vbroadcastss  0x32de(%rip),%ymm8        # 6614 <_sk_callback_avx+0x349>
+  DB  196,98,125,24,5,129,51,0,0          ; vbroadcastss  0x3381(%rip),%ymm8        # 6728 <_sk_callback_avx+0x349>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
-  DB  196,98,125,24,5,208,50,0,0          ; vbroadcastss  0x32d0(%rip),%ymm8        # 6618 <_sk_callback_avx+0x34d>
+  DB  196,98,125,24,5,115,51,0,0          ; vbroadcastss  0x3373(%rip),%ymm8        # 672c <_sk_callback_avx+0x34d>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,252,92,210                      ; vsubps        %ymm2,%ymm0,%ymm2
   DB  197,116,89,193                      ; vmulps        %ymm1,%ymm1,%ymm8
   DB  196,65,116,89,192                   ; vmulps        %ymm8,%ymm1,%ymm8
-  DB  196,98,125,24,13,185,50,0,0         ; vbroadcastss  0x32b9(%rip),%ymm9        # 661c <_sk_callback_avx+0x351>
+  DB  196,98,125,24,13,92,51,0,0          ; vbroadcastss  0x335c(%rip),%ymm9        # 6730 <_sk_callback_avx+0x351>
   DB  196,65,52,194,208,1                 ; vcmpltps      %ymm8,%ymm9,%ymm10
-  DB  196,98,125,24,29,174,50,0,0         ; vbroadcastss  0x32ae(%rip),%ymm11        # 6620 <_sk_callback_avx+0x355>
+  DB  196,98,125,24,29,81,51,0,0          ; vbroadcastss  0x3351(%rip),%ymm11        # 6734 <_sk_callback_avx+0x355>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,37,164,50,0,0         ; vbroadcastss  0x32a4(%rip),%ymm12        # 6624 <_sk_callback_avx+0x359>
+  DB  196,98,125,24,37,71,51,0,0          ; vbroadcastss  0x3347(%rip),%ymm12        # 6738 <_sk_callback_avx+0x359>
   DB  196,193,116,89,204                  ; vmulps        %ymm12,%ymm1,%ymm1
   DB  196,67,117,74,192,160               ; vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   DB  197,252,89,200                      ; vmulps        %ymm0,%ymm0,%ymm1
@@ -7991,9 +8023,9 @@ _sk_lab_to_xyz_avx LABEL PROC
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,193,108,89,212                  ; vmulps        %ymm12,%ymm2,%ymm2
   DB  196,227,109,74,208,144              ; vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  DB  196,226,125,24,5,90,50,0,0          ; vbroadcastss  0x325a(%rip),%ymm0        # 6628 <_sk_callback_avx+0x35d>
+  DB  196,226,125,24,5,253,50,0,0         ; vbroadcastss  0x32fd(%rip),%ymm0        # 673c <_sk_callback_avx+0x35d>
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,81,50,0,0           ; vbroadcastss  0x3251(%rip),%ymm8        # 662c <_sk_callback_avx+0x361>
+  DB  196,98,125,24,5,244,50,0,0          ; vbroadcastss  0x32f4(%rip),%ymm8        # 6740 <_sk_callback_avx+0x361>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8005,14 +8037,14 @@ _sk_load_a8_avx LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,62                              ; jne           3432 <_sk_load_a8_avx+0x4e>
+  DB  117,62                              ; jne           34a3 <_sk_load_a8_avx+0x4e>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
   DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,21,50,0,0         ; vbroadcastss  0x3215(%rip),%ymm1        # 6630 <_sk_callback_avx+0x365>
+  DB  196,226,125,24,13,184,50,0,0        ; vbroadcastss  0x32b8(%rip),%ymm1        # 6744 <_sk_callback_avx+0x365>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -8029,9 +8061,9 @@ _sk_load_a8_avx LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           343a <_sk_load_a8_avx+0x56>
+  DB  117,234                             ; jne           34ab <_sk_load_a8_avx+0x56>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,161                             ; jmp           33f8 <_sk_load_a8_avx+0x14>
+  DB  235,161                             ; jmp           3469 <_sk_load_a8_avx+0x14>
 
 PUBLIC _sk_gather_a8_avx
 _sk_gather_a8_avx LABEL PROC
@@ -8079,7 +8111,7 @@ _sk_gather_a8_avx LABEL PROC
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,10,49,0,0         ; vbroadcastss  0x310a(%rip),%ymm1        # 6634 <_sk_callback_avx+0x369>
+  DB  196,226,125,24,13,173,49,0,0        ; vbroadcastss  0x31ad(%rip),%ymm1        # 6748 <_sk_callback_avx+0x369>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -8095,14 +8127,14 @@ PUBLIC _sk_store_a8_avx
 _sk_store_a8_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,229,48,0,0          ; vbroadcastss  0x30e5(%rip),%ymm8        # 6638 <_sk_callback_avx+0x36d>
+  DB  196,98,125,24,5,136,49,0,0          ; vbroadcastss  0x3188(%rip),%ymm8        # 674c <_sk_callback_avx+0x36d>
   DB  196,65,100,89,192                   ; vmulps        %ymm8,%ymm3,%ymm8
   DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  196,65,57,103,192                   ; vpackuswb     %xmm8,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           357c <_sk_store_a8_avx+0x37>
+  DB  117,10                              ; jne           35ed <_sk_store_a8_avx+0x37>
   DB  196,65,123,17,4,58                  ; vmovsd        %xmm8,(%r10,%rdi,1)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8110,10 +8142,10 @@ _sk_store_a8_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            3578 <_sk_store_a8_avx+0x33>
+  DB  119,236                             ; ja            35e9 <_sk_store_a8_avx+0x33>
   DB  196,66,121,48,192                   ; vpmovzxbw     %xmm8,%xmm8
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 35e0 <_sk_store_a8_avx+0x9b>
+  DB  76,141,13,67,0,0,0                  ; lea           0x43(%rip),%r9        # 3650 <_sk_store_a8_avx+0x9a>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8124,28 +8156,27 @@ _sk_store_a8_avx LABEL PROC
   DB  196,67,121,20,68,58,2,4             ; vpextrb       $0x4,%xmm8,0x2(%r10,%rdi,1)
   DB  196,67,121,20,68,58,1,2             ; vpextrb       $0x2,%xmm8,0x1(%r10,%rdi,1)
   DB  196,67,121,20,4,58,0                ; vpextrb       $0x0,%xmm8,(%r10,%rdi,1)
-  DB  235,154                             ; jmp           3578 <_sk_store_a8_avx+0x33>
-  DB  102,144                             ; xchg          %ax,%ax
-  DB  245                                 ; cmc
-  DB  255                                 ; (bad)
+  DB  235,154                             ; jmp           35e9 <_sk_store_a8_avx+0x33>
+  DB  144                                 ; nop
+  DB  246,255                             ; idiv          %bh
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  237                                 ; in            (%dx),%eax
+  DB  238                                 ; out           %al,(%dx)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,229                             ; jmpq          *%rbp
+  DB  255,230                             ; jmpq          *%rsi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  221,255                             ; (bad)
+  DB  222,255                             ; fdivrp        %st,%st(7)
   DB  255                                 ; (bad)
-  DB  255,213                             ; callq         *%rbp
+  DB  255,214                             ; callq         *%rsi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,205                             ; dec           %ebp
+  DB  255,206                             ; dec           %esi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,197                             ; inc           %ebp
+  DB  255,198                             ; inc           %esi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; .byte         0xff
@@ -8157,17 +8188,17 @@ _sk_load_g8_avx LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,1,248                            ; add           %rdi,%rax
   DB  77,133,192                          ; test          %r8,%r8
-  DB  117,67                              ; jne           364f <_sk_load_g8_avx+0x53>
+  DB  117,67                              ; jne           36bf <_sk_load_g8_avx+0x53>
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,121,49,200                  ; vpmovzxbd     %xmm0,%xmm1
   DB  196,227,121,4,192,229               ; vpermilps     $0xe5,%xmm0,%xmm0
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,9,48,0,0          ; vbroadcastss  0x3009(%rip),%ymm1        # 663c <_sk_callback_avx+0x371>
+  DB  196,226,125,24,13,173,48,0,0        ; vbroadcastss  0x30ad(%rip),%ymm1        # 6750 <_sk_callback_avx+0x371>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,254,47,0,0        ; vbroadcastss  0x2ffe(%rip),%ymm3        # 6640 <_sk_callback_avx+0x375>
+  DB  196,226,125,24,29,162,48,0,0        ; vbroadcastss  0x30a2(%rip),%ymm3        # 6754 <_sk_callback_avx+0x375>
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
@@ -8181,9 +8212,9 @@ _sk_load_g8_avx LABEL PROC
   DB  77,9,217                            ; or            %r11,%r9
   DB  72,131,193,8                        ; add           $0x8,%rcx
   DB  73,255,202                          ; dec           %r10
-  DB  117,234                             ; jne           3657 <_sk_load_g8_avx+0x5b>
+  DB  117,234                             ; jne           36c7 <_sk_load_g8_avx+0x5b>
   DB  196,193,249,110,193                 ; vmovq         %r9,%xmm0
-  DB  235,156                             ; jmp           3610 <_sk_load_g8_avx+0x14>
+  DB  235,156                             ; jmp           3680 <_sk_load_g8_avx+0x14>
 
 PUBLIC _sk_gather_g8_avx
 _sk_gather_g8_avx LABEL PROC
@@ -8231,10 +8262,10 @@ _sk_gather_g8_avx LABEL PROC
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,253,46,0,0        ; vbroadcastss  0x2efd(%rip),%ymm1        # 6644 <_sk_callback_avx+0x379>
+  DB  196,226,125,24,13,161,47,0,0        ; vbroadcastss  0x2fa1(%rip),%ymm1        # 6758 <_sk_callback_avx+0x379>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,242,46,0,0        ; vbroadcastss  0x2ef2(%rip),%ymm3        # 6648 <_sk_callback_avx+0x37d>
+  DB  196,226,125,24,29,150,47,0,0        ; vbroadcastss  0x2f96(%rip),%ymm3        # 675c <_sk_callback_avx+0x37d>
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  91                                  ; pop           %rbx
@@ -8248,9 +8279,9 @@ _sk_gather_i8_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  73,137,192                          ; mov           %rax,%r8
   DB  77,133,192                          ; test          %r8,%r8
-  DB  116,5                               ; je            3776 <_sk_gather_i8_avx+0xf>
+  DB  116,5                               ; je            37e6 <_sk_gather_i8_avx+0xf>
   DB  76,137,192                          ; mov           %r8,%rax
-  DB  235,2                               ; jmp           3778 <_sk_gather_i8_avx+0x11>
+  DB  235,2                               ; jmp           37e8 <_sk_gather_i8_avx+0x11>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,87                               ; push          %r15
   DB  65,86                               ; push          %r14
@@ -8312,10 +8343,10 @@ _sk_gather_i8_avx LABEL PROC
   DB  196,163,121,34,4,163,2              ; vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   DB  196,163,121,34,28,19,3              ; vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,254,47,0,0            ; vmovaps       0x2ffe(%rip),%ymm10        # 68a0 <_sk_callback_avx+0x5d5>
+  DB  197,124,40,21,14,48,0,0             ; vmovaps       0x300e(%rip),%ymm10        # 6920 <_sk_callback_avx+0x541>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,152,45,0,0         ; vbroadcastss  0x2d98(%rip),%ymm9        # 664c <_sk_callback_avx+0x381>
+  DB  196,98,125,24,13,60,46,0,0          ; vbroadcastss  0x2e3c(%rip),%ymm9        # 6760 <_sk_callback_avx+0x381>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -8347,38 +8378,38 @@ _sk_load_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,128,0,0,0                    ; jne           39ac <_sk_load_565_avx+0x8e>
+  DB  15,133,128,0,0,0                    ; jne           3a1c <_sk_load_565_avx+0x8e>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,2,45,0,0           ; vbroadcastss  0x2d02(%rip),%ymm0        # 6650 <_sk_callback_avx+0x385>
+  DB  196,226,125,24,5,166,45,0,0         ; vbroadcastss  0x2da6(%rip),%ymm0        # 6764 <_sk_callback_avx+0x385>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,245,44,0,0        ; vbroadcastss  0x2cf5(%rip),%ymm1        # 6654 <_sk_callback_avx+0x389>
+  DB  196,226,125,24,13,153,45,0,0        ; vbroadcastss  0x2d99(%rip),%ymm1        # 6768 <_sk_callback_avx+0x389>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,236,44,0,0        ; vbroadcastss  0x2cec(%rip),%ymm1        # 6658 <_sk_callback_avx+0x38d>
+  DB  196,226,125,24,13,144,45,0,0        ; vbroadcastss  0x2d90(%rip),%ymm1        # 676c <_sk_callback_avx+0x38d>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,223,44,0,0        ; vbroadcastss  0x2cdf(%rip),%ymm3        # 665c <_sk_callback_avx+0x391>
+  DB  196,226,125,24,29,131,45,0,0        ; vbroadcastss  0x2d83(%rip),%ymm3        # 6770 <_sk_callback_avx+0x391>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,214,44,0,0        ; vbroadcastss  0x2cd6(%rip),%ymm3        # 6660 <_sk_callback_avx+0x395>
+  DB  196,226,125,24,29,122,45,0,0        ; vbroadcastss  0x2d7a(%rip),%ymm3        # 6774 <_sk_callback_avx+0x395>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,201,44,0,0        ; vbroadcastss  0x2cc9(%rip),%ymm3        # 6664 <_sk_callback_avx+0x399>
+  DB  196,226,125,24,29,109,45,0,0        ; vbroadcastss  0x2d6d(%rip),%ymm3        # 6778 <_sk_callback_avx+0x399>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,190,44,0,0        ; vbroadcastss  0x2cbe(%rip),%ymm3        # 6668 <_sk_callback_avx+0x39d>
+  DB  196,226,125,24,29,98,45,0,0         ; vbroadcastss  0x2d62(%rip),%ymm3        # 677c <_sk_callback_avx+0x39d>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,110,255,255,255              ; ja            3932 <_sk_load_565_avx+0x14>
+  DB  15,135,110,255,255,255              ; ja            39a2 <_sk_load_565_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 3a18 <_sk_load_565_avx+0xfa>
+  DB  76,141,13,73,0,0,0                  ; lea           0x49(%rip),%r9        # 3a88 <_sk_load_565_avx+0xfa>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8390,7 +8421,7 @@ _sk_load_565_avx LABEL PROC
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,26,255,255,255                  ; jmpq          3932 <_sk_load_565_avx+0x14>
+  DB  233,26,255,255,255                  ; jmpq          39a2 <_sk_load_565_avx+0x14>
   DB  244                                 ; hlt
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -8466,23 +8497,23 @@ _sk_gather_565_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,94,43,0,0          ; vbroadcastss  0x2b5e(%rip),%ymm0        # 666c <_sk_callback_avx+0x3a1>
+  DB  196,226,125,24,5,2,44,0,0           ; vbroadcastss  0x2c02(%rip),%ymm0        # 6780 <_sk_callback_avx+0x3a1>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,81,43,0,0         ; vbroadcastss  0x2b51(%rip),%ymm1        # 6670 <_sk_callback_avx+0x3a5>
+  DB  196,226,125,24,13,245,43,0,0        ; vbroadcastss  0x2bf5(%rip),%ymm1        # 6784 <_sk_callback_avx+0x3a5>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,72,43,0,0         ; vbroadcastss  0x2b48(%rip),%ymm1        # 6674 <_sk_callback_avx+0x3a9>
+  DB  196,226,125,24,13,236,43,0,0        ; vbroadcastss  0x2bec(%rip),%ymm1        # 6788 <_sk_callback_avx+0x3a9>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,59,43,0,0         ; vbroadcastss  0x2b3b(%rip),%ymm3        # 6678 <_sk_callback_avx+0x3ad>
+  DB  196,226,125,24,29,223,43,0,0        ; vbroadcastss  0x2bdf(%rip),%ymm3        # 678c <_sk_callback_avx+0x3ad>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,50,43,0,0         ; vbroadcastss  0x2b32(%rip),%ymm3        # 667c <_sk_callback_avx+0x3b1>
+  DB  196,226,125,24,29,214,43,0,0        ; vbroadcastss  0x2bd6(%rip),%ymm3        # 6790 <_sk_callback_avx+0x3b1>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,37,43,0,0         ; vbroadcastss  0x2b25(%rip),%ymm3        # 6680 <_sk_callback_avx+0x3b5>
+  DB  196,226,125,24,29,201,43,0,0        ; vbroadcastss  0x2bc9(%rip),%ymm3        # 6794 <_sk_callback_avx+0x3b5>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,26,43,0,0         ; vbroadcastss  0x2b1a(%rip),%ymm3        # 6684 <_sk_callback_avx+0x3b9>
+  DB  196,226,125,24,29,190,43,0,0        ; vbroadcastss  0x2bbe(%rip),%ymm3        # 6798 <_sk_callback_avx+0x3b9>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,94                               ; pop           %r14
@@ -8494,14 +8525,14 @@ PUBLIC _sk_store_565_avx
 _sk_store_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,6,43,0,0            ; vbroadcastss  0x2b06(%rip),%ymm8        # 6688 <_sk_callback_avx+0x3bd>
+  DB  196,98,125,24,5,170,43,0,0          ; vbroadcastss  0x2baa(%rip),%ymm8        # 679c <_sk_callback_avx+0x3bd>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
   DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
   DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  DB  196,98,125,24,21,223,42,0,0         ; vbroadcastss  0x2adf(%rip),%ymm10        # 668c <_sk_callback_avx+0x3c1>
+  DB  196,98,125,24,21,131,43,0,0         ; vbroadcastss  0x2b83(%rip),%ymm10        # 67a0 <_sk_callback_avx+0x3c1>
   DB  196,65,116,89,210                   ; vmulps        %ymm10,%ymm1,%ymm10
   DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
   DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
@@ -8515,7 +8546,7 @@ _sk_store_565_avx LABEL PROC
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           3bfd <_sk_store_565_avx+0x89>
+  DB  117,10                              ; jne           3c6d <_sk_store_565_avx+0x89>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8523,9 +8554,9 @@ _sk_store_565_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            3bf9 <_sk_store_565_avx+0x85>
+  DB  119,236                             ; ja            3c69 <_sk_store_565_avx+0x85>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 3c5c <_sk_store_565_avx+0xe8>
+  DB  76,141,13,68,0,0,0                  ; lea           0x44(%rip),%r9        # 3ccc <_sk_store_565_avx+0xe8>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8536,7 +8567,7 @@ _sk_store_565_avx LABEL PROC
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           3bf9 <_sk_store_565_avx+0x85>
+  DB  235,159                             ; jmp           3c69 <_sk_store_565_avx+0x85>
   DB  102,144                             ; xchg          %ax,%ax
   DB  245                                 ; cmc
   DB  255                                 ; (bad)
@@ -8567,31 +8598,31 @@ _sk_load_4444_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,152,0,0,0                    ; jne           3d1e <_sk_load_4444_avx+0xa6>
+  DB  15,133,152,0,0,0                    ; jne           3d8e <_sk_load_4444_avx+0xa6>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  197,241,239,201                     ; vpxor         %xmm1,%xmm1,%xmm1
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,232,41,0,0         ; vbroadcastss  0x29e8(%rip),%ymm0        # 6690 <_sk_callback_avx+0x3c5>
+  DB  196,226,125,24,5,140,42,0,0         ; vbroadcastss  0x2a8c(%rip),%ymm0        # 67a4 <_sk_callback_avx+0x3c5>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,219,41,0,0        ; vbroadcastss  0x29db(%rip),%ymm1        # 6694 <_sk_callback_avx+0x3c9>
+  DB  196,226,125,24,13,127,42,0,0        ; vbroadcastss  0x2a7f(%rip),%ymm1        # 67a8 <_sk_callback_avx+0x3c9>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,210,41,0,0        ; vbroadcastss  0x29d2(%rip),%ymm1        # 6698 <_sk_callback_avx+0x3cd>
+  DB  196,226,125,24,13,118,42,0,0        ; vbroadcastss  0x2a76(%rip),%ymm1        # 67ac <_sk_callback_avx+0x3cd>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,197,41,0,0        ; vbroadcastss  0x29c5(%rip),%ymm2        # 669c <_sk_callback_avx+0x3d1>
+  DB  196,226,125,24,21,105,42,0,0        ; vbroadcastss  0x2a69(%rip),%ymm2        # 67b0 <_sk_callback_avx+0x3d1>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,188,41,0,0        ; vbroadcastss  0x29bc(%rip),%ymm2        # 66a0 <_sk_callback_avx+0x3d5>
+  DB  196,226,125,24,21,96,42,0,0         ; vbroadcastss  0x2a60(%rip),%ymm2        # 67b4 <_sk_callback_avx+0x3d5>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,175,41,0,0          ; vbroadcastss  0x29af(%rip),%ymm8        # 66a4 <_sk_callback_avx+0x3d9>
+  DB  196,98,125,24,5,83,42,0,0           ; vbroadcastss  0x2a53(%rip),%ymm8        # 67b8 <_sk_callback_avx+0x3d9>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,165,41,0,0          ; vbroadcastss  0x29a5(%rip),%ymm8        # 66a8 <_sk_callback_avx+0x3dd>
+  DB  196,98,125,24,5,73,42,0,0           ; vbroadcastss  0x2a49(%rip),%ymm8        # 67bc <_sk_callback_avx+0x3dd>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,151,41,0,0          ; vbroadcastss  0x2997(%rip),%ymm8        # 66ac <_sk_callback_avx+0x3e1>
+  DB  196,98,125,24,5,59,42,0,0           ; vbroadcastss  0x2a3b(%rip),%ymm8        # 67c0 <_sk_callback_avx+0x3e1>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8600,9 +8631,9 @@ _sk_load_4444_avx LABEL PROC
   DB  197,249,239,192                     ; vpxor         %xmm0,%xmm0,%xmm0
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  15,135,86,255,255,255               ; ja            3c8c <_sk_load_4444_avx+0x14>
+  DB  15,135,86,255,255,255               ; ja            3cfc <_sk_load_4444_avx+0x14>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 3d8c <_sk_load_4444_avx+0x114>
+  DB  76,141,13,75,0,0,0                  ; lea           0x4b(%rip),%r9        # 3dfc <_sk_load_4444_avx+0x114>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8614,7 +8645,7 @@ _sk_load_4444_avx LABEL PROC
   DB  196,193,121,196,68,122,4,2          ; vpinsrw       $0x2,0x4(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,68,122,2,1          ; vpinsrw       $0x1,0x2(%r10,%rdi,2),%xmm0,%xmm0
   DB  196,193,121,196,4,122,0             ; vpinsrw       $0x0,(%r10,%rdi,2),%xmm0,%xmm0
-  DB  233,2,255,255,255                   ; jmpq          3c8c <_sk_load_4444_avx+0x14>
+  DB  233,2,255,255,255                   ; jmpq          3cfc <_sk_load_4444_avx+0x14>
   DB  102,144                             ; xchg          %ax,%ax
   DB  242,255                             ; repnz         (bad)
   DB  255                                 ; (bad)
@@ -8691,25 +8722,25 @@ _sk_gather_4444_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,46,40,0,0          ; vbroadcastss  0x282e(%rip),%ymm0        # 66b0 <_sk_callback_avx+0x3e5>
+  DB  196,226,125,24,5,210,40,0,0         ; vbroadcastss  0x28d2(%rip),%ymm0        # 67c4 <_sk_callback_avx+0x3e5>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,33,40,0,0         ; vbroadcastss  0x2821(%rip),%ymm1        # 66b4 <_sk_callback_avx+0x3e9>
+  DB  196,226,125,24,13,197,40,0,0        ; vbroadcastss  0x28c5(%rip),%ymm1        # 67c8 <_sk_callback_avx+0x3e9>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,24,40,0,0         ; vbroadcastss  0x2818(%rip),%ymm1        # 66b8 <_sk_callback_avx+0x3ed>
+  DB  196,226,125,24,13,188,40,0,0        ; vbroadcastss  0x28bc(%rip),%ymm1        # 67cc <_sk_callback_avx+0x3ed>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,11,40,0,0         ; vbroadcastss  0x280b(%rip),%ymm2        # 66bc <_sk_callback_avx+0x3f1>
+  DB  196,226,125,24,21,175,40,0,0        ; vbroadcastss  0x28af(%rip),%ymm2        # 67d0 <_sk_callback_avx+0x3f1>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,2,40,0,0          ; vbroadcastss  0x2802(%rip),%ymm2        # 66c0 <_sk_callback_avx+0x3f5>
+  DB  196,226,125,24,21,166,40,0,0        ; vbroadcastss  0x28a6(%rip),%ymm2        # 67d4 <_sk_callback_avx+0x3f5>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,245,39,0,0          ; vbroadcastss  0x27f5(%rip),%ymm8        # 66c4 <_sk_callback_avx+0x3f9>
+  DB  196,98,125,24,5,153,40,0,0          ; vbroadcastss  0x2899(%rip),%ymm8        # 67d8 <_sk_callback_avx+0x3f9>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,235,39,0,0          ; vbroadcastss  0x27eb(%rip),%ymm8        # 66c8 <_sk_callback_avx+0x3fd>
+  DB  196,98,125,24,5,143,40,0,0          ; vbroadcastss  0x288f(%rip),%ymm8        # 67dc <_sk_callback_avx+0x3fd>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,221,39,0,0          ; vbroadcastss  0x27dd(%rip),%ymm8        # 66cc <_sk_callback_avx+0x401>
+  DB  196,98,125,24,5,129,40,0,0          ; vbroadcastss  0x2881(%rip),%ymm8        # 67e0 <_sk_callback_avx+0x401>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -8723,7 +8754,7 @@ PUBLIC _sk_store_4444_avx
 _sk_store_4444_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,194,39,0,0          ; vbroadcastss  0x27c2(%rip),%ymm8        # 66d0 <_sk_callback_avx+0x405>
+  DB  196,98,125,24,5,102,40,0,0          ; vbroadcastss  0x2866(%rip),%ymm8        # 67e4 <_sk_callback_avx+0x405>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,12               ; vpslld        $0xc,%xmm9,%xmm10
@@ -8750,7 +8781,7 @@ _sk_store_4444_avx LABEL PROC
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
   DB  196,66,57,43,193                    ; vpackusdw     %xmm9,%xmm8,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,10                              ; jne           3fa7 <_sk_store_4444_avx+0xa7>
+  DB  117,10                              ; jne           4017 <_sk_store_4444_avx+0xa7>
   DB  196,65,122,127,4,122                ; vmovdqu       %xmm8,(%r10,%rdi,2)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8758,9 +8789,9 @@ _sk_store_4444_avx LABEL PROC
   DB  65,128,224,7                        ; and           $0x7,%r8b
   DB  65,254,200                          ; dec           %r8b
   DB  65,128,248,6                        ; cmp           $0x6,%r8b
-  DB  119,236                             ; ja            3fa3 <_sk_store_4444_avx+0xa3>
+  DB  119,236                             ; ja            4013 <_sk_store_4444_avx+0xa3>
   DB  69,15,182,192                       ; movzbl        %r8b,%r8d
-  DB  76,141,13,66,0,0,0                  ; lea           0x42(%rip),%r9        # 4004 <_sk_store_4444_avx+0x104>
+  DB  76,141,13,66,0,0,0                  ; lea           0x42(%rip),%r9        # 4074 <_sk_store_4444_avx+0x104>
   DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
   DB  76,1,200                            ; add           %r9,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8771,7 +8802,7 @@ _sk_store_4444_avx LABEL PROC
   DB  196,67,121,21,68,122,4,2            ; vpextrw       $0x2,%xmm8,0x4(%r10,%rdi,2)
   DB  196,67,121,21,68,122,2,1            ; vpextrw       $0x1,%xmm8,0x2(%r10,%rdi,2)
   DB  196,67,121,21,4,122,0               ; vpextrw       $0x0,%xmm8,(%r10,%rdi,2)
-  DB  235,159                             ; jmp           3fa3 <_sk_store_4444_avx+0xa3>
+  DB  235,159                             ; jmp           4013 <_sk_store_4444_avx+0xa3>
   DB  247,255                             ; idiv          %edi
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -8797,55 +8828,87 @@ _sk_store_4444_avx LABEL PROC
 
 PUBLIC _sk_load_8888_avx
 _sk_load_8888_avx LABEL PROC
-  DB  80                                  ; push          %rax
-  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,3,8                              ; add           (%rax),%r9
-  DB  77,133,192                          ; test          %r8,%r8
-  DB  15,133,139,0,0,0                    ; jne           40c5 <_sk_load_8888_avx+0xa5>
-  DB  196,193,124,16,25                   ; vmovups       (%r9),%ymm3
-  DB  197,124,40,21,121,40,0,0            ; vmovaps       0x2879(%rip),%ymm10        # 68c0 <_sk_callback_avx+0x5f5>
-  DB  196,193,100,84,194                  ; vandps        %ymm10,%ymm3,%ymm0
+  DB  76,139,16                           ; mov           (%rax),%r10
+  DB  72,133,201                          ; test          %rcx,%rcx
+  DB  15,133,135,0,0,0                    ; jne           4125 <_sk_load_8888_avx+0x95>
+  DB  196,65,124,16,12,186                ; vmovups       (%r10,%rdi,4),%ymm9
+  DB  197,124,40,21,148,40,0,0            ; vmovaps       0x2894(%rip),%ymm10        # 6940 <_sk_callback_avx+0x561>
+  DB  196,193,52,84,194                   ; vandps        %ymm10,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,123,38,0,0          ; vbroadcastss  0x267b(%rip),%ymm8        # 66d4 <_sk_callback_avx+0x409>
+  DB  196,98,125,24,5,42,39,0,0           ; vbroadcastss  0x272a(%rip),%ymm8        # 67e8 <_sk_callback_avx+0x409>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  197,241,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm1
-  DB  196,195,125,25,217,1                ; vextractf128  $0x1,%ymm3,%xmm9
-  DB  196,193,105,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm2
+  DB  196,193,113,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm1
+  DB  196,99,125,25,203,1                 ; vextractf128  $0x1,%ymm9,%xmm3
+  DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  196,193,116,84,202                  ; vandps        %ymm10,%ymm1,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  197,161,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm11
-  DB  196,193,105,114,209,16              ; vpsrld        $0x10,%xmm9,%xmm2
+  DB  196,193,33,114,209,16               ; vpsrld        $0x10,%xmm9,%xmm11
+  DB  197,233,114,211,16                  ; vpsrld        $0x10,%xmm3,%xmm2
   DB  196,227,37,24,210,1                 ; vinsertf128   $0x1,%xmm2,%ymm11,%ymm2
   DB  196,193,108,84,210                  ; vandps        %ymm10,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  197,169,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm10
-  DB  196,193,97,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm3
-  DB  196,227,45,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm10,%ymm3
+  DB  196,193,49,114,209,24               ; vpsrld        $0x18,%xmm9,%xmm9
+  DB  197,225,114,211,24                  ; vpsrld        $0x18,%xmm3,%xmm3
+  DB  196,227,53,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm9,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,137,193                          ; mov           %r8,%rcx
-  DB  65,88                               ; pop           %r8
   DB  255,224                             ; jmpq          *%rax
-  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
-  DB  68,41,193                           ; sub           %r8d,%ecx
-  DB  192,225,3                           ; shl           $0x3,%cl
-  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
-  DB  72,211,232                          ; shr           %cl,%rax
-  DB  196,225,249,110,192                 ; vmovq         %rax,%xmm0
-  DB  196,226,121,48,192                  ; vpmovzxbw     %xmm0,%xmm0
-  DB  196,226,121,0,13,67,39,0,0          ; vpshufb       0x2743(%rip),%xmm0,%xmm1        # 6830 <_sk_callback_avx+0x565>
-  DB  196,226,121,33,201                  ; vpmovsxbd     %xmm1,%xmm1
-  DB  196,226,121,0,5,69,39,0,0           ; vpshufb       0x2745(%rip),%xmm0,%xmm0        # 6840 <_sk_callback_avx+0x575>
-  DB  196,226,121,33,192                  ; vpmovsxbd     %xmm0,%xmm0
-  DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
-  DB  196,194,125,44,25                   ; vmaskmovps    (%r9),%ymm0,%ymm3
-  DB  233,47,255,255,255                  ; jmpq          403f <_sk_load_8888_avx+0x1f>
+  DB  65,137,200                          ; mov           %ecx,%r8d
+  DB  65,128,224,7                        ; and           $0x7,%r8b
+  DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
+  DB  65,254,200                          ; dec           %r8b
+  DB  65,128,248,6                        ; cmp           $0x6,%r8b
+  DB  15,135,102,255,255,255              ; ja            40a4 <_sk_load_8888_avx+0x14>
+  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
+  DB  76,141,13,139,0,0,0                 ; lea           0x8b(%rip),%r9        # 41d4 <_sk_load_8888_avx+0x144>
+  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
+  DB  76,1,200                            ; add           %r9,%rax
+  DB  255,224                             ; jmpq          *%rax
+  DB  196,193,121,110,68,186,24           ; vmovd         0x18(%r10,%rdi,4),%xmm0
+  DB  197,249,112,192,68                  ; vpshufd       $0x44,%xmm0,%xmm0
+  DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
+  DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
+  DB  196,99,117,12,200,64                ; vblendps      $0x40,%ymm0,%ymm1,%ymm9
+  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
+  DB  196,195,121,34,68,186,20,1          ; vpinsrd       $0x1,0x14(%r10,%rdi,4),%xmm0,%xmm0
+  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
+  DB  196,99,125,25,200,1                 ; vextractf128  $0x1,%ymm9,%xmm0
+  DB  196,195,121,34,68,186,16,0          ; vpinsrd       $0x0,0x10(%r10,%rdi,4),%xmm0,%xmm0
+  DB  196,99,53,24,200,1                  ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm9
+  DB  196,195,49,34,68,186,12,3           ; vpinsrd       $0x3,0xc(%r10,%rdi,4),%xmm9,%xmm0
+  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  DB  196,195,49,34,68,186,8,2            ; vpinsrd       $0x2,0x8(%r10,%rdi,4),%xmm9,%xmm0
+  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  DB  196,195,49,34,68,186,4,1            ; vpinsrd       $0x1,0x4(%r10,%rdi,4),%xmm9,%xmm0
+  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  DB  196,195,49,34,4,186,0               ; vpinsrd       $0x0,(%r10,%rdi,4),%xmm9,%xmm0
+  DB  196,99,53,12,200,15                 ; vblendps      $0xf,%ymm0,%ymm9,%ymm9
+  DB  233,210,254,255,255                 ; jmpq          40a4 <_sk_load_8888_avx+0x14>
+  DB  102,144                             ; xchg          %ax,%ax
+  DB  236                                 ; in            (%dx),%al
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  222,255                             ; fdivrp        %st,%st(7)
+  DB  255                                 ; (bad)
+  DB  255,208                             ; callq         *%rax
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,194                             ; inc           %edx
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,174,255,255,255,154             ; ljmp          *-0x65000001(%rsi)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  126,255                             ; jle           41ed <_sk_load_8888_avx+0x15d>
+  DB  255                                 ; (bad)
+  DB  255                                 ; .byte         0xff
 
 PUBLIC _sk_gather_8888_avx
 _sk_gather_8888_avx LABEL PROC
@@ -8886,10 +8949,10 @@ _sk_gather_8888_avx LABEL PROC
   DB  196,131,121,34,4,152,2              ; vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   DB  196,131,121,34,28,144,3             ; vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,30,39,0,0             ; vmovaps       0x271e(%rip),%ymm10        # 68e0 <_sk_callback_avx+0x615>
+  DB  197,124,40,21,190,38,0,0            ; vmovaps       0x26be(%rip),%ymm10        # 6960 <_sk_callback_avx+0x581>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,4,37,0,0           ; vbroadcastss  0x2504(%rip),%ymm9        # 66d8 <_sk_callback_avx+0x40d>
+  DB  196,98,125,24,13,56,37,0,0          ; vbroadcastss  0x2538(%rip),%ymm9        # 67ec <_sk_callback_avx+0x40d>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -8917,12 +8980,9 @@ _sk_gather_8888_avx LABEL PROC
 
 PUBLIC _sk_store_8888_avx
 _sk_store_8888_avx LABEL PROC
-  DB  80                                  ; push          %rax
-  DB  73,137,200                          ; mov           %rcx,%r8
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,3,8                              ; add           (%rax),%r9
-  DB  196,98,125,24,5,134,36,0,0          ; vbroadcastss  0x2486(%rip),%ymm8        # 66dc <_sk_callback_avx+0x411>
+  DB  76,139,16                           ; mov           (%rax),%r10
+  DB  196,98,125,24,5,198,36,0,0          ; vbroadcastss  0x24c6(%rip),%ymm8        # 67f0 <_sk_callback_avx+0x411>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,65,116,89,208                   ; vmulps        %ymm8,%ymm1,%ymm10
@@ -8946,27 +9006,56 @@ _sk_store_8888_avx LABEL PROC
   DB  196,67,37,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm11,%ymm8
   DB  196,65,45,86,192                    ; vorpd         %ymm8,%ymm10,%ymm8
   DB  196,65,53,86,192                    ; vorpd         %ymm8,%ymm9,%ymm8
-  DB  77,133,192                          ; test          %r8,%r8
-  DB  117,14                              ; jne           42e8 <_sk_store_8888_avx+0xac>
-  DB  196,65,124,17,1                     ; vmovups       %ymm8,(%r9)
+  DB  72,133,201                          ; test          %rcx,%rcx
+  DB  117,10                              ; jne           43b8 <_sk_store_8888_avx+0x9c>
+  DB  196,65,124,17,4,186                 ; vmovups       %ymm8,(%r10,%rdi,4)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  76,137,193                          ; mov           %r8,%rcx
-  DB  65,88                               ; pop           %r8
   DB  255,224                             ; jmpq          *%rax
-  DB  185,8,0,0,0                         ; mov           $0x8,%ecx
-  DB  68,41,193                           ; sub           %r8d,%ecx
-  DB  192,225,3                           ; shl           $0x3,%cl
-  DB  72,199,192,255,255,255,255          ; mov           $0xffffffffffffffff,%rax
-  DB  72,211,232                          ; shr           %cl,%rax
-  DB  196,97,249,110,200                  ; vmovq         %rax,%xmm9
-  DB  196,66,121,48,201                   ; vpmovzxbw     %xmm9,%xmm9
-  DB  196,98,49,0,21,64,37,0,0            ; vpshufb       0x2540(%rip),%xmm9,%xmm10        # 6850 <_sk_callback_avx+0x585>
-  DB  196,66,121,33,210                   ; vpmovsxbd     %xmm10,%xmm10
-  DB  196,98,49,0,13,66,37,0,0            ; vpshufb       0x2542(%rip),%xmm9,%xmm9        # 6860 <_sk_callback_avx+0x595>
-  DB  196,66,121,33,201                   ; vpmovsxbd     %xmm9,%xmm9
-  DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  DB  196,66,53,46,1                      ; vmaskmovps    %ymm8,%ymm9,(%r9)
-  DB  235,175                             ; jmp           42df <_sk_store_8888_avx+0xa3>
+  DB  65,137,200                          ; mov           %ecx,%r8d
+  DB  65,128,224,7                        ; and           $0x7,%r8b
+  DB  65,254,200                          ; dec           %r8b
+  DB  65,128,248,6                        ; cmp           $0x6,%r8b
+  DB  119,236                             ; ja            43b4 <_sk_store_8888_avx+0x98>
+  DB  69,15,182,192                       ; movzbl        %r8b,%r8d
+  DB  76,141,13,85,0,0,0                  ; lea           0x55(%rip),%r9        # 4428 <_sk_store_8888_avx+0x10c>
+  DB  75,99,4,129                         ; movslq        (%r9,%r8,4),%rax
+  DB  76,1,200                            ; add           %r9,%rax
+  DB  255,224                             ; jmpq          *%rax
+  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
+  DB  196,67,121,22,76,186,24,2           ; vpextrd       $0x2,%xmm9,0x18(%r10,%rdi,4)
+  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
+  DB  196,67,121,22,76,186,20,1           ; vpextrd       $0x1,%xmm9,0x14(%r10,%rdi,4)
+  DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
+  DB  196,65,122,17,76,186,16             ; vmovss        %xmm9,0x10(%r10,%rdi,4)
+  DB  196,67,121,22,68,186,12,3           ; vpextrd       $0x3,%xmm8,0xc(%r10,%rdi,4)
+  DB  196,67,121,22,68,186,8,2            ; vpextrd       $0x2,%xmm8,0x8(%r10,%rdi,4)
+  DB  196,67,121,22,68,186,4,1            ; vpextrd       $0x1,%xmm8,0x4(%r10,%rdi,4)
+  DB  196,65,121,126,4,186                ; vmovd         %xmm8,(%r10,%rdi,4)
+  DB  235,143                             ; jmp           43b4 <_sk_store_8888_avx+0x98>
+  DB  15,31,0                             ; nopl          (%rax)
+  DB  245                                 ; cmc
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  237                                 ; in            (%dx),%eax
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,229                             ; jmpq          *%rbp
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  221,255                             ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,208                             ; callq         *%rax
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255,194                             ; inc           %edx
+  DB  255                                 ; (bad)
+  DB  255                                 ; (bad)
+  DB  255                                 ; .byte         0xff
+  DB  180,255                             ; mov           $0xff,%ah
+  DB  255                                 ; (bad)
+  DB  255                                 ; .byte         0xff
 
 PUBLIC _sk_load_f16_avx
 _sk_load_f16_avx LABEL PROC
@@ -8978,7 +9067,7 @@ _sk_load_f16_avx LABEL PROC
   DB  197,252,17,116,36,64                ; vmovups       %ymm6,0x40(%rsp)
   DB  197,252,17,108,36,32                ; vmovups       %ymm5,0x20(%rsp)
   DB  197,254,127,36,36                   ; vmovdqu       %ymm4,(%rsp)
-  DB  15,133,143,2,0,0                    ; jne           45eb <_sk_load_f16_avx+0x2bb>
+  DB  15,133,143,2,0,0                    ; jne           46ff <_sk_load_f16_avx+0x2bb>
   DB  197,121,16,4,248                    ; vmovupd       (%rax,%rdi,8),%xmm8
   DB  197,249,16,84,248,16                ; vmovupd       0x10(%rax,%rdi,8),%xmm2
   DB  197,249,16,76,248,32                ; vmovupd       0x20(%rax,%rdi,8),%xmm1
@@ -8996,13 +9085,13 @@ _sk_load_f16_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  DB  196,98,125,24,37,43,35,0,0          ; vbroadcastss  0x232b(%rip),%ymm12        # 66e0 <_sk_callback_avx+0x415>
+  DB  196,98,125,24,37,43,35,0,0          ; vbroadcastss  0x232b(%rip),%ymm12        # 67f4 <_sk_callback_avx+0x415>
   DB  196,193,124,84,204                  ; vandps        %ymm12,%ymm0,%ymm1
   DB  197,252,87,193                      ; vxorps        %ymm1,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,23,35,0,0          ; vbroadcastss  0x2317(%rip),%xmm11        # 66e4 <_sk_callback_avx+0x419>
+  DB  196,98,121,24,29,23,35,0,0          ; vbroadcastss  0x2317(%rip),%xmm11        # 67f8 <_sk_callback_avx+0x419>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,13,35,0,0          ; vbroadcastss  0x230d(%rip),%xmm13        # 66e8 <_sk_callback_avx+0x41d>
+  DB  196,98,121,24,45,13,35,0,0          ; vbroadcastss  0x230d(%rip),%xmm13        # 67fc <_sk_callback_avx+0x41d>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -9016,7 +9105,7 @@ _sk_load_f16_avx LABEL PROC
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,193                      ; vorps         %ymm1,%ymm0,%ymm0
   DB  196,227,125,25,193,1                ; vextractf128  $0x1,%ymm0,%xmm1
-  DB  196,226,121,24,29,195,34,0,0        ; vbroadcastss  0x22c3(%rip),%xmm3        # 66ec <_sk_callback_avx+0x421>
+  DB  196,226,121,24,29,195,34,0,0        ; vbroadcastss  0x22c3(%rip),%xmm3        # 6800 <_sk_callback_avx+0x421>
   DB  197,241,254,203                     ; vpaddd        %xmm3,%xmm1,%xmm1
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -9109,29 +9198,29 @@ _sk_load_f16_avx LABEL PROC
   DB  197,123,16,4,248                    ; vmovsd        (%rax,%rdi,8),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,79                              ; je            464a <_sk_load_f16_avx+0x31a>
+  DB  116,79                              ; je            475e <_sk_load_f16_avx+0x31a>
   DB  197,57,22,68,248,8                  ; vmovhpd       0x8(%rax,%rdi,8),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,67                              ; jb            464a <_sk_load_f16_avx+0x31a>
+  DB  114,67                              ; jb            475e <_sk_load_f16_avx+0x31a>
   DB  197,251,16,84,248,16                ; vmovsd        0x10(%rax,%rdi,8),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,68                              ; je            4657 <_sk_load_f16_avx+0x327>
+  DB  116,68                              ; je            476b <_sk_load_f16_avx+0x327>
   DB  197,233,22,84,248,24                ; vmovhpd       0x18(%rax,%rdi,8),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,56                              ; jb            4657 <_sk_load_f16_avx+0x327>
+  DB  114,56                              ; jb            476b <_sk_load_f16_avx+0x327>
   DB  197,251,16,76,248,32                ; vmovsd        0x20(%rax,%rdi,8),%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,68,253,255,255               ; je            4373 <_sk_load_f16_avx+0x43>
+  DB  15,132,68,253,255,255               ; je            4487 <_sk_load_f16_avx+0x43>
   DB  197,241,22,76,248,40                ; vmovhpd       0x28(%rax,%rdi,8),%xmm1,%xmm1
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,52,253,255,255               ; jb            4373 <_sk_load_f16_avx+0x43>
+  DB  15,130,52,253,255,255               ; jb            4487 <_sk_load_f16_avx+0x43>
   DB  197,122,126,76,248,48               ; vmovq         0x30(%rax,%rdi,8),%xmm9
-  DB  233,41,253,255,255                  ; jmpq          4373 <_sk_load_f16_avx+0x43>
+  DB  233,41,253,255,255                  ; jmpq          4487 <_sk_load_f16_avx+0x43>
   DB  197,241,87,201                      ; vxorpd        %xmm1,%xmm1,%xmm1
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,28,253,255,255                  ; jmpq          4373 <_sk_load_f16_avx+0x43>
+  DB  233,28,253,255,255                  ; jmpq          4487 <_sk_load_f16_avx+0x43>
   DB  197,241,87,201                      ; vxorpd        %xmm1,%xmm1,%xmm1
-  DB  233,19,253,255,255                  ; jmpq          4373 <_sk_load_f16_avx+0x43>
+  DB  233,19,253,255,255                  ; jmpq          4487 <_sk_load_f16_avx+0x43>
 
 PUBLIC _sk_gather_f16_avx
 _sk_gather_f16_avx LABEL PROC
@@ -9193,13 +9282,13 @@ _sk_gather_f16_avx LABEL PROC
   DB  197,249,105,210                     ; vpunpckhwd    %xmm2,%xmm0,%xmm2
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  DB  196,98,125,24,37,131,31,0,0         ; vbroadcastss  0x1f83(%rip),%ymm12        # 66f0 <_sk_callback_avx+0x425>
+  DB  196,98,125,24,37,131,31,0,0         ; vbroadcastss  0x1f83(%rip),%ymm12        # 6804 <_sk_callback_avx+0x425>
   DB  196,193,124,84,212                  ; vandps        %ymm12,%ymm0,%ymm2
   DB  197,252,87,194                      ; vxorps        %ymm2,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,111,31,0,0         ; vbroadcastss  0x1f6f(%rip),%xmm11        # 66f4 <_sk_callback_avx+0x429>
+  DB  196,98,121,24,29,111,31,0,0         ; vbroadcastss  0x1f6f(%rip),%xmm11        # 6808 <_sk_callback_avx+0x429>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,101,31,0,0         ; vbroadcastss  0x1f65(%rip),%xmm13        # 66f8 <_sk_callback_avx+0x42d>
+  DB  196,98,121,24,45,101,31,0,0         ; vbroadcastss  0x1f65(%rip),%xmm13        # 680c <_sk_callback_avx+0x42d>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -9213,7 +9302,7 @@ _sk_gather_f16_avx LABEL PROC
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,194                      ; vorps         %ymm2,%ymm0,%ymm0
   DB  196,227,125,25,194,1                ; vextractf128  $0x1,%ymm0,%xmm2
-  DB  196,226,121,24,29,27,31,0,0         ; vbroadcastss  0x1f1b(%rip),%xmm3        # 66fc <_sk_callback_avx+0x431>
+  DB  196,226,121,24,29,27,31,0,0         ; vbroadcastss  0x1f1b(%rip),%xmm3        # 6810 <_sk_callback_avx+0x431>
   DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -9315,12 +9404,12 @@ _sk_store_f16_avx LABEL PROC
   DB  197,252,17,180,36,128,0,0,0         ; vmovups       %ymm6,0x80(%rsp)
   DB  197,252,17,108,36,96                ; vmovups       %ymm5,0x60(%rsp)
   DB  197,252,17,100,36,64                ; vmovups       %ymm4,0x40(%rsp)
-  DB  196,98,125,24,13,40,29,0,0          ; vbroadcastss  0x1d28(%rip),%ymm9        # 6700 <_sk_callback_avx+0x435>
+  DB  196,98,125,24,13,40,29,0,0          ; vbroadcastss  0x1d28(%rip),%ymm9        # 6814 <_sk_callback_avx+0x435>
   DB  196,65,124,84,209                   ; vandps        %ymm9,%ymm0,%ymm10
   DB  197,252,17,4,36                     ; vmovups       %ymm0,(%rsp)
   DB  196,65,124,87,218                   ; vxorps        %ymm10,%ymm0,%ymm11
   DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
-  DB  196,98,121,24,5,14,29,0,0           ; vbroadcastss  0x1d0e(%rip),%xmm8        # 6704 <_sk_callback_avx+0x439>
+  DB  196,98,121,24,5,14,29,0,0           ; vbroadcastss  0x1d0e(%rip),%xmm8        # 6818 <_sk_callback_avx+0x439>
   DB  196,65,57,102,236                   ; vpcmpgtd      %xmm12,%xmm8,%xmm13
   DB  196,65,57,102,243                   ; vpcmpgtd      %xmm11,%xmm8,%xmm14
   DB  196,67,13,24,237,1                  ; vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -9330,7 +9419,7 @@ _sk_store_f16_avx LABEL PROC
   DB  196,67,13,24,242,1                  ; vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
   DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
-  DB  196,98,125,24,21,213,28,0,0         ; vbroadcastss  0x1cd5(%rip),%ymm10        # 6708 <_sk_callback_avx+0x43d>
+  DB  196,98,125,24,21,213,28,0,0         ; vbroadcastss  0x1cd5(%rip),%ymm10        # 681c <_sk_callback_avx+0x43d>
   DB  196,65,12,86,242                    ; vorps         %ymm10,%ymm14,%ymm14
   DB  196,67,125,25,247,1                 ; vextractf128  $0x1,%ymm14,%xmm15
   DB  196,65,1,254,228                    ; vpaddd        %xmm12,%xmm15,%xmm12
@@ -9412,7 +9501,7 @@ _sk_store_f16_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,75                              ; jne           4c1a <_sk_store_f16_avx+0x270>
+  DB  117,75                              ; jne           4d2e <_sk_store_f16_avx+0x270>
   DB  197,120,17,28,248                   ; vmovups       %xmm11,(%rax,%rdi,8)
   DB  197,120,17,84,248,16                ; vmovups       %xmm10,0x10(%rax,%rdi,8)
   DB  197,120,17,76,248,32                ; vmovups       %xmm9,0x20(%rax,%rdi,8)
@@ -9428,22 +9517,22 @@ _sk_store_f16_avx LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  197,121,214,28,248                  ; vmovq         %xmm11,(%rax,%rdi,8)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,193                             ; je            4be6 <_sk_store_f16_avx+0x23c>
+  DB  116,193                             ; je            4cfa <_sk_store_f16_avx+0x23c>
   DB  197,121,23,92,248,8                 ; vmovhpd       %xmm11,0x8(%rax,%rdi,8)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,181                             ; jb            4be6 <_sk_store_f16_avx+0x23c>
+  DB  114,181                             ; jb            4cfa <_sk_store_f16_avx+0x23c>
   DB  197,121,214,84,248,16               ; vmovq         %xmm10,0x10(%rax,%rdi,8)
-  DB  116,173                             ; je            4be6 <_sk_store_f16_avx+0x23c>
+  DB  116,173                             ; je            4cfa <_sk_store_f16_avx+0x23c>
   DB  197,121,23,84,248,24                ; vmovhpd       %xmm10,0x18(%rax,%rdi,8)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,161                             ; jb            4be6 <_sk_store_f16_avx+0x23c>
+  DB  114,161                             ; jb            4cfa <_sk_store_f16_avx+0x23c>
   DB  197,121,214,76,248,32               ; vmovq         %xmm9,0x20(%rax,%rdi,8)
-  DB  116,153                             ; je            4be6 <_sk_store_f16_avx+0x23c>
+  DB  116,153                             ; je            4cfa <_sk_store_f16_avx+0x23c>
   DB  197,121,23,76,248,40                ; vmovhpd       %xmm9,0x28(%rax,%rdi,8)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,141                             ; jb            4be6 <_sk_store_f16_avx+0x23c>
+  DB  114,141                             ; jb            4cfa <_sk_store_f16_avx+0x23c>
   DB  197,121,214,68,248,48               ; vmovq         %xmm8,0x30(%rax,%rdi,8)
-  DB  235,133                             ; jmp           4be6 <_sk_store_f16_avx+0x23c>
+  DB  235,133                             ; jmp           4cfa <_sk_store_f16_avx+0x23c>
 
 PUBLIC _sk_load_u16_be_avx
 _sk_load_u16_be_avx LABEL PROC
@@ -9451,7 +9540,7 @@ _sk_load_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,253,0,0,0                    ; jne           4d74 <_sk_load_u16_be_avx+0x113>
+  DB  15,133,253,0,0,0                    ; jne           4e88 <_sk_load_u16_be_avx+0x113>
   DB  196,65,121,16,4,64                  ; vmovupd       (%r8,%rax,2),%xmm8
   DB  196,193,121,16,84,64,16             ; vmovupd       0x10(%r8,%rax,2),%xmm2
   DB  196,193,121,16,92,64,32             ; vmovupd       0x20(%r8,%rax,2),%xmm3
@@ -9473,7 +9562,7 @@ _sk_load_u16_be_avx LABEL PROC
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,36,26,0,0          ; vbroadcastss  0x1a24(%rip),%ymm11        # 670c <_sk_callback_avx+0x441>
+  DB  196,98,125,24,29,36,26,0,0          ; vbroadcastss  0x1a24(%rip),%ymm11        # 6820 <_sk_callback_avx+0x441>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,177,109,202                     ; vpunpckhqdq   %xmm2,%xmm9,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -9507,29 +9596,29 @@ _sk_load_u16_be_avx LABEL PROC
   DB  196,65,123,16,4,64                  ; vmovsd        (%r8,%rax,2),%xmm8
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,85                              ; je            4dda <_sk_load_u16_be_avx+0x179>
+  DB  116,85                              ; je            4eee <_sk_load_u16_be_avx+0x179>
   DB  196,65,57,22,68,64,8                ; vmovhpd       0x8(%r8,%rax,2),%xmm8,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,72                              ; jb            4dda <_sk_load_u16_be_avx+0x179>
+  DB  114,72                              ; jb            4eee <_sk_load_u16_be_avx+0x179>
   DB  196,193,123,16,84,64,16             ; vmovsd        0x10(%r8,%rax,2),%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  116,72                              ; je            4de7 <_sk_load_u16_be_avx+0x186>
+  DB  116,72                              ; je            4efb <_sk_load_u16_be_avx+0x186>
   DB  196,193,105,22,84,64,24             ; vmovhpd       0x18(%r8,%rax,2),%xmm2,%xmm2
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,59                              ; jb            4de7 <_sk_load_u16_be_avx+0x186>
+  DB  114,59                              ; jb            4efb <_sk_load_u16_be_avx+0x186>
   DB  196,193,123,16,92,64,32             ; vmovsd        0x20(%r8,%rax,2),%xmm3
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  15,132,213,254,255,255              ; je            4c92 <_sk_load_u16_be_avx+0x31>
+  DB  15,132,213,254,255,255              ; je            4da6 <_sk_load_u16_be_avx+0x31>
   DB  196,193,97,22,92,64,40              ; vmovhpd       0x28(%r8,%rax,2),%xmm3,%xmm3
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  15,130,196,254,255,255              ; jb            4c92 <_sk_load_u16_be_avx+0x31>
+  DB  15,130,196,254,255,255              ; jb            4da6 <_sk_load_u16_be_avx+0x31>
   DB  196,65,122,126,76,64,48             ; vmovq         0x30(%r8,%rax,2),%xmm9
-  DB  233,184,254,255,255                 ; jmpq          4c92 <_sk_load_u16_be_avx+0x31>
+  DB  233,184,254,255,255                 ; jmpq          4da6 <_sk_load_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
   DB  197,233,87,210                      ; vxorpd        %xmm2,%xmm2,%xmm2
-  DB  233,171,254,255,255                 ; jmpq          4c92 <_sk_load_u16_be_avx+0x31>
+  DB  233,171,254,255,255                 ; jmpq          4da6 <_sk_load_u16_be_avx+0x31>
   DB  197,225,87,219                      ; vxorpd        %xmm3,%xmm3,%xmm3
-  DB  233,162,254,255,255                 ; jmpq          4c92 <_sk_load_u16_be_avx+0x31>
+  DB  233,162,254,255,255                 ; jmpq          4da6 <_sk_load_u16_be_avx+0x31>
 
 PUBLIC _sk_load_rgb_u16_be_avx
 _sk_load_rgb_u16_be_avx LABEL PROC
@@ -9537,7 +9626,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,127                        ; lea           (%rdi,%rdi,2),%rax
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  15,133,243,0,0,0                    ; jne           4ef5 <_sk_load_rgb_u16_be_avx+0x105>
+  DB  15,133,243,0,0,0                    ; jne           5009 <_sk_load_rgb_u16_be_avx+0x105>
   DB  196,193,122,111,4,64                ; vmovdqu       (%r8,%rax,2),%xmm0
   DB  196,193,122,111,84,64,12            ; vmovdqu       0xc(%r8,%rax,2),%xmm2
   DB  196,193,122,111,76,64,24            ; vmovdqu       0x18(%r8,%rax,2),%xmm1
@@ -9564,7 +9653,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,132,24,0,0         ; vbroadcastss  0x1884(%rip),%ymm11        # 6710 <_sk_callback_avx+0x445>
+  DB  196,98,125,24,29,132,24,0,0         ; vbroadcastss  0x1884(%rip),%ymm11        # 6824 <_sk_callback_avx+0x445>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -9585,48 +9674,48 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,33,24,0,0         ; vbroadcastss  0x1821(%rip),%ymm3        # 6714 <_sk_callback_avx+0x449>
+  DB  196,226,125,24,29,33,24,0,0         ; vbroadcastss  0x1821(%rip),%ymm3        # 6828 <_sk_callback_avx+0x449>
   DB  255,224                             ; jmpq          *%rax
   DB  196,193,121,110,4,64                ; vmovd         (%r8,%rax,2),%xmm0
   DB  196,193,121,196,68,64,4,2           ; vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  117,5                               ; jne           4f0e <_sk_load_rgb_u16_be_avx+0x11e>
-  DB  233,40,255,255,255                  ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,5                               ; jne           5022 <_sk_load_rgb_u16_be_avx+0x11e>
+  DB  233,40,255,255,255                  ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,76,64,6             ; vmovd         0x6(%r8,%rax,2),%xmm1
   DB  196,65,113,196,68,64,10,2           ; vpinsrw       $0x2,0xa(%r8,%rax,2),%xmm1,%xmm8
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,26                              ; jb            4f3d <_sk_load_rgb_u16_be_avx+0x14d>
+  DB  114,26                              ; jb            5051 <_sk_load_rgb_u16_be_avx+0x14d>
   DB  196,193,121,110,76,64,12            ; vmovd         0xc(%r8,%rax,2),%xmm1
   DB  196,193,113,196,84,64,16,2          ; vpinsrw       $0x2,0x10(%r8,%rax,2),%xmm1,%xmm2
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  117,10                              ; jne           4f42 <_sk_load_rgb_u16_be_avx+0x152>
-  DB  233,249,254,255,255                 ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,244,254,255,255                 ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           5056 <_sk_load_rgb_u16_be_avx+0x152>
+  DB  233,249,254,255,255                 ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,244,254,255,255                 ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,76,64,18            ; vmovd         0x12(%r8,%rax,2),%xmm1
   DB  196,65,113,196,76,64,22,2           ; vpinsrw       $0x2,0x16(%r8,%rax,2),%xmm1,%xmm9
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,26                              ; jb            4f71 <_sk_load_rgb_u16_be_avx+0x181>
+  DB  114,26                              ; jb            5085 <_sk_load_rgb_u16_be_avx+0x181>
   DB  196,193,121,110,76,64,24            ; vmovd         0x18(%r8,%rax,2),%xmm1
   DB  196,193,113,196,76,64,28,2          ; vpinsrw       $0x2,0x1c(%r8,%rax,2),%xmm1,%xmm1
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  117,10                              ; jne           4f76 <_sk_load_rgb_u16_be_avx+0x186>
-  DB  233,197,254,255,255                 ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,192,254,255,255                 ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  117,10                              ; jne           508a <_sk_load_rgb_u16_be_avx+0x186>
+  DB  233,197,254,255,255                 ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,192,254,255,255                 ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
   DB  196,193,121,110,92,64,30            ; vmovd         0x1e(%r8,%rax,2),%xmm3
   DB  196,65,97,196,92,64,34,2            ; vpinsrw       $0x2,0x22(%r8,%rax,2),%xmm3,%xmm11
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,20                              ; jb            4f9f <_sk_load_rgb_u16_be_avx+0x1af>
+  DB  114,20                              ; jb            50b3 <_sk_load_rgb_u16_be_avx+0x1af>
   DB  196,193,121,110,92,64,36            ; vmovd         0x24(%r8,%rax,2),%xmm3
   DB  196,193,97,196,92,64,40,2           ; vpinsrw       $0x2,0x28(%r8,%rax,2),%xmm3,%xmm3
-  DB  233,151,254,255,255                 ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
-  DB  233,146,254,255,255                 ; jmpq          4e36 <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,151,254,255,255                 ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
+  DB  233,146,254,255,255                 ; jmpq          4f4a <_sk_load_rgb_u16_be_avx+0x46>
 
 PUBLIC _sk_store_u16_be_avx
 _sk_store_u16_be_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
-  DB  196,98,125,24,5,94,23,0,0           ; vbroadcastss  0x175e(%rip),%ymm8        # 6718 <_sk_callback_avx+0x44d>
+  DB  196,98,125,24,5,94,23,0,0           ; vbroadcastss  0x175e(%rip),%ymm8        # 682c <_sk_callback_avx+0x44d>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
@@ -9664,7 +9753,7 @@ _sk_store_u16_be_avx LABEL PROC
   DB  196,65,17,98,200                    ; vpunpckldq    %xmm8,%xmm13,%xmm9
   DB  196,65,17,106,192                   ; vpunpckhdq    %xmm8,%xmm13,%xmm8
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,31                              ; jne           509e <_sk_store_u16_be_avx+0xfa>
+  DB  117,31                              ; jne           51b2 <_sk_store_u16_be_avx+0xfa>
   DB  196,65,120,17,28,64                 ; vmovups       %xmm11,(%r8,%rax,2)
   DB  196,65,120,17,84,64,16              ; vmovups       %xmm10,0x10(%r8,%rax,2)
   DB  196,65,120,17,76,64,32              ; vmovups       %xmm9,0x20(%r8,%rax,2)
@@ -9673,31 +9762,31 @@ _sk_store_u16_be_avx LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,214,28,64                ; vmovq         %xmm11,(%r8,%rax,2)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            509a <_sk_store_u16_be_avx+0xf6>
+  DB  116,240                             ; je            51ae <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,92,64,8               ; vmovhpd       %xmm11,0x8(%r8,%rax,2)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            509a <_sk_store_u16_be_avx+0xf6>
+  DB  114,227                             ; jb            51ae <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,84,64,16             ; vmovq         %xmm10,0x10(%r8,%rax,2)
-  DB  116,218                             ; je            509a <_sk_store_u16_be_avx+0xf6>
+  DB  116,218                             ; je            51ae <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,84,64,24              ; vmovhpd       %xmm10,0x18(%r8,%rax,2)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            509a <_sk_store_u16_be_avx+0xf6>
+  DB  114,205                             ; jb            51ae <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,76,64,32             ; vmovq         %xmm9,0x20(%r8,%rax,2)
-  DB  116,196                             ; je            509a <_sk_store_u16_be_avx+0xf6>
+  DB  116,196                             ; je            51ae <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,23,76,64,40              ; vmovhpd       %xmm9,0x28(%r8,%rax,2)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,183                             ; jb            509a <_sk_store_u16_be_avx+0xf6>
+  DB  114,183                             ; jb            51ae <_sk_store_u16_be_avx+0xf6>
   DB  196,65,121,214,68,64,48             ; vmovq         %xmm8,0x30(%r8,%rax,2)
-  DB  235,174                             ; jmp           509a <_sk_store_u16_be_avx+0xf6>
+  DB  235,174                             ; jmp           51ae <_sk_store_u16_be_avx+0xf6>
 
 PUBLIC _sk_load_f32_avx
 _sk_load_f32_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  119,110                             ; ja            5162 <_sk_load_f32_avx+0x76>
+  DB  119,110                             ; ja            5276 <_sk_load_f32_avx+0x76>
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
-  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 518c <_sk_load_f32_avx+0xa0>
+  DB  76,141,21,134,0,0,0                 ; lea           0x86(%rip),%r10        # 52a0 <_sk_load_f32_avx+0xa0>
   DB  73,99,4,138                         ; movslq        (%r10,%rcx,4),%rax
   DB  76,1,208                            ; add           %r10,%rax
   DB  255,224                             ; jmpq          *%rax
@@ -9754,7 +9843,7 @@ _sk_store_f32_avx LABEL PROC
   DB  196,65,37,20,196                    ; vunpcklpd     %ymm12,%ymm11,%ymm8
   DB  196,65,37,21,220                    ; vunpckhpd     %ymm12,%ymm11,%ymm11
   DB  72,133,201                          ; test          %rcx,%rcx
-  DB  117,55                              ; jne           5219 <_sk_store_f32_avx+0x6d>
+  DB  117,55                              ; jne           532d <_sk_store_f32_avx+0x6d>
   DB  196,67,45,24,225,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm12
   DB  196,67,61,24,235,1                  ; vinsertf128   $0x1,%xmm11,%ymm8,%ymm13
   DB  196,67,45,6,201,49                  ; vperm2f128    $0x31,%ymm9,%ymm10,%ymm9
@@ -9767,22 +9856,22 @@ _sk_store_f32_avx LABEL PROC
   DB  255,224                             ; jmpq          *%rax
   DB  196,65,121,17,20,128                ; vmovupd       %xmm10,(%r8,%rax,4)
   DB  72,131,249,1                        ; cmp           $0x1,%rcx
-  DB  116,240                             ; je            5215 <_sk_store_f32_avx+0x69>
+  DB  116,240                             ; je            5329 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,76,128,16             ; vmovupd       %xmm9,0x10(%r8,%rax,4)
   DB  72,131,249,3                        ; cmp           $0x3,%rcx
-  DB  114,227                             ; jb            5215 <_sk_store_f32_avx+0x69>
+  DB  114,227                             ; jb            5329 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,68,128,32             ; vmovupd       %xmm8,0x20(%r8,%rax,4)
-  DB  116,218                             ; je            5215 <_sk_store_f32_avx+0x69>
+  DB  116,218                             ; je            5329 <_sk_store_f32_avx+0x69>
   DB  196,65,121,17,92,128,48             ; vmovupd       %xmm11,0x30(%r8,%rax,4)
   DB  72,131,249,5                        ; cmp           $0x5,%rcx
-  DB  114,205                             ; jb            5215 <_sk_store_f32_avx+0x69>
+  DB  114,205                             ; jb            5329 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,84,128,64,1           ; vextractf128  $0x1,%ymm10,0x40(%r8,%rax,4)
-  DB  116,195                             ; je            5215 <_sk_store_f32_avx+0x69>
+  DB  116,195                             ; je            5329 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,76,128,80,1           ; vextractf128  $0x1,%ymm9,0x50(%r8,%rax,4)
   DB  72,131,249,7                        ; cmp           $0x7,%rcx
-  DB  114,181                             ; jb            5215 <_sk_store_f32_avx+0x69>
+  DB  114,181                             ; jb            5329 <_sk_store_f32_avx+0x69>
   DB  196,67,125,25,68,128,96,1           ; vextractf128  $0x1,%ymm8,0x60(%r8,%rax,4)
-  DB  235,171                             ; jmp           5215 <_sk_store_f32_avx+0x69>
+  DB  235,171                             ; jmp           5329 <_sk_store_f32_avx+0x69>
 
 PUBLIC _sk_clamp_x_avx
 _sk_clamp_x_avx LABEL PROC
@@ -9874,12 +9963,12 @@ _sk_mirror_y_avx LABEL PROC
 
 PUBLIC _sk_luminance_to_alpha_avx
 _sk_luminance_to_alpha_avx LABEL PROC
-  DB  196,226,125,24,29,131,19,0,0        ; vbroadcastss  0x1383(%rip),%ymm3        # 671c <_sk_callback_avx+0x451>
+  DB  196,226,125,24,29,131,19,0,0        ; vbroadcastss  0x1383(%rip),%ymm3        # 6830 <_sk_callback_avx+0x451>
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,29,122,19,0,0        ; vbroadcastss  0x137a(%rip),%ymm3        # 6720 <_sk_callback_avx+0x455>
+  DB  196,226,125,24,29,122,19,0,0        ; vbroadcastss  0x137a(%rip),%ymm3        # 6834 <_sk_callback_avx+0x455>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,109,19,0,0        ; vbroadcastss  0x136d(%rip),%ymm1        # 6724 <_sk_callback_avx+0x459>
+  DB  196,226,125,24,13,109,19,0,0        ; vbroadcastss  0x136d(%rip),%ymm1        # 6838 <_sk_callback_avx+0x459>
   DB  197,236,89,201                      ; vmulps        %ymm1,%ymm2,%ymm1
   DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10086,9 +10175,9 @@ _sk_evenly_spaced_gradient_avx LABEL PROC
   DB  72,139,24                           ; mov           (%rax),%rbx
   DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
   DB  72,255,203                          ; dec           %rbx
-  DB  120,7                               ; js            570d <_sk_evenly_spaced_gradient_avx+0x1f>
+  DB  120,7                               ; js            5821 <_sk_evenly_spaced_gradient_avx+0x1f>
   DB  196,225,242,42,203                  ; vcvtsi2ss     %rbx,%xmm1,%xmm1
-  DB  235,21                              ; jmp           5722 <_sk_evenly_spaced_gradient_avx+0x34>
+  DB  235,21                              ; jmp           5836 <_sk_evenly_spaced_gradient_avx+0x34>
   DB  73,137,216                          ; mov           %rbx,%r8
   DB  73,209,232                          ; shr           %r8
   DB  131,227,1                           ; and           $0x1,%ebx
@@ -10253,12 +10342,12 @@ _sk_gradient_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,80                              ; jb            5ab0 <_sk_gradient_avx+0x69>
+  DB  114,80                              ; jb            5bc4 <_sk_gradient_avx+0x69>
   DB  72,139,88,72                        ; mov           0x48(%rax),%rbx
   DB  73,255,200                          ; dec           %r8
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
-  DB  196,98,125,24,21,175,12,0,0         ; vbroadcastss  0xcaf(%rip),%ymm10        # 6728 <_sk_callback_avx+0x45d>
+  DB  196,98,125,24,21,175,12,0,0         ; vbroadcastss  0xcaf(%rip),%ymm10        # 683c <_sk_callback_avx+0x45d>
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  196,98,125,24,3                     ; vbroadcastss  (%rbx),%ymm8
   DB  197,60,194,192,2                    ; vcmpleps      %ymm0,%ymm8,%ymm8
@@ -10270,7 +10359,7 @@ _sk_gradient_avx LABEL PROC
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  73,255,200                          ; dec           %r8
-  DB  117,205                             ; jne           5a7d <_sk_gradient_avx+0x36>
+  DB  117,205                             ; jne           5b91 <_sk_gradient_avx+0x36>
   DB  196,195,249,22,200,1                ; vpextrq       $0x1,%xmm1,%r8
   DB  69,137,193                          ; mov           %r8d,%r9d
   DB  73,193,232,32                       ; shr           $0x20,%r8
@@ -10448,27 +10537,27 @@ _sk_xy_to_unit_angle_avx LABEL PROC
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,211,8,0,0          ; vbroadcastss  0x8d3(%rip),%ymm13        # 672c <_sk_callback_avx+0x461>
+  DB  196,98,125,24,45,211,8,0,0          ; vbroadcastss  0x8d3(%rip),%ymm13        # 6840 <_sk_callback_avx+0x461>
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,201,8,0,0          ; vbroadcastss  0x8c9(%rip),%ymm14        # 6730 <_sk_callback_avx+0x465>
+  DB  196,98,125,24,53,201,8,0,0          ; vbroadcastss  0x8c9(%rip),%ymm14        # 6844 <_sk_callback_avx+0x465>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,186,8,0,0          ; vbroadcastss  0x8ba(%rip),%ymm14        # 6734 <_sk_callback_avx+0x469>
+  DB  196,98,125,24,53,186,8,0,0          ; vbroadcastss  0x8ba(%rip),%ymm14        # 6848 <_sk_callback_avx+0x469>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,229                    ; vmulps        %ymm13,%ymm12,%ymm12
-  DB  196,98,125,24,45,171,8,0,0          ; vbroadcastss  0x8ab(%rip),%ymm13        # 6738 <_sk_callback_avx+0x46d>
+  DB  196,98,125,24,45,171,8,0,0          ; vbroadcastss  0x8ab(%rip),%ymm13        # 684c <_sk_callback_avx+0x46d>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,150,8,0,0          ; vbroadcastss  0x896(%rip),%ymm10        # 673c <_sk_callback_avx+0x471>
+  DB  196,98,125,24,21,150,8,0,0          ; vbroadcastss  0x896(%rip),%ymm10        # 6850 <_sk_callback_avx+0x471>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,128,8,0,0          ; vbroadcastss  0x880(%rip),%ymm10        # 6740 <_sk_callback_avx+0x475>
+  DB  196,98,125,24,21,128,8,0,0          ; vbroadcastss  0x880(%rip),%ymm10        # 6854 <_sk_callback_avx+0x475>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,106,8,0,0          ; vbroadcastss  0x86a(%rip),%ymm10        # 6744 <_sk_callback_avx+0x479>
+  DB  196,98,125,24,21,106,8,0,0          ; vbroadcastss  0x86a(%rip),%ymm10        # 6858 <_sk_callback_avx+0x479>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -10488,7 +10577,7 @@ _sk_xy_to_radius_avx LABEL PROC
 PUBLIC _sk_save_xy_avx
 _sk_save_xy_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,52,8,0,0            ; vbroadcastss  0x834(%rip),%ymm8        # 6748 <_sk_callback_avx+0x47d>
+  DB  196,98,125,24,5,52,8,0,0            ; vbroadcastss  0x834(%rip),%ymm8        # 685c <_sk_callback_avx+0x47d>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -10521,9 +10610,9 @@ _sk_accumulate_avx LABEL PROC
 PUBLIC _sk_bilinear_nx_avx
 _sk_bilinear_nx_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,192,7,0,0          ; vbroadcastss  0x7c0(%rip),%ymm0        # 674c <_sk_callback_avx+0x481>
+  DB  196,226,125,24,5,192,7,0,0          ; vbroadcastss  0x7c0(%rip),%ymm0        # 6860 <_sk_callback_avx+0x481>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,183,7,0,0           ; vbroadcastss  0x7b7(%rip),%ymm8        # 6750 <_sk_callback_avx+0x485>
+  DB  196,98,125,24,5,183,7,0,0           ; vbroadcastss  0x7b7(%rip),%ymm8        # 6864 <_sk_callback_avx+0x485>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10532,7 +10621,7 @@ _sk_bilinear_nx_avx LABEL PROC
 PUBLIC _sk_bilinear_px_avx
 _sk_bilinear_px_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,159,7,0,0          ; vbroadcastss  0x79f(%rip),%ymm0        # 6754 <_sk_callback_avx+0x489>
+  DB  196,226,125,24,5,159,7,0,0          ; vbroadcastss  0x79f(%rip),%ymm0        # 6868 <_sk_callback_avx+0x489>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10542,9 +10631,9 @@ _sk_bilinear_px_avx LABEL PROC
 PUBLIC _sk_bilinear_ny_avx
 _sk_bilinear_ny_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,131,7,0,0         ; vbroadcastss  0x783(%rip),%ymm1        # 6758 <_sk_callback_avx+0x48d>
+  DB  196,226,125,24,13,131,7,0,0         ; vbroadcastss  0x783(%rip),%ymm1        # 686c <_sk_callback_avx+0x48d>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,121,7,0,0           ; vbroadcastss  0x779(%rip),%ymm8        # 675c <_sk_callback_avx+0x491>
+  DB  196,98,125,24,5,121,7,0,0           ; vbroadcastss  0x779(%rip),%ymm8        # 6870 <_sk_callback_avx+0x491>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10553,7 +10642,7 @@ _sk_bilinear_ny_avx LABEL PROC
 PUBLIC _sk_bilinear_py_avx
 _sk_bilinear_py_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,97,7,0,0          ; vbroadcastss  0x761(%rip),%ymm1        # 6760 <_sk_callback_avx+0x495>
+  DB  196,226,125,24,13,97,7,0,0          ; vbroadcastss  0x761(%rip),%ymm1        # 6874 <_sk_callback_avx+0x495>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -10563,14 +10652,14 @@ _sk_bilinear_py_avx LABEL PROC
 PUBLIC _sk_bicubic_n3x_avx
 _sk_bicubic_n3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,68,7,0,0           ; vbroadcastss  0x744(%rip),%ymm0        # 6764 <_sk_callback_avx+0x499>
+  DB  196,226,125,24,5,68,7,0,0           ; vbroadcastss  0x744(%rip),%ymm0        # 6878 <_sk_callback_avx+0x499>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,59,7,0,0            ; vbroadcastss  0x73b(%rip),%ymm8        # 6768 <_sk_callback_avx+0x49d>
+  DB  196,98,125,24,5,59,7,0,0            ; vbroadcastss  0x73b(%rip),%ymm8        # 687c <_sk_callback_avx+0x49d>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,44,7,0,0           ; vbroadcastss  0x72c(%rip),%ymm10        # 676c <_sk_callback_avx+0x4a1>
+  DB  196,98,125,24,21,44,7,0,0           ; vbroadcastss  0x72c(%rip),%ymm10        # 6880 <_sk_callback_avx+0x4a1>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,34,7,0,0           ; vbroadcastss  0x722(%rip),%ymm10        # 6770 <_sk_callback_avx+0x4a5>
+  DB  196,98,125,24,21,34,7,0,0           ; vbroadcastss  0x722(%rip),%ymm10        # 6884 <_sk_callback_avx+0x4a5>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10580,19 +10669,19 @@ _sk_bicubic_n3x_avx LABEL PROC
 PUBLIC _sk_bicubic_n1x_avx
 _sk_bicubic_n1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,5,7,0,0            ; vbroadcastss  0x705(%rip),%ymm0        # 6774 <_sk_callback_avx+0x4a9>
+  DB  196,226,125,24,5,5,7,0,0            ; vbroadcastss  0x705(%rip),%ymm0        # 6888 <_sk_callback_avx+0x4a9>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,252,6,0,0           ; vbroadcastss  0x6fc(%rip),%ymm8        # 6778 <_sk_callback_avx+0x4ad>
+  DB  196,98,125,24,5,252,6,0,0           ; vbroadcastss  0x6fc(%rip),%ymm8        # 688c <_sk_callback_avx+0x4ad>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,242,6,0,0          ; vbroadcastss  0x6f2(%rip),%ymm9        # 677c <_sk_callback_avx+0x4b1>
+  DB  196,98,125,24,13,242,6,0,0          ; vbroadcastss  0x6f2(%rip),%ymm9        # 6890 <_sk_callback_avx+0x4b1>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,232,6,0,0          ; vbroadcastss  0x6e8(%rip),%ymm10        # 6780 <_sk_callback_avx+0x4b5>
+  DB  196,98,125,24,21,232,6,0,0          ; vbroadcastss  0x6e8(%rip),%ymm10        # 6894 <_sk_callback_avx+0x4b5>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,217,6,0,0          ; vbroadcastss  0x6d9(%rip),%ymm10        # 6784 <_sk_callback_avx+0x4b9>
+  DB  196,98,125,24,21,217,6,0,0          ; vbroadcastss  0x6d9(%rip),%ymm10        # 6898 <_sk_callback_avx+0x4b9>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,202,6,0,0          ; vbroadcastss  0x6ca(%rip),%ymm9        # 6788 <_sk_callback_avx+0x4bd>
+  DB  196,98,125,24,13,202,6,0,0          ; vbroadcastss  0x6ca(%rip),%ymm9        # 689c <_sk_callback_avx+0x4bd>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10601,17 +10690,17 @@ _sk_bicubic_n1x_avx LABEL PROC
 PUBLIC _sk_bicubic_p1x_avx
 _sk_bicubic_p1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,178,6,0,0           ; vbroadcastss  0x6b2(%rip),%ymm8        # 678c <_sk_callback_avx+0x4c1>
+  DB  196,98,125,24,5,178,6,0,0           ; vbroadcastss  0x6b2(%rip),%ymm8        # 68a0 <_sk_callback_avx+0x4c1>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,72,64                    ; vmovups       0x40(%rax),%ymm9
-  DB  196,98,125,24,21,164,6,0,0          ; vbroadcastss  0x6a4(%rip),%ymm10        # 6790 <_sk_callback_avx+0x4c5>
+  DB  196,98,125,24,21,164,6,0,0          ; vbroadcastss  0x6a4(%rip),%ymm10        # 68a4 <_sk_callback_avx+0x4c5>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,154,6,0,0          ; vbroadcastss  0x69a(%rip),%ymm11        # 6794 <_sk_callback_avx+0x4c9>
+  DB  196,98,125,24,29,154,6,0,0          ; vbroadcastss  0x69a(%rip),%ymm11        # 68a8 <_sk_callback_avx+0x4c9>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,129,6,0,0          ; vbroadcastss  0x681(%rip),%ymm9        # 6798 <_sk_callback_avx+0x4cd>
+  DB  196,98,125,24,13,129,6,0,0          ; vbroadcastss  0x681(%rip),%ymm9        # 68ac <_sk_callback_avx+0x4cd>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10620,13 +10709,13 @@ _sk_bicubic_p1x_avx LABEL PROC
 PUBLIC _sk_bicubic_p3x_avx
 _sk_bicubic_p3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,105,6,0,0          ; vbroadcastss  0x669(%rip),%ymm0        # 679c <_sk_callback_avx+0x4d1>
+  DB  196,226,125,24,5,105,6,0,0          ; vbroadcastss  0x669(%rip),%ymm0        # 68b0 <_sk_callback_avx+0x4d1>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,86,6,0,0           ; vbroadcastss  0x656(%rip),%ymm10        # 67a0 <_sk_callback_avx+0x4d5>
+  DB  196,98,125,24,21,86,6,0,0           ; vbroadcastss  0x656(%rip),%ymm10        # 68b4 <_sk_callback_avx+0x4d5>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,76,6,0,0           ; vbroadcastss  0x64c(%rip),%ymm10        # 67a4 <_sk_callback_avx+0x4d9>
+  DB  196,98,125,24,21,76,6,0,0           ; vbroadcastss  0x64c(%rip),%ymm10        # 68b8 <_sk_callback_avx+0x4d9>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10636,14 +10725,14 @@ _sk_bicubic_p3x_avx LABEL PROC
 PUBLIC _sk_bicubic_n3y_avx
 _sk_bicubic_n3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,47,6,0,0          ; vbroadcastss  0x62f(%rip),%ymm1        # 67a8 <_sk_callback_avx+0x4dd>
+  DB  196,226,125,24,13,47,6,0,0          ; vbroadcastss  0x62f(%rip),%ymm1        # 68bc <_sk_callback_avx+0x4dd>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,37,6,0,0            ; vbroadcastss  0x625(%rip),%ymm8        # 67ac <_sk_callback_avx+0x4e1>
+  DB  196,98,125,24,5,37,6,0,0            ; vbroadcastss  0x625(%rip),%ymm8        # 68c0 <_sk_callback_avx+0x4e1>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,22,6,0,0           ; vbroadcastss  0x616(%rip),%ymm10        # 67b0 <_sk_callback_avx+0x4e5>
+  DB  196,98,125,24,21,22,6,0,0           ; vbroadcastss  0x616(%rip),%ymm10        # 68c4 <_sk_callback_avx+0x4e5>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,12,6,0,0           ; vbroadcastss  0x60c(%rip),%ymm10        # 67b4 <_sk_callback_avx+0x4e9>
+  DB  196,98,125,24,21,12,6,0,0           ; vbroadcastss  0x60c(%rip),%ymm10        # 68c8 <_sk_callback_avx+0x4e9>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -10653,19 +10742,19 @@ _sk_bicubic_n3y_avx LABEL PROC
 PUBLIC _sk_bicubic_n1y_avx
 _sk_bicubic_n1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,239,5,0,0         ; vbroadcastss  0x5ef(%rip),%ymm1        # 67b8 <_sk_callback_avx+0x4ed>
+  DB  196,226,125,24,13,239,5,0,0         ; vbroadcastss  0x5ef(%rip),%ymm1        # 68cc <_sk_callback_avx+0x4ed>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,229,5,0,0           ; vbroadcastss  0x5e5(%rip),%ymm8        # 67bc <_sk_callback_avx+0x4f1>
+  DB  196,98,125,24,5,229,5,0,0           ; vbroadcastss  0x5e5(%rip),%ymm8        # 68d0 <_sk_callback_avx+0x4f1>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,219,5,0,0          ; vbroadcastss  0x5db(%rip),%ymm9        # 67c0 <_sk_callback_avx+0x4f5>
+  DB  196,98,125,24,13,219,5,0,0          ; vbroadcastss  0x5db(%rip),%ymm9        # 68d4 <_sk_callback_avx+0x4f5>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,209,5,0,0          ; vbroadcastss  0x5d1(%rip),%ymm10        # 67c4 <_sk_callback_avx+0x4f9>
+  DB  196,98,125,24,21,209,5,0,0          ; vbroadcastss  0x5d1(%rip),%ymm10        # 68d8 <_sk_callback_avx+0x4f9>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,194,5,0,0          ; vbroadcastss  0x5c2(%rip),%ymm10        # 67c8 <_sk_callback_avx+0x4fd>
+  DB  196,98,125,24,21,194,5,0,0          ; vbroadcastss  0x5c2(%rip),%ymm10        # 68dc <_sk_callback_avx+0x4fd>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,179,5,0,0          ; vbroadcastss  0x5b3(%rip),%ymm9        # 67cc <_sk_callback_avx+0x501>
+  DB  196,98,125,24,13,179,5,0,0          ; vbroadcastss  0x5b3(%rip),%ymm9        # 68e0 <_sk_callback_avx+0x501>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10674,17 +10763,17 @@ _sk_bicubic_n1y_avx LABEL PROC
 PUBLIC _sk_bicubic_p1y_avx
 _sk_bicubic_p1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,155,5,0,0           ; vbroadcastss  0x59b(%rip),%ymm8        # 67d0 <_sk_callback_avx+0x505>
+  DB  196,98,125,24,5,155,5,0,0           ; vbroadcastss  0x59b(%rip),%ymm8        # 68e4 <_sk_callback_avx+0x505>
   DB  197,188,88,72,32                    ; vaddps        0x20(%rax),%ymm8,%ymm1
   DB  197,124,16,72,96                    ; vmovups       0x60(%rax),%ymm9
-  DB  196,98,125,24,21,140,5,0,0          ; vbroadcastss  0x58c(%rip),%ymm10        # 67d4 <_sk_callback_avx+0x509>
+  DB  196,98,125,24,21,140,5,0,0          ; vbroadcastss  0x58c(%rip),%ymm10        # 68e8 <_sk_callback_avx+0x509>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,130,5,0,0          ; vbroadcastss  0x582(%rip),%ymm11        # 67d8 <_sk_callback_avx+0x50d>
+  DB  196,98,125,24,29,130,5,0,0          ; vbroadcastss  0x582(%rip),%ymm11        # 68ec <_sk_callback_avx+0x50d>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,105,5,0,0          ; vbroadcastss  0x569(%rip),%ymm9        # 67dc <_sk_callback_avx+0x511>
+  DB  196,98,125,24,13,105,5,0,0          ; vbroadcastss  0x569(%rip),%ymm9        # 68f0 <_sk_callback_avx+0x511>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10693,13 +10782,13 @@ _sk_bicubic_p1y_avx LABEL PROC
 PUBLIC _sk_bicubic_p3y_avx
 _sk_bicubic_p3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,81,5,0,0          ; vbroadcastss  0x551(%rip),%ymm1        # 67e0 <_sk_callback_avx+0x515>
+  DB  196,226,125,24,13,81,5,0,0          ; vbroadcastss  0x551(%rip),%ymm1        # 68f4 <_sk_callback_avx+0x515>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,61,5,0,0           ; vbroadcastss  0x53d(%rip),%ymm10        # 67e4 <_sk_callback_avx+0x519>
+  DB  196,98,125,24,21,61,5,0,0           ; vbroadcastss  0x53d(%rip),%ymm10        # 68f8 <_sk_callback_avx+0x519>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,51,5,0,0           ; vbroadcastss  0x533(%rip),%ymm10        # 67e8 <_sk_callback_avx+0x51d>
+  DB  196,98,125,24,21,51,5,0,0           ; vbroadcastss  0x533(%rip),%ymm10        # 68fc <_sk_callback_avx+0x51d>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -10813,25 +10902,25 @@ ALIGN 4
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6495 <.literal4+0xb1>
+  DB  71,225,61                           ; rex.RXB       loope 65a9 <.literal4+0xb1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 64a5 <.literal4+0xc1>
+  DB  71,225,61                           ; rex.RXB       loope 65b9 <.literal4+0xc1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 64b5 <.literal4+0xd1>
+  DB  71,225,61                           ; rex.RXB       loope 65c9 <.literal4+0xd1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 64c5 <.literal4+0xe1>
+  DB  71,225,61                           ; rex.RXB       loope 65d9 <.literal4+0xe1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -10879,7 +10968,7 @@ ALIGN 4
   DB  190,129,128,128,59                  ; mov           $0x3b808081,%esi
   DB  129,128,128,59,0,248,0,0,8,33       ; addl          $0x21080000,-0x7ffc480(%rax)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        6511 <.literal4+0x12d>
+  DB  224,7                               ; loopne        6625 <.literal4+0x12d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -10895,10 +10984,10 @@ ALIGN 4
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
   DB  0,52,255                            ; add           %dh,(%rdi,%rdi,8)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            6538 <.literal4+0x154>
+  DB  127,0                               ; jg            664c <.literal4+0x154>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            65b1 <.literal4+0x1cd>
+  DB  119,115                             ; ja            66c5 <.literal4+0x1cd>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10912,10 +11001,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            656c <.literal4+0x188>
+  DB  127,0                               ; jg            6680 <.literal4+0x188>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            65e5 <.literal4+0x201>
+  DB  119,115                             ; ja            66f9 <.literal4+0x201>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10929,10 +11018,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            65a0 <.literal4+0x1bc>
+  DB  127,0                               ; jg            66b4 <.literal4+0x1bc>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6619 <.literal4+0x235>
+  DB  119,115                             ; ja            672d <.literal4+0x235>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10946,10 +11035,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            65d4 <.literal4+0x1f0>
+  DB  127,0                               ; jg            66e8 <.literal4+0x1f0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            664d <.literal4+0x269>
+  DB  119,115                             ; ja            6761 <.literal4+0x269>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10962,7 +11051,7 @@ ALIGN 4
   DB  0,75,0                              ; add           %cl,0x0(%rbx)
   DB  0,128,63,0,0,200                    ; add           %al,-0x37ffffc1(%rax)
   DB  66,0,0                              ; rex.X         add %al,(%rax)
-  DB  127,67                              ; jg            664b <.literal4+0x267>
+  DB  127,67                              ; jg            675f <.literal4+0x267>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -10974,10 +11063,10 @@ ALIGN 4
   DB  190,80,128,3,62                     ; mov           $0x3e038050,%esi
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           666b <.literal4+0x287>
+  DB  118,63                              ; jbe           677f <.literal4+0x287>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            667f <.literal4+0x29b>
+  DB  127,67                              ; jg            6793 <.literal4+0x29b>
   DB  129,128,128,59,0,0,128,63,129,128   ; addl          $0x80813f80,0x3b80(%rax)
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,128,63,129,128,128                ; add           %al,-0x7f7f7ec1(%rax)
@@ -10986,7 +11075,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        6661 <.literal4+0x27d>
+  DB  224,7                               ; loopne        6775 <.literal4+0x27d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -10998,7 +11087,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        667d <.literal4+0x299>
+  DB  224,7                               ; loopne        6791 <.literal4+0x299>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -11009,7 +11098,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            66d2 <.literal4+0x2ee>
+  DB  124,66                              ; jl            67e6 <.literal4+0x2ee>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,55,0,15                 ; mov           %ecx,0xf003788(%rax)
@@ -11027,9 +11116,9 @@ ALIGN 4
   DB  137,136,136,59,15,0                 ; mov           %ecx,0xf3b88(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,61,0,0                  ; mov           %ecx,0x3d88(%rax)
-  DB  112,65                              ; jo            6715 <.literal4+0x331>
+  DB  112,65                              ; jo            6829 <.literal4+0x331>
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            6723 <.literal4+0x33f>
+  DB  127,67                              ; jg            6837 <.literal4+0x33f>
   DB  0,128,0,0,0,0                       ; add           %al,0x0(%rax)
   DB  0,128,0,4,0,128                     ; add           %al,-0x7ffffc00(%rax)
   DB  0,0                                 ; add           %al,(%rax)
@@ -11045,7 +11134,7 @@ ALIGN 4
   DB  0,128,55,0,0,128                    ; add           %al,-0x7fffffc9(%rax)
   DB  63                                  ; (bad)
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            6763 <.literal4+0x37f>
+  DB  127,71                              ; jg            6877 <.literal4+0x37f>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,89                               ; ds            pop %rcx
@@ -11132,72 +11221,6 @@ ALIGN 4
   DB  170                                 ; stos          %al,%es:(%rdi)
   DB  190                                 ; .byte         0xbe
 
-ALIGN 16
-  DB  0,2                                 ; add           %al,(%rdx)
-  DB  4,6                                 ; add           $0x6,%al
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  8,10                                ; or            %cl,(%rdx)
-  DB  12,14                               ; or            $0xe,%al
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  255,0                               ; incl          (%rax)
-  DB  0,2                                 ; add           %al,(%rdx)
-  DB  4,6                                 ; add           $0x6,%al
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  8,10                                ; or            %cl,(%rdx)
-  DB  12,14                               ; or            $0xe,%al
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,2                                 ; add           %al,(%rdx)
-  DB  4,6                                 ; add           $0x6,%al
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  8,10                                ; or            %cl,(%rdx)
-  DB  12,14                               ; or            $0xe,%al
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-  DB  0,0                                 ; add           %al,(%rax)
-
 ALIGN 32
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
@@ -11263,6 +11286,24 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
+
+ALIGN 16
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
+  DB  255,0                               ; incl          (%rax)
 ALIGN 32
 
 PUBLIC _sk_start_pipeline_sse41
index d174701..fa11869 100644 (file)
@@ -217,8 +217,8 @@ SI void store(T* dst, V v, size_t tail) {
     }
 #endif
 
-// AVX adds some mask loads and stores that make for shorter, faster code.
-#if defined(JUMPER) && defined(__AVX__)
+// AVX2 adds some mask loads and stores that make for shorter, faster code.
+#if defined(JUMPER) && defined(__AVX2__)
     SI U32 mask(size_t tail) {
         // We go a little out of our way to avoid needing large constant values here.
 
@@ -227,16 +227,14 @@ SI void store(T* dst, V v, size_t tail) {
         uint64_t mask = 0xffffffffffffffff >> 8*(kStride-tail);
 
         // Sign-extend each mask lane to its full width, 0x00000000 or 0xffffffff.
-        using S8  = int8_t  __attribute__((ext_vector_type(8)));
-        using S32 = int32_t __attribute__((ext_vector_type(8)));
-        return (U32)__builtin_convertvector(unaligned_load<S8>(&mask), S32);
+        return _mm256_cvtepi8_epi32(_mm_cvtsi64_si128((int64_t)mask));
     }
 
     template <>
     inline U32 load(const uint32_t* src, size_t tail) {
         __builtin_assume(tail < kStride);
         if (__builtin_expect(tail, 0)) {
-            return (U32)_mm256_maskload_ps((const float*)src, mask(tail));
+            return _mm256_maskload_epi32((const int*)src, mask(tail));
         }
         return unaligned_load<U32>(src);
     }
@@ -245,7 +243,7 @@ SI void store(T* dst, V v, size_t tail) {
     inline void store(uint32_t* dst, U32 v, size_t tail) {
         __builtin_assume(tail < kStride);
         if (__builtin_expect(tail, 0)) {
-            return _mm256_maskstore_ps((float*)dst, mask(tail), (F)v);
+            return _mm256_maskstore_epi32((int*)dst, mask(tail), v);
         }
         unaligned_store(dst, v);
     }