stage version of vertices
authorMike Reed <reed@google.com>
Fri, 19 May 2017 19:32:13 +0000 (15:32 -0400)
committerSkia Commit-Bot <skia-commit-bot@chromium.org>
Fri, 19 May 2017 19:43:06 +0000 (19:43 +0000)
This CL, just to limit its size/complexity, only handles
colors but not textures. Future CLs will cover everything.

Performance is pretty exciting. It's faster than the old code path,
and once we fix a bug in pathutils to preserve opaqueness, it gets
a lot faster (8 -> 5).

Bug: skia:
Change-Id: I4113060e25fe25fe4e6a0ea59bd4fa5e33abc668
Reviewed-on: https://skia-review.googlesource.com/17276
Commit-Queue: Mike Reed <reed@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Florin Malita <fmalita@chromium.org>
src/core/SkCoreBlitters.h
src/core/SkDraw_vertices.cpp
src/core/SkRasterPipeline.h
src/core/SkRasterPipelineBlitter.cpp
src/jumper/SkJumper_generated.S
src/jumper/SkJumper_generated_win.S
src/jumper/SkJumper_stages.cpp

index 0e8b819..277c8e7 100644 (file)
@@ -205,5 +205,9 @@ SkBlitter* SkBlitter_ChooseD565(const SkPixmap& device, const SkPaint& paint,
 // Returns nullptr if no SkRasterPipeline blitter can be constructed for this paint.
 SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap&, const SkPaint&, const SkMatrix& ctm,
                                          SkArenaAlloc*);
+SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap&, const SkPaint&, const SkMatrix& ctm,
+                                         const SkRasterPipeline& shaderPipeline,
+                                         bool shader_is_opaque, bool shader_wants_dither,
+                                         SkArenaAlloc*);
 
 #endif
index dcf8d58..9735fa8 100644 (file)
 #include "SkColorShader.h"
 #include "SkDraw.h"
 #include "SkNx.h"
-#include "SkPM4f.h"
+#include "SkPM4fPriv.h"
 #include "SkRasterClip.h"
 #include "SkScan.h"
 #include "SkShader.h"
 #include "SkString.h"
 #include "SkVertState.h"
 
+#include "SkRasterPipeline.h"
+#include "SkArenaAlloc.h"
+#include "SkCoreBlitters.h"
+#include "SkColorSpaceXform.h"
+#include "SkColorSpace_Base.h"
+
 struct Matrix43 {
     float fMat[12];    // column major
 
@@ -356,9 +362,67 @@ namespace {
 
         return alloc->makeSkSp<SkColorShader>(SkUnPreMultiply::PMColorToColor(pmColor));
     }
-
 } // anonymous ns
 
+// Fills *result with the 4x3 matrix used to interpolate the three vertex colors across the
+// triangle (index0, index1, index2): ctmInv is concatenated with the inverse of the triangle's
+// point matrix, and that is concatenated with a color matrix whose columns are
+// (c1 - c0), (c2 - c0) and c0.
+// Returns false, leaving *result untouched, when the triangle's point matrix is not invertible.
+static bool update_tricolor_matrix(const SkMatrix& ctmInv,
+                                   const SkPoint pts[], const SkPM4f colors[],
+                                   int index0, int index1, int index2, Matrix43* result) {
+    const SkPoint& p0 = pts[index0];
+    const SkPoint& p1 = pts[index1];
+    const SkPoint& p2 = pts[index2];
+
+    // Rows are [p1-p0, p2-p0, p0] in x and then in y; the remaining entries stay at identity.
+    SkMatrix unitToPts;
+    unitToPts.reset();
+    unitToPts.set(0, p1.fX - p0.fX);
+    unitToPts.set(1, p2.fX - p0.fX);
+    unitToPts.set(2, p0.fX);
+    unitToPts.set(3, p1.fY - p0.fY);
+    unitToPts.set(4, p2.fY - p0.fY);
+    unitToPts.set(5, p0.fY);
+
+    SkMatrix ptsToUnit;
+    if (!unitToPts.invert(&ptsToUnit)) {
+        return false;
+    }
+
+    SkMatrix dstToUnit;
+    dstToUnit.setConcat(ptsToUnit, ctmInv);
+
+    Sk4f base   = colors[index0].to4f();
+    Sk4f delta1 = colors[index1].to4f() - base;
+    Sk4f delta2 = colors[index2].to4f() - base;
+
+    Matrix43 colorm;
+    delta1.store(&colorm.fMat[0]);
+    delta2.store(&colorm.fMat[4]);
+    base.store(&colorm.fMat[8]);
+
+    result->setConcat(colorm, dstToUnit);
+    return true;
+}
+
+// Converts 'count' SkColors into an arena-allocated array of SkPM4f.
+// With a device colorspace, the colors are transformed from sRGB into the linear-gamma version
+// of that colorspace; without one, each color goes through SkPM4f_from_SkColor().
+static SkPM4f* convert_colors(const SkColor src[], int count, SkColorSpace* deviceCS,
+                              SkArenaAlloc* alloc) {
+    SkPM4f* dst = alloc->makeArray<SkPM4f>(count);
+    if (deviceCS) {
+        // For now, we want premul to happen on the colors before interpolation. If we later
+        // want to apply it after the interp, pass kUnpremul here.
+        SkAlphaType alphaVerb = kPremul_SkAlphaType;
+        auto srcCS = SkColorSpace::MakeSRGB();
+        auto dstCS = as_CSB(deviceCS)->makeLinearGamma();
+        auto xform = SkColorSpaceXform::New(srcCS.get(), dstCS.get());
+        xform->apply(SkColorSpaceXform::kRGBA_F32_ColorFormat, dst,
+                     SkColorSpaceXform::kBGRA_8888_ColorFormat, src,
+                     count, alphaVerb);
+    } else {
+        for (int index = 0; index < count; ++index) {
+            dst[index] = SkPM4f_from_SkColor(src[index], nullptr);
+        }
+    }
+    return dst;
+}
+
+// Returns true when every color in the array has an alpha of 0xFF (vacuously true when the
+// loop never runs).
+static bool compute_is_opaque(const SkColor colors[], int count) {
+    for (int index = 0; index < count; ++index) {
+        if (SkColorGetA(colors[index]) != 0xFF) {
+            return false;
+        }
+    }
+    return true;
+}
+
 void SkDraw::drawVertices(SkVertices::VertexMode vmode, int count,
                           const SkPoint vertices[], const SkPoint textures[],
                           const SkColor colors[], SkBlendMode bmode,
@@ -370,6 +434,10 @@ void SkDraw::drawVertices(SkVertices::VertexMode vmode, int count,
     if (count < 3 || (indices && indexCount < 3) || fRC->isEmpty()) {
         return;
     }
+    SkMatrix ctmInv;
+    if (!fMatrix->invert(&ctmInv)) {
+        return;
+    }
 
     // transform out vertices into device coordinates
     SkAutoSTMalloc<16, SkPoint> storage(count);
@@ -387,6 +455,53 @@ void SkDraw::drawVertices(SkVertices::VertexMode vmode, int count,
      Thus for texture drawing, we need both texture[] and a shader.
      */
 
+    // Fast path: per-vertex colors without texture coordinates are drawn through a raster
+    // pipeline whose matrix_4x3 stage is re-pointed at each triangle's color matrix.
+    if (colors && !textures) {
+        char             arenaStorage[4096];
+        // Size the arena from its own backing buffer, not from the unrelated 'storage' vertex
+        // array declared above, so the whole stack buffer is usable.
+        SkArenaAlloc     alloc(arenaStorage, sizeof(arenaStorage));
+        Matrix43         matrix43;
+        SkRasterPipeline shaderPipeline;
+
+        // Convert the SkColors into float colors. The conversion depends on some conditions:
+        // - If the pixmap has a dst colorspace, we have to be "color-correct".
+        //   Do we map into dst-colorspace before or after we interpolate?
+        // - We have to decide when to apply per-color alpha (before or after we interpolate)
+        //
+        // For now, we will take a simple approach, but recognize this is just a start:
+        // - convert colors into dst colorspace before interpolation (matches gradients)
+        // - apply per-color alpha before interpolation (matches old version of vertices)
+        //
+        SkPM4f* dstColors = convert_colors(colors, count, fDst.colorSpace(), &alloc);
+
+        // The stage keeps a pointer to matrix43, which is rewritten for every triangle below.
+        shaderPipeline.append(SkRasterPipeline::matrix_4x3, &matrix43);
+        // In theory we should never need to clamp. However, either due to imprecision in our
+        // matrix43, or the scan converter passing us pixel centers that in fact are not within
+        // the triangle, we do see occasional (slightly) out-of-range values, so we add these
+        // clamp stages. It would be nice to find a way to detect when these are not needed.
+        shaderPipeline.append(SkRasterPipeline::clamp_0);
+        shaderPipeline.append(SkRasterPipeline::clamp_a);
+
+        bool is_opaque = compute_is_opaque(colors, count),
+             wants_dither = paint.isDither();
+        auto blitter = SkCreateRasterPipelineBlitter(fDst, paint, *fMatrix, shaderPipeline,
+                                                     is_opaque, wants_dither, &alloc);
+        SkASSERT(!blitter->isNullBlitter());
+
+        // setup our state and function pointer for iterating triangles
+        VertState       state(count, indices, indexCount);
+        VertState::Proc vertProc = state.chooseProc(vmode);
+
+        while (vertProc(&state)) {
+            SkPoint tmp[] = {
+                devVerts[state.f0], devVerts[state.f1], devVerts[state.f2]
+            };
+            // Triangles whose color matrix cannot be built are skipped.
+            if (update_tricolor_matrix(ctmInv, vertices, dstColors, state.f0, state.f1, state.f2,
+                                       &matrix43)) {
+                SkScan::FillTriangle(tmp, *fRC, blitter);
+            }
+        }
+        return;
+    }
+
     auto triShader = sk_make_sp<SkTriColorShader>();
     SkPaint p(paint);
 
index 1579b45..a5f47c2 100644 (file)
@@ -82,7 +82,7 @@
     M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) \
     M(hue) M(saturation) M(color) M(luminosity)                  \
     M(luminance_to_alpha)                                        \
-    M(matrix_2x3) M(matrix_3x4) M(matrix_4x5)                    \
+    M(matrix_2x3) M(matrix_3x4) M(matrix_4x5) M(matrix_4x3)      \
     M(matrix_perspective)                                        \
     M(parametric_r) M(parametric_g) M(parametric_b)              \
     M(parametric_a)                                              \
index 363324b..b62b43f 100644 (file)
@@ -22,6 +22,10 @@ class SkRasterPipelineBlitter : public SkBlitter {
 public:
     static SkBlitter* Create(const SkPixmap&, const SkPaint&, const SkMatrix& ctm,
                              SkArenaAlloc*);
+    static SkBlitter* Create(const SkPixmap&, const SkPaint&, const SkMatrix& ctm,
+                             const SkRasterPipeline& shaderPipeline,
+                             bool is_opaque, bool wants_dither,
+                             SkArenaAlloc*);
 
     SkRasterPipelineBlitter(SkPixmap dst, SkBlendMode blend, SkPM4f paintColor)
         : fDst(dst)
@@ -38,6 +42,9 @@ public:
     // blits using something like a SkRasterPipeline::runFew() method.
 
 private:
+    // Shared tail of both Create() overloads: configures dithering, appends any color filter
+    // stages, and finishes setting up the blitter once the shader stages are in place.
+    void finishBuilding(const SkPaint& paint, const SkMatrix& ctm, bool is_opaque,
+                        bool is_constant, bool wants_dither, SkArenaAlloc* alloc);
+
     void append_load_d(SkRasterPipeline*) const;
     void append_blend (SkRasterPipeline*) const;
     void maybe_clamp  (SkRasterPipeline*) const;
@@ -85,48 +92,100 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
             paint.getBlendMode(),
             SkPM4f_from_SkColor(paint.getColor(), dst.colorSpace()));
 
-
-    SkBlendMode*      blend       = &blitter->fBlend;
     SkPM4f*           paintColor  = &blitter->fPaintColor;
     SkRasterPipeline* pipeline    = &blitter->fShader;
+    SkShader*         shader      = paint.getShader();
 
-    SkShader*      shader      = paint.getShader();
-    SkColorFilter* colorFilter = paint.getColorFilter();
-
-    // TODO: Think more about under what conditions we dither:
-    //   - if we're drawing anything into 565 and the user has asked us to dither, or
-    //   - if we're drawing a gradient into 565 or 8888.
-    if ((paint.isDither() && dst.info().colorType() == kRGB_565_SkColorType) ||
-        (shader && shader->asAGradient(nullptr) >= SkShader::kLinear_GradientType)) {
-        switch (dst.info().colorType()) {
-            default:                     blitter->fDitherCtx.rate =     0.0f; break;
-            case   kRGB_565_SkColorType: blitter->fDitherCtx.rate =  1/63.0f; break;
-            case kRGBA_8888_SkColorType:
-            case kBGRA_8888_SkColorType: blitter->fDitherCtx.rate = 1/255.0f; break;
-        }
-    }
+    bool is_opaque    = paintColor->a() == 1.0f,
+         is_constant  = true,
+         wants_dither = false;
 
-    bool is_opaque   = paintColor->a() == 1.0f,
-         is_constant = blitter->fDitherCtx.rate == 0.0f;
     if (shader) {
         pipeline->append(SkRasterPipeline::seed_shader, &blitter->fCurrentY);
         if (!shader->appendStages(pipeline, dst.colorSpace(), alloc, ctm, paint)) {
             // When a shader fails to append stages, it means it has vetoed drawing entirely.
             return alloc->make<SkNullBlitter>();
         }
+
         if (!is_opaque) {
-            pipeline->append(SkRasterPipeline::scale_1_float,
-                             &paintColor->fVec[SkPM4f::A]);
+            pipeline->append(SkRasterPipeline::scale_1_float, &paintColor->fVec[SkPM4f::A]);
         }
+        is_opaque    = is_opaque && shader->isOpaque();
+        is_constant  = shader->isConstant();
+        wants_dither = shader->asAGradient(nullptr) >= SkShader::kLinear_GradientType;
 
-        is_opaque   = is_opaque   && shader->isOpaque();
-        is_constant = is_constant && shader->isConstant();
     } else {
         pipeline->append(SkRasterPipeline::constant_color, paintColor);
     }
 
+    blitter->finishBuilding(paint, ctm, is_opaque, is_constant, wants_dither, alloc);
+    return blitter;
+}
+
+// Free-function entry point for building a blitter around a caller-supplied shader pipeline;
+// it forwards every argument unchanged to SkRasterPipelineBlitter::Create().
+SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap& dst, const SkPaint& paint,
+                                         const SkMatrix& ctm,
+                                         const SkRasterPipeline& shaderPipeline,
+                                         bool is_opaque, bool wants_dither,
+                                         SkArenaAlloc* alloc) {
+    SkBlitter* blitter = SkRasterPipelineBlitter::Create(dst, paint, ctm, shaderPipeline,
+                                                         is_opaque, wants_dither, alloc);
+    return blitter;
+}
+
+// Builds a blitter whose shader stages come from a caller-supplied pipeline instead of from
+// paint.getShader(). The caller states whether that pipeline's output is opaque and whether it
+// wants dithering; paint alpha is still applied here.
+SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
+                                           const SkPaint& paint,
+                                           const SkMatrix& ctm,
+                                           const SkRasterPipeline& shaderPipeline,
+                                           bool is_opaque, bool wants_dither,
+                                           SkArenaAlloc* alloc) {
+    auto blitter = alloc->make<SkRasterPipelineBlitter>(
+            dst,
+            paint.getBlendMode(),
+            SkPM4f_from_SkColor(paint.getColor(), dst.colorSpace()));
+
+    SkRasterPipeline* stages = &blitter->fShader;
+    stages->append(SkRasterPipeline::seed_shader, &blitter->fCurrentY);
+    stages->extend(shaderPipeline);
+
+    // A non-opaque paint scales the shader output by the paint's alpha, which also means the
+    // overall result can no longer be treated as opaque.
+    SkPM4f*    paintColor    = &blitter->fPaintColor;
+    const bool paintIsOpaque = paintColor->a() == 1.0f;
+    if (!paintIsOpaque) {
+        stages->append(SkRasterPipeline::scale_1_float, &paintColor->fVec[SkPM4f::A]);
+    }
+
+    // we figure a custom shaderPipeline is never constant
+    blitter->finishBuilding(paint, ctm, is_opaque && paintIsOpaque, /*is_constant=*/false,
+                            wants_dither, alloc);
+    return blitter;
+}
+
+void SkRasterPipelineBlitter::finishBuilding(const SkPaint& paint, const SkMatrix& ctm,
+                                             bool is_opaque, bool is_constant, bool wants_dither,
+                                             SkArenaAlloc* alloc) {
+    SkBlendMode*      blend       = &fBlend;
+    SkPM4f*           paintColor  = &fPaintColor;
+    SkRasterPipeline* pipeline    = &fShader;
+    SkColorFilter*    colorFilter = paint.getColorFilter();
+
+    SkASSERT(fDitherCtx.rate == 0);
+    if ((paint.isDither() && fDst.info().colorType() == kRGB_565_SkColorType) || wants_dither) {
+        switch (fDst.info().colorType()) {
+            case kRGB_565_SkColorType:
+                fDitherCtx.rate =  1/63.0f;
+                is_constant = false;
+                break;
+            case kRGBA_8888_SkColorType:
+            case kBGRA_8888_SkColorType:
+                fDitherCtx.rate = 1/255.0f;
+                is_constant = false;
+                break;
+            default:
+                break;
+        }
+    }
+
     if (colorFilter) {
-        colorFilter->appendStages(pipeline, dst.colorSpace(), alloc, is_opaque);
+        colorFilter->appendStages(pipeline, fDst.colorSpace(), alloc, is_opaque);
         is_opaque = is_opaque && (colorFilter->getFlags() & SkColorFilter::kAlphaUnchanged_Flag);
     }
 
@@ -147,14 +206,12 @@ SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
     if (is_constant && *blend == SkBlendMode::kSrc) {
         SkRasterPipeline p;
         p.extend(*pipeline);
-        blitter->fDstPtr = &blitter->fMemsetColor;
-        blitter->append_store(&p);
+        fDstPtr = &fMemsetColor;
+        this->append_store(&p);
         p.run(0,1);
 
-        blitter->fCanMemsetInBlitH = true;
+        fCanMemsetInBlitH = true;
     }
-
-    return blitter;
 }
 
 void SkRasterPipelineBlitter::append_load_d(SkRasterPipeline* p) const {
index 6fe7c66..fb90560 100644 (file)
@@ -3504,6 +3504,39 @@ _sk_matrix_4x5_aarch64:
   .long  0x4eb31e63                          // mov           v3.16b, v19.16b
   .long  0xd61f0060                          // br            x3
 
+// matrix_4x3 stage (aarch64). x8 = pointer loaded from [x1], addressing 12 floats m[0..11].
+// With v0 and v1 as inputs, writes:
+//   v0 = m[8]  + v1*m[4] + v0*m[0]
+//   v1 = m[9]  + v1*m[5] + v0*m[1]
+//   v2 = m[10] + v1*m[6] + v0*m[2]
+//   v3 = m[11] + v1*m[7] + v0*m[3]
+// then advances x1 by 16 and branches to the pointer loaded from [x1, #8].
+HIDDEN _sk_matrix_4x3_aarch64
+.globl _sk_matrix_4x3_aarch64
+FUNCTION(_sk_matrix_4x3_aarch64)
+_sk_matrix_4x3_aarch64:
+  .long  0xf9400028                          // ldr           x8, [x1]
+  .long  0xaa0803e9                          // mov           x9, x8
+  .long  0x9100810a                          // add           x10, x8, #0x20
+  .long  0x4ddfc932                          // ld1r          {v18.4s}, [x9], #4
+  .long  0x4d40c950                          // ld1r          {v16.4s}, [x10]
+  .long  0x9100910a                          // add           x10, x8, #0x24
+  .long  0x4d40c951                          // ld1r          {v17.4s}, [x10]
+  .long  0x9100a10a                          // add           x10, x8, #0x28
+  .long  0x2d425113                          // ldp           s19, s20, [x8, #16]
+  .long  0x4d40c942                          // ld1r          {v2.4s}, [x10]
+  .long  0x9100b10a                          // add           x10, x8, #0x2c
+  .long  0x2d435915                          // ldp           s21, s22, [x8, #24]
+  .long  0x4d40c943                          // ld1r          {v3.4s}, [x10]
+  .long  0x4f931030                          // fmla          v16.4s, v1.4s, v19.s[0]
+  .long  0x4e20ce50                          // fmla          v16.4s, v18.4s, v0.4s
+  .long  0xbd400132                          // ldr           s18, [x9]
+  .long  0x4f941031                          // fmla          v17.4s, v1.4s, v20.s[0]
+  .long  0x4f951022                          // fmla          v2.4s, v1.4s, v21.s[0]
+  .long  0x4f961023                          // fmla          v3.4s, v1.4s, v22.s[0]
+  .long  0x2d414d01                          // ldp           s1, s19, [x8, #8]
+  .long  0xf9400423                          // ldr           x3, [x1, #8]
+  .long  0x4f921011                          // fmla          v17.4s, v0.4s, v18.s[0]
+  .long  0x91004021                          // add           x1, x1, #0x10
+  .long  0x4f811002                          // fmla          v2.4s, v0.4s, v1.s[0]
+  .long  0x4f931003                          // fmla          v3.4s, v0.4s, v19.s[0]
+  .long  0x4eb01e00                          // mov           v0.16b, v16.16b
+  .long  0x4eb11e21                          // mov           v1.16b, v17.16b
+  .long  0xd61f0060                          // br            x3
+
 HIDDEN _sk_matrix_perspective_aarch64
 .globl _sk_matrix_perspective_aarch64
 FUNCTION(_sk_matrix_perspective_aarch64)
@@ -3642,7 +3675,7 @@ _sk_gradient_aarch64:
   .long  0x6f00e411                          // movi          v17.2d, #0x0
   .long  0xf9400109                          // ldr           x9, [x8]
   .long  0xf100093f                          // cmp           x9, #0x2
-  .long  0x540001c3                          // b.cc          30b8 <sk_gradient_aarch64+0x58>  // b.lo, b.ul, b.last
+  .long  0x540001c3                          // b.cc          3128 <sk_gradient_aarch64+0x58>  // b.lo, b.ul, b.last
   .long  0xf940250a                          // ldr           x10, [x8, #72]
   .long  0xd1000529                          // sub           x9, x9, #0x1
   .long  0x6f00e401                          // movi          v1.2d, #0x0
@@ -3653,7 +3686,7 @@ _sk_gradient_aarch64:
   .long  0x6e23e403                          // fcmge         v3.4s, v0.4s, v3.4s
   .long  0x4e221c63                          // and           v3.16b, v3.16b, v2.16b
   .long  0x4ea18461                          // add           v1.4s, v3.4s, v1.4s
-  .long  0xb5ffff69                          // cbnz          x9, 3098 <sk_gradient_aarch64+0x38>
+  .long  0xb5ffff69                          // cbnz          x9, 3108 <sk_gradient_aarch64+0x38>
   .long  0x6f20a431                          // uxtl2         v17.2d, v1.4s
   .long  0x2f20a421                          // uxtl          v1.2d, v1.2s
   .long  0xa940b10a                          // ldp           x10, x12, [x8, #8]
@@ -8009,6 +8042,49 @@ _sk_matrix_4x5_vfp4:
   .long  0xe8bd4010                          // pop           {r4, lr}
   .long  0xe12fff1c                          // bx            ip
 
+// matrix_4x3 stage (ARMv7 vfp4). ldm loads r3 and ip from [r1]; r3 addresses 12 floats
+// m[0..11] and ip is the branch target at the end. With d0 and d1 as inputs, writes:
+//   d0 = m[8]  + d1*m[4] + d0*m[0]
+//   d1 = m[9]  + d1*m[5] + d0*m[1]
+//   d2 = m[10] + d1*m[6] + d0*m[2]
+//   d3 = m[11] + d1*m[7] + d0*m[3]
+// r1 is advanced by 8; r4 and lr are used as scratch and restored before the branch.
+HIDDEN _sk_matrix_4x3_vfp4
+.globl _sk_matrix_4x3_vfp4
+FUNCTION(_sk_matrix_4x3_vfp4)
+_sk_matrix_4x3_vfp4:
+  .long  0xe92d4010                          // push          {r4, lr}
+  .long  0xe8911008                          // ldm           r1, {r3, ip}
+  .long  0xe2811008                          // add           r1, r1, #8
+  .long  0xe2834018                          // add           r4, r3, #24
+  .long  0xe1a0e003                          // mov           lr, r3
+  .long  0xf4e43c9f                          // vld1.32       {d19[]}, [r4 :32]
+  .long  0xe2834028                          // add           r4, r3, #40
+  .long  0xf4a42c9f                          // vld1.32       {d2[]}, [r4 :32]
+  .long  0xe2834014                          // add           r4, r3, #20
+  .long  0xf2012c33                          // vfma.f32      d2, d1, d19
+  .long  0xf4e44c9f                          // vld1.32       {d20[]}, [r4 :32]
+  .long  0xe2834010                          // add           r4, r3, #16
+  .long  0xf4e41c9f                          // vld1.32       {d17[]}, [r4 :32]
+  .long  0xe2834020                          // add           r4, r3, #32
+  .long  0xf4e40c9f                          // vld1.32       {d16[]}, [r4 :32]
+  .long  0xe283401c                          // add           r4, r3, #28
+  .long  0xf2410c31                          // vfma.f32      d16, d1, d17
+  .long  0xf4e45c9f                          // vld1.32       {d21[]}, [r4 :32]
+  .long  0xe283402c                          // add           r4, r3, #44
+  .long  0xf4a43c9f                          // vld1.32       {d3[]}, [r4 :32]
+  .long  0xe2834024                          // add           r4, r3, #36
+  .long  0xf2013c35                          // vfma.f32      d3, d1, d21
+  .long  0xf4e41c9f                          // vld1.32       {d17[]}, [r4 :32]
+  .long  0xe2834008                          // add           r4, r3, #8
+  .long  0xf2411c34                          // vfma.f32      d17, d1, d20
+  .long  0xe283300c                          // add           r3, r3, #12
+  .long  0xf4ee2c9d                          // vld1.32       {d18[]}, [lr :32]!
+  .long  0xf2400c32                          // vfma.f32      d16, d0, d18
+  .long  0xf4ee4c9f                          // vld1.32       {d20[]}, [lr :32]
+  .long  0xf4e43c9f                          // vld1.32       {d19[]}, [r4 :32]
+  .long  0xf2002c33                          // vfma.f32      d2, d0, d19
+  .long  0xf2401c34                          // vfma.f32      d17, d0, d20
+  .long  0xf4e32c9f                          // vld1.32       {d18[]}, [r3 :32]
+  .long  0xf2003c32                          // vfma.f32      d3, d0, d18
+  .long  0xf22001b0                          // vorr          d0, d16, d16
+  .long  0xf22111b1                          // vorr          d1, d17, d17
+  .long  0xe8bd4010                          // pop           {r4, lr}
+  .long  0xe12fff1c                          // bx            ip
+
 HIDDEN _sk_matrix_perspective_vfp4
 .globl _sk_matrix_perspective_vfp4
 FUNCTION(_sk_matrix_perspective_vfp4)
@@ -8123,7 +8199,7 @@ _sk_gradient_vfp4:
   .long  0xf2c00010                          // vmov.i32      d16, #0
   .long  0xe59c3000                          // ldr           r3, [ip]
   .long  0xe3530002                          // cmp           r3, #2
-  .long  0x3a00000b                          // bcc           3654 <sk_gradient_vfp4+0x50>
+  .long  0x3a00000b                          // bcc           36ec <sk_gradient_vfp4+0x50>
   .long  0xe59c4024                          // ldr           r4, [ip, #36]
   .long  0xf2c01010                          // vmov.i32      d17, #0
   .long  0xf2c02011                          // vmov.i32      d18, #1
@@ -8135,7 +8211,7 @@ _sk_gradient_vfp4:
   .long  0xf3403e23                          // vcge.f32      d19, d0, d19
   .long  0xf35231b1                          // vbsl          d19, d18, d17
   .long  0xf26308a0                          // vadd.i32      d16, d19, d16
-  .long  0x1afffff9                          // bne           363c <sk_gradient_vfp4+0x38>
+  .long  0x1afffff9                          // bne           36d4 <sk_gradient_vfp4+0x38>
   .long  0xee303b90                          // vmov.32       r3, d16[1]
   .long  0xe59c7010                          // ldr           r7, [ip, #16]
   .long  0xee10eb90                          // vmov.32       lr, d16[0]
@@ -8728,14 +8804,14 @@ _sk_seed_shader_hsw:
   .byte  197,249,110,199                     // vmovd         %edi,%xmm0
   .byte  196,226,125,88,192                  // vpbroadcastd  %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,221,70,0,0        // vbroadcastss  0x46dd(%rip),%ymm1        # 47a0 <_sk_callback_hsw+0x128>
+  .byte  196,226,125,24,13,89,71,0,0         // vbroadcastss  0x4759(%rip),%ymm1        # 481c <_sk_callback_hsw+0x127>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,88,2                        // vaddps        (%rdx),%ymm0,%ymm0
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  197,236,88,201                      // vaddps        %ymm1,%ymm2,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,21,193,70,0,0        // vbroadcastss  0x46c1(%rip),%ymm2        # 47a4 <_sk_callback_hsw+0x12c>
+  .byte  196,226,125,24,21,61,71,0,0         // vbroadcastss  0x473d(%rip),%ymm2        # 4820 <_sk_callback_hsw+0x12b>
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
   .byte  197,220,87,228                      // vxorps        %ymm4,%ymm4,%ymm4
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
@@ -8756,13 +8832,13 @@ _sk_dither_hsw:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  196,66,125,88,8                     // vpbroadcastd  (%r8),%ymm9
   .byte  196,65,61,239,201                   // vpxor         %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,88,21,128,70,0,0         // vpbroadcastd  0x4680(%rip),%ymm10        # 47a8 <_sk_callback_hsw+0x130>
+  .byte  196,98,125,88,21,252,70,0,0         // vpbroadcastd  0x46fc(%rip),%ymm10        # 4824 <_sk_callback_hsw+0x12f>
   .byte  196,65,53,219,218                   // vpand         %ymm10,%ymm9,%ymm11
   .byte  196,193,37,114,243,5                // vpslld        $0x5,%ymm11,%ymm11
   .byte  196,65,61,219,210                   // vpand         %ymm10,%ymm8,%ymm10
   .byte  196,193,45,114,242,4                // vpslld        $0x4,%ymm10,%ymm10
-  .byte  196,98,125,88,37,101,70,0,0         // vpbroadcastd  0x4665(%rip),%ymm12        # 47ac <_sk_callback_hsw+0x134>
-  .byte  196,98,125,88,45,96,70,0,0          // vpbroadcastd  0x4660(%rip),%ymm13        # 47b0 <_sk_callback_hsw+0x138>
+  .byte  196,98,125,88,37,225,70,0,0         // vpbroadcastd  0x46e1(%rip),%ymm12        # 4828 <_sk_callback_hsw+0x133>
+  .byte  196,98,125,88,45,220,70,0,0         // vpbroadcastd  0x46dc(%rip),%ymm13        # 482c <_sk_callback_hsw+0x137>
   .byte  196,65,53,219,245                   // vpand         %ymm13,%ymm9,%ymm14
   .byte  196,193,13,114,246,2                // vpslld        $0x2,%ymm14,%ymm14
   .byte  196,65,61,219,237                   // vpand         %ymm13,%ymm8,%ymm13
@@ -8777,8 +8853,8 @@ _sk_dither_hsw:
   .byte  196,65,61,235,194                   // vpor          %ymm10,%ymm8,%ymm8
   .byte  196,65,61,235,193                   // vpor          %ymm9,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,18,70,0,0          // vbroadcastss  0x4612(%rip),%ymm9        # 47b4 <_sk_callback_hsw+0x13c>
-  .byte  196,98,125,24,21,13,70,0,0          // vbroadcastss  0x460d(%rip),%ymm10        # 47b8 <_sk_callback_hsw+0x140>
+  .byte  196,98,125,24,13,142,70,0,0         // vbroadcastss  0x468e(%rip),%ymm9        # 4830 <_sk_callback_hsw+0x13b>
+  .byte  196,98,125,24,21,137,70,0,0         // vbroadcastss  0x4689(%rip),%ymm10        # 4834 <_sk_callback_hsw+0x13f>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  196,98,125,24,64,8                  // vbroadcastss  0x8(%rax),%ymm8
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
@@ -8847,7 +8923,7 @@ HIDDEN _sk_srcatop_hsw
 FUNCTION(_sk_srcatop_hsw)
 _sk_srcatop_hsw:
   .byte  197,252,89,199                      // vmulps        %ymm7,%ymm0,%ymm0
-  .byte  196,98,125,24,5,100,69,0,0          // vbroadcastss  0x4564(%rip),%ymm8        # 47bc <_sk_callback_hsw+0x144>
+  .byte  196,98,125,24,5,224,69,0,0          // vbroadcastss  0x45e0(%rip),%ymm8        # 4838 <_sk_callback_hsw+0x143>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,226,61,184,196                  // vfmadd231ps   %ymm4,%ymm8,%ymm0
   .byte  197,244,89,207                      // vmulps        %ymm7,%ymm1,%ymm1
@@ -8863,7 +8939,7 @@ HIDDEN _sk_dstatop_hsw
 .globl _sk_dstatop_hsw
 FUNCTION(_sk_dstatop_hsw)
 _sk_dstatop_hsw:
-  .byte  196,98,125,24,5,55,69,0,0           // vbroadcastss  0x4537(%rip),%ymm8        # 47c0 <_sk_callback_hsw+0x148>
+  .byte  196,98,125,24,5,179,69,0,0          // vbroadcastss  0x45b3(%rip),%ymm8        # 483c <_sk_callback_hsw+0x147>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  196,226,101,184,196                 // vfmadd231ps   %ymm4,%ymm3,%ymm0
@@ -8902,7 +8978,7 @@ HIDDEN _sk_srcout_hsw
 .globl _sk_srcout_hsw
 FUNCTION(_sk_srcout_hsw)
 _sk_srcout_hsw:
-  .byte  196,98,125,24,5,222,68,0,0          // vbroadcastss  0x44de(%rip),%ymm8        # 47c4 <_sk_callback_hsw+0x14c>
+  .byte  196,98,125,24,5,90,69,0,0           // vbroadcastss  0x455a(%rip),%ymm8        # 4840 <_sk_callback_hsw+0x14b>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -8915,7 +8991,7 @@ HIDDEN _sk_dstout_hsw
 .globl _sk_dstout_hsw
 FUNCTION(_sk_dstout_hsw)
 _sk_dstout_hsw:
-  .byte  196,226,125,24,5,193,68,0,0         // vbroadcastss  0x44c1(%rip),%ymm0        # 47c8 <_sk_callback_hsw+0x150>
+  .byte  196,226,125,24,5,61,69,0,0          // vbroadcastss  0x453d(%rip),%ymm0        # 4844 <_sk_callback_hsw+0x14f>
   .byte  197,252,92,219                      // vsubps        %ymm3,%ymm0,%ymm3
   .byte  197,228,89,196                      // vmulps        %ymm4,%ymm3,%ymm0
   .byte  197,228,89,205                      // vmulps        %ymm5,%ymm3,%ymm1
@@ -8928,7 +9004,7 @@ HIDDEN _sk_srcover_hsw
 .globl _sk_srcover_hsw
 FUNCTION(_sk_srcover_hsw)
 _sk_srcover_hsw:
-  .byte  196,98,125,24,5,164,68,0,0          // vbroadcastss  0x44a4(%rip),%ymm8        # 47cc <_sk_callback_hsw+0x154>
+  .byte  196,98,125,24,5,32,69,0,0           // vbroadcastss  0x4520(%rip),%ymm8        # 4848 <_sk_callback_hsw+0x153>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,93,184,192                  // vfmadd231ps   %ymm8,%ymm4,%ymm0
   .byte  196,194,85,184,200                  // vfmadd231ps   %ymm8,%ymm5,%ymm1
@@ -8941,7 +9017,7 @@ HIDDEN _sk_dstover_hsw
 .globl _sk_dstover_hsw
 FUNCTION(_sk_dstover_hsw)
 _sk_dstover_hsw:
-  .byte  196,98,125,24,5,131,68,0,0          // vbroadcastss  0x4483(%rip),%ymm8        # 47d0 <_sk_callback_hsw+0x158>
+  .byte  196,98,125,24,5,255,68,0,0          // vbroadcastss  0x44ff(%rip),%ymm8        # 484c <_sk_callback_hsw+0x157>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  196,226,61,168,196                  // vfmadd213ps   %ymm4,%ymm8,%ymm0
   .byte  196,226,61,168,205                  // vfmadd213ps   %ymm5,%ymm8,%ymm1
@@ -8965,7 +9041,7 @@ HIDDEN _sk_multiply_hsw
 .globl _sk_multiply_hsw
 FUNCTION(_sk_multiply_hsw)
 _sk_multiply_hsw:
-  .byte  196,98,125,24,5,78,68,0,0           // vbroadcastss  0x444e(%rip),%ymm8        # 47d4 <_sk_callback_hsw+0x15c>
+  .byte  196,98,125,24,5,202,68,0,0          // vbroadcastss  0x44ca(%rip),%ymm8        # 4850 <_sk_callback_hsw+0x15b>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,208                       // vmulps        %ymm0,%ymm9,%ymm10
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -9013,7 +9089,7 @@ HIDDEN _sk_xor__hsw
 .globl _sk_xor__hsw
 FUNCTION(_sk_xor__hsw)
 _sk_xor__hsw:
-  .byte  196,98,125,24,5,201,67,0,0          // vbroadcastss  0x43c9(%rip),%ymm8        # 47d8 <_sk_callback_hsw+0x160>
+  .byte  196,98,125,24,5,69,68,0,0           // vbroadcastss  0x4445(%rip),%ymm8        # 4854 <_sk_callback_hsw+0x15f>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -9047,7 +9123,7 @@ _sk_darken_hsw:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,95,209                  // vmaxps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,81,67,0,0           // vbroadcastss  0x4351(%rip),%ymm8        # 47dc <_sk_callback_hsw+0x164>
+  .byte  196,98,125,24,5,205,67,0,0          // vbroadcastss  0x43cd(%rip),%ymm8        # 4858 <_sk_callback_hsw+0x163>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -9072,7 +9148,7 @@ _sk_lighten_hsw:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,0,67,0,0            // vbroadcastss  0x4300(%rip),%ymm8        # 47e0 <_sk_callback_hsw+0x168>
+  .byte  196,98,125,24,5,124,67,0,0          // vbroadcastss  0x437c(%rip),%ymm8        # 485c <_sk_callback_hsw+0x167>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -9100,7 +9176,7 @@ _sk_difference_hsw:
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,163,66,0,0          // vbroadcastss  0x42a3(%rip),%ymm8        # 47e4 <_sk_callback_hsw+0x16c>
+  .byte  196,98,125,24,5,31,67,0,0           // vbroadcastss  0x431f(%rip),%ymm8        # 4860 <_sk_callback_hsw+0x16b>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -9122,7 +9198,7 @@ _sk_exclusion_hsw:
   .byte  197,236,89,214                      // vmulps        %ymm6,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,97,66,0,0           // vbroadcastss  0x4261(%rip),%ymm8        # 47e8 <_sk_callback_hsw+0x170>
+  .byte  196,98,125,24,5,221,66,0,0          // vbroadcastss  0x42dd(%rip),%ymm8        # 4864 <_sk_callback_hsw+0x16f>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  196,194,69,184,216                  // vfmadd231ps   %ymm8,%ymm7,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -9132,7 +9208,7 @@ HIDDEN _sk_colorburn_hsw
 .globl _sk_colorburn_hsw
 FUNCTION(_sk_colorburn_hsw)
 _sk_colorburn_hsw:
-  .byte  196,98,125,24,5,79,66,0,0           // vbroadcastss  0x424f(%rip),%ymm8        # 47ec <_sk_callback_hsw+0x174>
+  .byte  196,98,125,24,5,203,66,0,0          // vbroadcastss  0x42cb(%rip),%ymm8        # 4868 <_sk_callback_hsw+0x173>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,216                       // vmulps        %ymm0,%ymm9,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
@@ -9190,7 +9266,7 @@ HIDDEN _sk_colordodge_hsw
 FUNCTION(_sk_colordodge_hsw)
 _sk_colordodge_hsw:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,13,90,65,0,0          // vbroadcastss  0x415a(%rip),%ymm9        # 47f0 <_sk_callback_hsw+0x178>
+  .byte  196,98,125,24,13,214,65,0,0         // vbroadcastss  0x41d6(%rip),%ymm9        # 486c <_sk_callback_hsw+0x177>
   .byte  197,52,92,215                       // vsubps        %ymm7,%ymm9,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,52,92,203                       // vsubps        %ymm3,%ymm9,%ymm9
@@ -9243,7 +9319,7 @@ HIDDEN _sk_hardlight_hsw
 .globl _sk_hardlight_hsw
 FUNCTION(_sk_hardlight_hsw)
 _sk_hardlight_hsw:
-  .byte  196,98,125,24,5,123,64,0,0          // vbroadcastss  0x407b(%rip),%ymm8        # 47f4 <_sk_callback_hsw+0x17c>
+  .byte  196,98,125,24,5,247,64,0,0          // vbroadcastss  0x40f7(%rip),%ymm8        # 4870 <_sk_callback_hsw+0x17b>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -9294,7 +9370,7 @@ HIDDEN _sk_overlay_hsw
 .globl _sk_overlay_hsw
 FUNCTION(_sk_overlay_hsw)
 _sk_overlay_hsw:
-  .byte  196,98,125,24,5,179,63,0,0          // vbroadcastss  0x3fb3(%rip),%ymm8        # 47f8 <_sk_callback_hsw+0x180>
+  .byte  196,98,125,24,5,47,64,0,0           // vbroadcastss  0x402f(%rip),%ymm8        # 4874 <_sk_callback_hsw+0x17f>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -9355,10 +9431,10 @@ _sk_softlight_hsw:
   .byte  196,65,20,88,197                    // vaddps        %ymm13,%ymm13,%ymm8
   .byte  196,65,60,88,192                    // vaddps        %ymm8,%ymm8,%ymm8
   .byte  196,66,61,168,192                   // vfmadd213ps   %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,29,190,62,0,0         // vbroadcastss  0x3ebe(%rip),%ymm11        # 4800 <_sk_callback_hsw+0x188>
+  .byte  196,98,125,24,29,58,63,0,0          // vbroadcastss  0x3f3a(%rip),%ymm11        # 487c <_sk_callback_hsw+0x187>
   .byte  196,65,20,88,227                    // vaddps        %ymm11,%ymm13,%ymm12
   .byte  196,65,28,89,192                    // vmulps        %ymm8,%ymm12,%ymm8
-  .byte  196,98,125,24,37,175,62,0,0         // vbroadcastss  0x3eaf(%rip),%ymm12        # 4804 <_sk_callback_hsw+0x18c>
+  .byte  196,98,125,24,37,43,63,0,0          // vbroadcastss  0x3f2b(%rip),%ymm12        # 4880 <_sk_callback_hsw+0x18b>
   .byte  196,66,21,184,196                   // vfmadd231ps   %ymm12,%ymm13,%ymm8
   .byte  196,65,124,82,245                   // vrsqrtps      %ymm13,%ymm14
   .byte  196,65,124,83,246                   // vrcpps        %ymm14,%ymm14
@@ -9368,7 +9444,7 @@ _sk_softlight_hsw:
   .byte  197,4,194,255,2                     // vcmpleps      %ymm7,%ymm15,%ymm15
   .byte  196,67,13,74,240,240                // vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   .byte  197,116,88,249                      // vaddps        %ymm1,%ymm1,%ymm15
-  .byte  196,98,125,24,5,114,62,0,0          // vbroadcastss  0x3e72(%rip),%ymm8        # 47fc <_sk_callback_hsw+0x184>
+  .byte  196,98,125,24,5,238,62,0,0          // vbroadcastss  0x3eee(%rip),%ymm8        # 4878 <_sk_callback_hsw+0x183>
   .byte  196,65,60,92,237                    // vsubps        %ymm13,%ymm8,%ymm13
   .byte  197,132,92,195                      // vsubps        %ymm3,%ymm15,%ymm0
   .byte  196,98,125,168,235                  // vfmadd213ps   %ymm3,%ymm0,%ymm13
@@ -9481,11 +9557,11 @@ _sk_hue_hsw:
   .byte  196,65,28,89,210                    // vmulps        %ymm10,%ymm12,%ymm10
   .byte  196,65,44,94,214                    // vdivps        %ymm14,%ymm10,%ymm10
   .byte  196,67,45,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm10,%ymm12
-  .byte  196,98,125,24,53,118,60,0,0         // vbroadcastss  0x3c76(%rip),%ymm14        # 4808 <_sk_callback_hsw+0x190>
-  .byte  196,98,125,24,61,113,60,0,0         // vbroadcastss  0x3c71(%rip),%ymm15        # 480c <_sk_callback_hsw+0x194>
+  .byte  196,98,125,24,53,242,60,0,0         // vbroadcastss  0x3cf2(%rip),%ymm14        # 4884 <_sk_callback_hsw+0x18f>
+  .byte  196,98,125,24,61,237,60,0,0         // vbroadcastss  0x3ced(%rip),%ymm15        # 4888 <_sk_callback_hsw+0x193>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,66,93,184,238                   // vfmadd231ps   %ymm14,%ymm4,%ymm13
-  .byte  196,226,125,24,5,98,60,0,0          // vbroadcastss  0x3c62(%rip),%ymm0        # 4810 <_sk_callback_hsw+0x198>
+  .byte  196,226,125,24,5,222,60,0,0         // vbroadcastss  0x3cde(%rip),%ymm0        # 488c <_sk_callback_hsw+0x197>
   .byte  196,98,77,184,232                   // vfmadd231ps   %ymm0,%ymm6,%ymm13
   .byte  196,65,116,89,215                   // vmulps        %ymm15,%ymm1,%ymm10
   .byte  196,66,53,184,214                   // vfmadd231ps   %ymm14,%ymm9,%ymm10
@@ -9540,7 +9616,7 @@ _sk_hue_hsw:
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
   .byte  196,65,36,95,200                    // vmaxps        %ymm8,%ymm11,%ymm9
   .byte  196,65,116,95,192                   // vmaxps        %ymm8,%ymm1,%ymm8
-  .byte  196,226,125,24,13,79,59,0,0         // vbroadcastss  0x3b4f(%rip),%ymm1        # 4814 <_sk_callback_hsw+0x19c>
+  .byte  196,226,125,24,13,203,59,0,0        // vbroadcastss  0x3bcb(%rip),%ymm1        # 4890 <_sk_callback_hsw+0x19b>
   .byte  197,116,92,215                      // vsubps        %ymm7,%ymm1,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,116,92,219                      // vsubps        %ymm3,%ymm1,%ymm11
@@ -9594,11 +9670,11 @@ _sk_saturation_hsw:
   .byte  196,65,28,89,210                    // vmulps        %ymm10,%ymm12,%ymm10
   .byte  196,65,44,94,214                    // vdivps        %ymm14,%ymm10,%ymm10
   .byte  196,67,45,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm10,%ymm12
-  .byte  196,98,125,24,53,102,58,0,0         // vbroadcastss  0x3a66(%rip),%ymm14        # 4818 <_sk_callback_hsw+0x1a0>
-  .byte  196,98,125,24,61,97,58,0,0          // vbroadcastss  0x3a61(%rip),%ymm15        # 481c <_sk_callback_hsw+0x1a4>
+  .byte  196,98,125,24,53,226,58,0,0         // vbroadcastss  0x3ae2(%rip),%ymm14        # 4894 <_sk_callback_hsw+0x19f>
+  .byte  196,98,125,24,61,221,58,0,0         // vbroadcastss  0x3add(%rip),%ymm15        # 4898 <_sk_callback_hsw+0x1a3>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,66,93,184,238                   // vfmadd231ps   %ymm14,%ymm4,%ymm13
-  .byte  196,226,125,24,5,82,58,0,0          // vbroadcastss  0x3a52(%rip),%ymm0        # 4820 <_sk_callback_hsw+0x1a8>
+  .byte  196,226,125,24,5,206,58,0,0         // vbroadcastss  0x3ace(%rip),%ymm0        # 489c <_sk_callback_hsw+0x1a7>
   .byte  196,98,77,184,232                   // vfmadd231ps   %ymm0,%ymm6,%ymm13
   .byte  196,65,116,89,215                   // vmulps        %ymm15,%ymm1,%ymm10
   .byte  196,66,53,184,214                   // vfmadd231ps   %ymm14,%ymm9,%ymm10
@@ -9653,7 +9729,7 @@ _sk_saturation_hsw:
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
   .byte  196,65,36,95,200                    // vmaxps        %ymm8,%ymm11,%ymm9
   .byte  196,65,116,95,192                   // vmaxps        %ymm8,%ymm1,%ymm8
-  .byte  196,226,125,24,13,63,57,0,0         // vbroadcastss  0x393f(%rip),%ymm1        # 4824 <_sk_callback_hsw+0x1ac>
+  .byte  196,226,125,24,13,187,57,0,0        // vbroadcastss  0x39bb(%rip),%ymm1        # 48a0 <_sk_callback_hsw+0x1ab>
   .byte  197,116,92,215                      // vsubps        %ymm7,%ymm1,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,116,92,219                      // vsubps        %ymm3,%ymm1,%ymm11
@@ -9681,11 +9757,11 @@ _sk_color_hsw:
   .byte  197,108,89,199                      // vmulps        %ymm7,%ymm2,%ymm8
   .byte  197,116,89,215                      // vmulps        %ymm7,%ymm1,%ymm10
   .byte  197,52,89,223                       // vmulps        %ymm7,%ymm9,%ymm11
-  .byte  196,98,125,24,45,216,56,0,0         // vbroadcastss  0x38d8(%rip),%ymm13        # 4828 <_sk_callback_hsw+0x1b0>
-  .byte  196,98,125,24,53,211,56,0,0         // vbroadcastss  0x38d3(%rip),%ymm14        # 482c <_sk_callback_hsw+0x1b4>
+  .byte  196,98,125,24,45,84,57,0,0          // vbroadcastss  0x3954(%rip),%ymm13        # 48a4 <_sk_callback_hsw+0x1af>
+  .byte  196,98,125,24,53,79,57,0,0          // vbroadcastss  0x394f(%rip),%ymm14        # 48a8 <_sk_callback_hsw+0x1b3>
   .byte  196,65,84,89,230                    // vmulps        %ymm14,%ymm5,%ymm12
   .byte  196,66,93,184,229                   // vfmadd231ps   %ymm13,%ymm4,%ymm12
-  .byte  196,98,125,24,61,196,56,0,0         // vbroadcastss  0x38c4(%rip),%ymm15        # 4830 <_sk_callback_hsw+0x1b8>
+  .byte  196,98,125,24,61,64,57,0,0          // vbroadcastss  0x3940(%rip),%ymm15        # 48ac <_sk_callback_hsw+0x1b7>
   .byte  196,66,77,184,231                   // vfmadd231ps   %ymm15,%ymm6,%ymm12
   .byte  196,65,44,89,206                    // vmulps        %ymm14,%ymm10,%ymm9
   .byte  196,66,61,184,205                   // vfmadd231ps   %ymm13,%ymm8,%ymm9
@@ -9741,7 +9817,7 @@ _sk_color_hsw:
   .byte  196,193,116,95,206                  // vmaxps        %ymm14,%ymm1,%ymm1
   .byte  196,65,44,95,198                    // vmaxps        %ymm14,%ymm10,%ymm8
   .byte  196,65,124,95,206                   // vmaxps        %ymm14,%ymm0,%ymm9
-  .byte  196,226,125,24,5,166,55,0,0         // vbroadcastss  0x37a6(%rip),%ymm0        # 4834 <_sk_callback_hsw+0x1bc>
+  .byte  196,226,125,24,5,34,56,0,0          // vbroadcastss  0x3822(%rip),%ymm0        # 48b0 <_sk_callback_hsw+0x1bb>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -9769,11 +9845,11 @@ _sk_luminosity_hsw:
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
   .byte  197,100,89,213                      // vmulps        %ymm5,%ymm3,%ymm10
   .byte  197,100,89,222                      // vmulps        %ymm6,%ymm3,%ymm11
-  .byte  196,98,125,24,45,63,55,0,0          // vbroadcastss  0x373f(%rip),%ymm13        # 4838 <_sk_callback_hsw+0x1c0>
-  .byte  196,98,125,24,53,58,55,0,0          // vbroadcastss  0x373a(%rip),%ymm14        # 483c <_sk_callback_hsw+0x1c4>
+  .byte  196,98,125,24,45,187,55,0,0         // vbroadcastss  0x37bb(%rip),%ymm13        # 48b4 <_sk_callback_hsw+0x1bf>
+  .byte  196,98,125,24,53,182,55,0,0         // vbroadcastss  0x37b6(%rip),%ymm14        # 48b8 <_sk_callback_hsw+0x1c3>
   .byte  196,65,116,89,230                   // vmulps        %ymm14,%ymm1,%ymm12
   .byte  196,66,109,184,229                  // vfmadd231ps   %ymm13,%ymm2,%ymm12
-  .byte  196,98,125,24,61,43,55,0,0          // vbroadcastss  0x372b(%rip),%ymm15        # 4840 <_sk_callback_hsw+0x1c8>
+  .byte  196,98,125,24,61,167,55,0,0         // vbroadcastss  0x37a7(%rip),%ymm15        # 48bc <_sk_callback_hsw+0x1c7>
   .byte  196,66,53,184,231                   // vfmadd231ps   %ymm15,%ymm9,%ymm12
   .byte  196,65,44,89,206                    // vmulps        %ymm14,%ymm10,%ymm9
   .byte  196,66,61,184,205                   // vfmadd231ps   %ymm13,%ymm8,%ymm9
@@ -9829,7 +9905,7 @@ _sk_luminosity_hsw:
   .byte  196,193,116,95,206                  // vmaxps        %ymm14,%ymm1,%ymm1
   .byte  196,65,44,95,198                    // vmaxps        %ymm14,%ymm10,%ymm8
   .byte  196,65,124,95,206                   // vmaxps        %ymm14,%ymm0,%ymm9
-  .byte  196,226,125,24,5,13,54,0,0          // vbroadcastss  0x360d(%rip),%ymm0        # 4844 <_sk_callback_hsw+0x1cc>
+  .byte  196,226,125,24,5,137,54,0,0         // vbroadcastss  0x3689(%rip),%ymm0        # 48c0 <_sk_callback_hsw+0x1cb>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -9862,7 +9938,7 @@ HIDDEN _sk_clamp_1_hsw
 .globl _sk_clamp_1_hsw
 FUNCTION(_sk_clamp_1_hsw)
 _sk_clamp_1_hsw:
-  .byte  196,98,125,24,5,169,53,0,0          // vbroadcastss  0x35a9(%rip),%ymm8        # 4848 <_sk_callback_hsw+0x1d0>
+  .byte  196,98,125,24,5,37,54,0,0           // vbroadcastss  0x3625(%rip),%ymm8        # 48c4 <_sk_callback_hsw+0x1cf>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
@@ -9874,7 +9950,7 @@ HIDDEN _sk_clamp_a_hsw
 .globl _sk_clamp_a_hsw
 FUNCTION(_sk_clamp_a_hsw)
 _sk_clamp_a_hsw:
-  .byte  196,98,125,24,5,140,53,0,0          // vbroadcastss  0x358c(%rip),%ymm8        # 484c <_sk_callback_hsw+0x1d4>
+  .byte  196,98,125,24,5,8,54,0,0            // vbroadcastss  0x3608(%rip),%ymm8        # 48c8 <_sk_callback_hsw+0x1d3>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  197,252,93,195                      // vminps        %ymm3,%ymm0,%ymm0
   .byte  197,244,93,203                      // vminps        %ymm3,%ymm1,%ymm1
@@ -9960,7 +10036,7 @@ FUNCTION(_sk_unpremul_hsw)
 _sk_unpremul_hsw:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,65,100,194,200,0                // vcmpeqps      %ymm8,%ymm3,%ymm9
-  .byte  196,98,125,24,21,212,52,0,0         // vbroadcastss  0x34d4(%rip),%ymm10        # 4850 <_sk_callback_hsw+0x1d8>
+  .byte  196,98,125,24,21,80,53,0,0          // vbroadcastss  0x3550(%rip),%ymm10        # 48cc <_sk_callback_hsw+0x1d7>
   .byte  197,44,94,211                       // vdivps        %ymm3,%ymm10,%ymm10
   .byte  196,67,45,74,192,144                // vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
@@ -9973,16 +10049,16 @@ HIDDEN _sk_from_srgb_hsw
 .globl _sk_from_srgb_hsw
 FUNCTION(_sk_from_srgb_hsw)
 _sk_from_srgb_hsw:
-  .byte  196,98,125,24,5,181,52,0,0          // vbroadcastss  0x34b5(%rip),%ymm8        # 4854 <_sk_callback_hsw+0x1dc>
+  .byte  196,98,125,24,5,49,53,0,0           // vbroadcastss  0x3531(%rip),%ymm8        # 48d0 <_sk_callback_hsw+0x1db>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  197,124,89,208                      // vmulps        %ymm0,%ymm0,%ymm10
-  .byte  196,98,125,24,29,167,52,0,0         // vbroadcastss  0x34a7(%rip),%ymm11        # 4858 <_sk_callback_hsw+0x1e0>
-  .byte  196,98,125,24,37,162,52,0,0         // vbroadcastss  0x34a2(%rip),%ymm12        # 485c <_sk_callback_hsw+0x1e4>
+  .byte  196,98,125,24,29,35,53,0,0          // vbroadcastss  0x3523(%rip),%ymm11        # 48d4 <_sk_callback_hsw+0x1df>
+  .byte  196,98,125,24,37,30,53,0,0          // vbroadcastss  0x351e(%rip),%ymm12        # 48d8 <_sk_callback_hsw+0x1e3>
   .byte  196,65,124,40,236                   // vmovaps       %ymm12,%ymm13
   .byte  196,66,125,168,235                  // vfmadd213ps   %ymm11,%ymm0,%ymm13
-  .byte  196,98,125,24,53,147,52,0,0         // vbroadcastss  0x3493(%rip),%ymm14        # 4860 <_sk_callback_hsw+0x1e8>
+  .byte  196,98,125,24,53,15,53,0,0          // vbroadcastss  0x350f(%rip),%ymm14        # 48dc <_sk_callback_hsw+0x1e7>
   .byte  196,66,45,168,238                   // vfmadd213ps   %ymm14,%ymm10,%ymm13
-  .byte  196,98,125,24,21,137,52,0,0         // vbroadcastss  0x3489(%rip),%ymm10        # 4864 <_sk_callback_hsw+0x1ec>
+  .byte  196,98,125,24,21,5,53,0,0           // vbroadcastss  0x3505(%rip),%ymm10        # 48e0 <_sk_callback_hsw+0x1eb>
   .byte  196,193,124,194,194,1               // vcmpltps      %ymm10,%ymm0,%ymm0
   .byte  196,195,21,74,193,0                 // vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
   .byte  196,65,116,89,200                   // vmulps        %ymm8,%ymm1,%ymm9
@@ -10006,19 +10082,19 @@ HIDDEN _sk_to_srgb_hsw
 FUNCTION(_sk_to_srgb_hsw)
 _sk_to_srgb_hsw:
   .byte  197,124,82,200                      // vrsqrtps      %ymm0,%ymm9
-  .byte  196,98,125,24,5,45,52,0,0           // vbroadcastss  0x342d(%rip),%ymm8        # 4868 <_sk_callback_hsw+0x1f0>
+  .byte  196,98,125,24,5,169,52,0,0          // vbroadcastss  0x34a9(%rip),%ymm8        # 48e4 <_sk_callback_hsw+0x1ef>
   .byte  196,65,124,89,208                   // vmulps        %ymm8,%ymm0,%ymm10
-  .byte  196,98,125,24,29,35,52,0,0          // vbroadcastss  0x3423(%rip),%ymm11        # 486c <_sk_callback_hsw+0x1f4>
-  .byte  196,98,125,24,37,30,52,0,0          // vbroadcastss  0x341e(%rip),%ymm12        # 4870 <_sk_callback_hsw+0x1f8>
+  .byte  196,98,125,24,29,159,52,0,0         // vbroadcastss  0x349f(%rip),%ymm11        # 48e8 <_sk_callback_hsw+0x1f3>
+  .byte  196,98,125,24,37,154,52,0,0         // vbroadcastss  0x349a(%rip),%ymm12        # 48ec <_sk_callback_hsw+0x1f7>
   .byte  196,65,124,40,236                   // vmovaps       %ymm12,%ymm13
   .byte  196,66,53,168,235                   // vfmadd213ps   %ymm11,%ymm9,%ymm13
-  .byte  196,98,125,24,53,15,52,0,0          // vbroadcastss  0x340f(%rip),%ymm14        # 4874 <_sk_callback_hsw+0x1fc>
+  .byte  196,98,125,24,53,139,52,0,0         // vbroadcastss  0x348b(%rip),%ymm14        # 48f0 <_sk_callback_hsw+0x1fb>
   .byte  196,66,53,168,238                   // vfmadd213ps   %ymm14,%ymm9,%ymm13
-  .byte  196,98,125,24,61,5,52,0,0           // vbroadcastss  0x3405(%rip),%ymm15        # 4878 <_sk_callback_hsw+0x200>
+  .byte  196,98,125,24,61,129,52,0,0         // vbroadcastss  0x3481(%rip),%ymm15        # 48f4 <_sk_callback_hsw+0x1ff>
   .byte  196,65,52,88,207                    // vaddps        %ymm15,%ymm9,%ymm9
   .byte  196,65,124,83,201                   // vrcpps        %ymm9,%ymm9
   .byte  196,65,20,89,201                    // vmulps        %ymm9,%ymm13,%ymm9
-  .byte  196,98,125,24,45,241,51,0,0         // vbroadcastss  0x33f1(%rip),%ymm13        # 487c <_sk_callback_hsw+0x204>
+  .byte  196,98,125,24,45,109,52,0,0         // vbroadcastss  0x346d(%rip),%ymm13        # 48f8 <_sk_callback_hsw+0x203>
   .byte  196,193,124,194,197,1               // vcmpltps      %ymm13,%ymm0,%ymm0
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  197,124,82,201                      // vrsqrtps      %ymm1,%ymm9
@@ -10052,26 +10128,26 @@ _sk_rgb_to_hsl_hsw:
   .byte  197,124,93,201                      // vminps        %ymm1,%ymm0,%ymm9
   .byte  197,52,93,202                       // vminps        %ymm2,%ymm9,%ymm9
   .byte  196,65,60,92,209                    // vsubps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,102,51,0,0         // vbroadcastss  0x3366(%rip),%ymm11        # 4880 <_sk_callback_hsw+0x208>
+  .byte  196,98,125,24,29,226,51,0,0         // vbroadcastss  0x33e2(%rip),%ymm11        # 48fc <_sk_callback_hsw+0x207>
   .byte  196,65,36,94,218                    // vdivps        %ymm10,%ymm11,%ymm11
   .byte  197,116,92,226                      // vsubps        %ymm2,%ymm1,%ymm12
   .byte  197,116,194,234,1                   // vcmpltps      %ymm2,%ymm1,%ymm13
-  .byte  196,98,125,24,53,83,51,0,0          // vbroadcastss  0x3353(%rip),%ymm14        # 4884 <_sk_callback_hsw+0x20c>
+  .byte  196,98,125,24,53,207,51,0,0         // vbroadcastss  0x33cf(%rip),%ymm14        # 4900 <_sk_callback_hsw+0x20b>
   .byte  196,65,4,87,255                     // vxorps        %ymm15,%ymm15,%ymm15
   .byte  196,67,5,74,238,208                 // vblendvps     %ymm13,%ymm14,%ymm15,%ymm13
   .byte  196,66,37,168,229                   // vfmadd213ps   %ymm13,%ymm11,%ymm12
   .byte  197,236,92,208                      // vsubps        %ymm0,%ymm2,%ymm2
   .byte  197,124,92,233                      // vsubps        %ymm1,%ymm0,%ymm13
-  .byte  196,98,125,24,53,58,51,0,0          // vbroadcastss  0x333a(%rip),%ymm14        # 488c <_sk_callback_hsw+0x214>
+  .byte  196,98,125,24,53,182,51,0,0         // vbroadcastss  0x33b6(%rip),%ymm14        # 4908 <_sk_callback_hsw+0x213>
   .byte  196,66,37,168,238                   // vfmadd213ps   %ymm14,%ymm11,%ymm13
-  .byte  196,98,125,24,53,40,51,0,0          // vbroadcastss  0x3328(%rip),%ymm14        # 4888 <_sk_callback_hsw+0x210>
+  .byte  196,98,125,24,53,164,51,0,0         // vbroadcastss  0x33a4(%rip),%ymm14        # 4904 <_sk_callback_hsw+0x20f>
   .byte  196,194,37,168,214                  // vfmadd213ps   %ymm14,%ymm11,%ymm2
   .byte  197,188,194,201,0                   // vcmpeqps      %ymm1,%ymm8,%ymm1
   .byte  196,227,21,74,202,16                // vblendvps     %ymm1,%ymm2,%ymm13,%ymm1
   .byte  197,188,194,192,0                   // vcmpeqps      %ymm0,%ymm8,%ymm0
   .byte  196,195,117,74,196,0                // vblendvps     %ymm0,%ymm12,%ymm1,%ymm0
   .byte  196,193,60,88,201                   // vaddps        %ymm9,%ymm8,%ymm1
-  .byte  196,98,125,24,29,11,51,0,0          // vbroadcastss  0x330b(%rip),%ymm11        # 4894 <_sk_callback_hsw+0x21c>
+  .byte  196,98,125,24,29,135,51,0,0         // vbroadcastss  0x3387(%rip),%ymm11        # 4910 <_sk_callback_hsw+0x21b>
   .byte  196,193,116,89,211                  // vmulps        %ymm11,%ymm1,%ymm2
   .byte  197,36,194,218,1                    // vcmpltps      %ymm2,%ymm11,%ymm11
   .byte  196,65,12,92,224                    // vsubps        %ymm8,%ymm14,%ymm12
@@ -10081,7 +10157,7 @@ _sk_rgb_to_hsl_hsw:
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  196,195,125,74,199,128              // vblendvps     %ymm8,%ymm15,%ymm0,%ymm0
   .byte  196,195,117,74,207,128              // vblendvps     %ymm8,%ymm15,%ymm1,%ymm1
-  .byte  196,98,125,24,5,206,50,0,0          // vbroadcastss  0x32ce(%rip),%ymm8        # 4890 <_sk_callback_hsw+0x218>
+  .byte  196,98,125,24,5,74,51,0,0           // vbroadcastss  0x334a(%rip),%ymm8        # 490c <_sk_callback_hsw+0x217>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -10098,30 +10174,30 @@ _sk_hsl_to_rgb_hsw:
   .byte  197,252,17,92,36,128                // vmovups       %ymm3,-0x80(%rsp)
   .byte  197,252,40,233                      // vmovaps       %ymm1,%ymm5
   .byte  197,252,40,224                      // vmovaps       %ymm0,%ymm4
-  .byte  196,98,125,24,5,155,50,0,0          // vbroadcastss  0x329b(%rip),%ymm8        # 4898 <_sk_callback_hsw+0x220>
+  .byte  196,98,125,24,5,23,51,0,0           // vbroadcastss  0x3317(%rip),%ymm8        # 4914 <_sk_callback_hsw+0x21f>
   .byte  197,60,194,202,2                    // vcmpleps      %ymm2,%ymm8,%ymm9
   .byte  197,84,89,210                       // vmulps        %ymm2,%ymm5,%ymm10
   .byte  196,65,84,92,218                    // vsubps        %ymm10,%ymm5,%ymm11
   .byte  196,67,45,74,203,144                // vblendvps     %ymm9,%ymm11,%ymm10,%ymm9
   .byte  197,52,88,210                       // vaddps        %ymm2,%ymm9,%ymm10
-  .byte  196,98,125,24,13,126,50,0,0         // vbroadcastss  0x327e(%rip),%ymm9        # 489c <_sk_callback_hsw+0x224>
+  .byte  196,98,125,24,13,250,50,0,0         // vbroadcastss  0x32fa(%rip),%ymm9        # 4918 <_sk_callback_hsw+0x223>
   .byte  196,66,109,170,202                  // vfmsub213ps   %ymm10,%ymm2,%ymm9
-  .byte  196,98,125,24,29,116,50,0,0         // vbroadcastss  0x3274(%rip),%ymm11        # 48a0 <_sk_callback_hsw+0x228>
+  .byte  196,98,125,24,29,240,50,0,0         // vbroadcastss  0x32f0(%rip),%ymm11        # 491c <_sk_callback_hsw+0x227>
   .byte  196,65,92,88,219                    // vaddps        %ymm11,%ymm4,%ymm11
   .byte  196,67,125,8,227,1                  // vroundps      $0x1,%ymm11,%ymm12
   .byte  196,65,36,92,252                    // vsubps        %ymm12,%ymm11,%ymm15
   .byte  196,65,44,92,217                    // vsubps        %ymm9,%ymm10,%ymm11
-  .byte  196,98,125,24,45,94,50,0,0          // vbroadcastss  0x325e(%rip),%ymm13        # 48a8 <_sk_callback_hsw+0x230>
+  .byte  196,98,125,24,45,218,50,0,0         // vbroadcastss  0x32da(%rip),%ymm13        # 4924 <_sk_callback_hsw+0x22f>
   .byte  196,193,4,89,197                    // vmulps        %ymm13,%ymm15,%ymm0
-  .byte  196,98,125,24,53,84,50,0,0          // vbroadcastss  0x3254(%rip),%ymm14        # 48ac <_sk_callback_hsw+0x234>
+  .byte  196,98,125,24,53,208,50,0,0         // vbroadcastss  0x32d0(%rip),%ymm14        # 4928 <_sk_callback_hsw+0x233>
   .byte  197,12,92,224                       // vsubps        %ymm0,%ymm14,%ymm12
   .byte  196,66,37,168,225                   // vfmadd213ps   %ymm9,%ymm11,%ymm12
-  .byte  196,226,125,24,29,58,50,0,0         // vbroadcastss  0x323a(%rip),%ymm3        # 48a4 <_sk_callback_hsw+0x22c>
+  .byte  196,226,125,24,29,182,50,0,0        // vbroadcastss  0x32b6(%rip),%ymm3        # 4920 <_sk_callback_hsw+0x22b>
   .byte  196,193,100,194,255,2               // vcmpleps      %ymm15,%ymm3,%ymm7
   .byte  196,195,29,74,249,112               // vblendvps     %ymm7,%ymm9,%ymm12,%ymm7
   .byte  196,65,60,194,231,2                 // vcmpleps      %ymm15,%ymm8,%ymm12
   .byte  196,227,45,74,255,192               // vblendvps     %ymm12,%ymm7,%ymm10,%ymm7
-  .byte  196,98,125,24,37,37,50,0,0          // vbroadcastss  0x3225(%rip),%ymm12        # 48b0 <_sk_callback_hsw+0x238>
+  .byte  196,98,125,24,37,161,50,0,0         // vbroadcastss  0x32a1(%rip),%ymm12        # 492c <_sk_callback_hsw+0x237>
   .byte  196,65,28,194,255,2                 // vcmpleps      %ymm15,%ymm12,%ymm15
   .byte  196,194,37,168,193                  // vfmadd213ps   %ymm9,%ymm11,%ymm0
   .byte  196,99,125,74,255,240               // vblendvps     %ymm15,%ymm7,%ymm0,%ymm15
@@ -10137,7 +10213,7 @@ _sk_hsl_to_rgb_hsw:
   .byte  197,156,194,192,2                   // vcmpleps      %ymm0,%ymm12,%ymm0
   .byte  196,194,37,168,249                  // vfmadd213ps   %ymm9,%ymm11,%ymm7
   .byte  196,227,69,74,201,0                 // vblendvps     %ymm0,%ymm1,%ymm7,%ymm1
-  .byte  196,226,125,24,5,209,49,0,0         // vbroadcastss  0x31d1(%rip),%ymm0        # 48b4 <_sk_callback_hsw+0x23c>
+  .byte  196,226,125,24,5,77,50,0,0          // vbroadcastss  0x324d(%rip),%ymm0        # 4930 <_sk_callback_hsw+0x23b>
   .byte  197,220,88,192                      // vaddps        %ymm0,%ymm4,%ymm0
   .byte  196,227,125,8,224,1                 // vroundps      $0x1,%ymm0,%ymm4
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
@@ -10191,7 +10267,7 @@ _sk_scale_u8_hsw:
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,125,49,192                   // vpmovzxbd     %xmm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,17,49,0,0          // vbroadcastss  0x3111(%rip),%ymm9        # 48b8 <_sk_callback_hsw+0x240>
+  .byte  196,98,125,24,13,141,49,0,0         // vbroadcastss  0x318d(%rip),%ymm9        # 4934 <_sk_callback_hsw+0x23f>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -10243,7 +10319,7 @@ _sk_lerp_u8_hsw:
   .byte  197,122,126,0                       // vmovq         (%rax),%xmm8
   .byte  196,66,125,49,192                   // vpmovzxbd     %xmm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,126,48,0,0         // vbroadcastss  0x307e(%rip),%ymm9        # 48bc <_sk_callback_hsw+0x244>
+  .byte  196,98,125,24,13,250,48,0,0         // vbroadcastss  0x30fa(%rip),%ymm9        # 4938 <_sk_callback_hsw+0x243>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,226,61,168,196                  // vfmadd213ps   %ymm4,%ymm8,%ymm0
@@ -10279,20 +10355,20 @@ _sk_lerp_565_hsw:
   .byte  15,133,169,0,0,0                    // jne           194a <_sk_lerp_565_hsw+0xb7>
   .byte  196,65,122,111,4,122                // vmovdqu       (%r10,%rdi,2),%xmm8
   .byte  196,66,125,51,192                   // vpmovzxwd     %xmm8,%ymm8
-  .byte  196,98,125,88,13,11,48,0,0          // vpbroadcastd  0x300b(%rip),%ymm9        # 48c0 <_sk_callback_hsw+0x248>
+  .byte  196,98,125,88,13,135,48,0,0         // vpbroadcastd  0x3087(%rip),%ymm9        # 493c <_sk_callback_hsw+0x247>
   .byte  196,65,61,219,201                   // vpand         %ymm9,%ymm8,%ymm9
   .byte  196,65,124,91,201                   // vcvtdq2ps     %ymm9,%ymm9
-  .byte  196,98,125,24,21,252,47,0,0         // vbroadcastss  0x2ffc(%rip),%ymm10        # 48c4 <_sk_callback_hsw+0x24c>
+  .byte  196,98,125,24,21,120,48,0,0         // vbroadcastss  0x3078(%rip),%ymm10        # 4940 <_sk_callback_hsw+0x24b>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,88,21,242,47,0,0         // vpbroadcastd  0x2ff2(%rip),%ymm10        # 48c8 <_sk_callback_hsw+0x250>
+  .byte  196,98,125,88,21,110,48,0,0         // vpbroadcastd  0x306e(%rip),%ymm10        # 4944 <_sk_callback_hsw+0x24f>
   .byte  196,65,61,219,210                   // vpand         %ymm10,%ymm8,%ymm10
   .byte  196,65,124,91,210                   // vcvtdq2ps     %ymm10,%ymm10
-  .byte  196,98,125,24,29,227,47,0,0         // vbroadcastss  0x2fe3(%rip),%ymm11        # 48cc <_sk_callback_hsw+0x254>
+  .byte  196,98,125,24,29,95,48,0,0          // vbroadcastss  0x305f(%rip),%ymm11        # 4948 <_sk_callback_hsw+0x253>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,88,29,217,47,0,0         // vpbroadcastd  0x2fd9(%rip),%ymm11        # 48d0 <_sk_callback_hsw+0x258>
+  .byte  196,98,125,88,29,85,48,0,0          // vpbroadcastd  0x3055(%rip),%ymm11        # 494c <_sk_callback_hsw+0x257>
   .byte  196,65,61,219,195                   // vpand         %ymm11,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,29,202,47,0,0         // vbroadcastss  0x2fca(%rip),%ymm11        # 48d4 <_sk_callback_hsw+0x25c>
+  .byte  196,98,125,24,29,70,48,0,0          // vbroadcastss  0x3046(%rip),%ymm11        # 4950 <_sk_callback_hsw+0x25b>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,226,53,168,196                  // vfmadd213ps   %ymm4,%ymm9,%ymm0
@@ -10364,21 +10440,21 @@ _sk_load_tables_hsw:
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,105                             // jne           1a52 <_sk_load_tables_hsw+0x7e>
   .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
-  .byte  197,229,219,13,138,49,0,0           // vpand         0x318a(%rip),%ymm3,%ymm1        # 4b80 <_sk_callback_hsw+0x508>
+  .byte  197,229,219,13,10,50,0,0            // vpand         0x320a(%rip),%ymm3,%ymm1        # 4c00 <_sk_callback_hsw+0x50b>
   .byte  196,65,61,118,192                   // vpcmpeqd      %ymm8,%ymm8,%ymm8
   .byte  72,139,72,8                         // mov           0x8(%rax),%rcx
   .byte  76,139,72,16                        // mov           0x10(%rax),%r9
   .byte  197,237,118,210                     // vpcmpeqd      %ymm2,%ymm2,%ymm2
   .byte  196,226,109,146,4,137               // vgatherdps    %ymm2,(%rcx,%ymm1,4),%ymm0
-  .byte  196,226,101,0,21,138,49,0,0         // vpshufb       0x318a(%rip),%ymm3,%ymm2        # 4ba0 <_sk_callback_hsw+0x528>
+  .byte  196,226,101,0,21,10,50,0,0          // vpshufb       0x320a(%rip),%ymm3,%ymm2        # 4c20 <_sk_callback_hsw+0x52b>
   .byte  196,65,53,118,201                   // vpcmpeqd      %ymm9,%ymm9,%ymm9
   .byte  196,194,53,146,12,145               // vgatherdps    %ymm9,(%r9,%ymm2,4),%ymm1
   .byte  72,139,64,24                        // mov           0x18(%rax),%rax
-  .byte  196,98,101,0,13,146,49,0,0          // vpshufb       0x3192(%rip),%ymm3,%ymm9        # 4bc0 <_sk_callback_hsw+0x548>
+  .byte  196,98,101,0,13,18,50,0,0           // vpshufb       0x3212(%rip),%ymm3,%ymm9        # 4c40 <_sk_callback_hsw+0x54b>
   .byte  196,162,61,146,20,136               // vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,146,46,0,0          // vbroadcastss  0x2e92(%rip),%ymm8        # 48d8 <_sk_callback_hsw+0x260>
+  .byte  196,98,125,24,5,14,47,0,0           // vbroadcastss  0x2f0e(%rip),%ymm8        # 4954 <_sk_callback_hsw+0x25f>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,137,193                          // mov           %r8,%rcx
@@ -10417,7 +10493,7 @@ _sk_load_tables_u16_be_hsw:
   .byte  197,185,108,200                     // vpunpcklqdq   %xmm0,%xmm8,%xmm1
   .byte  197,185,109,208                     // vpunpckhqdq   %xmm0,%xmm8,%xmm2
   .byte  197,49,108,195                      // vpunpcklqdq   %xmm3,%xmm9,%xmm8
-  .byte  197,121,111,21,30,50,0,0            // vmovdqa       0x321e(%rip),%xmm10        # 4d00 <_sk_callback_hsw+0x688>
+  .byte  197,121,111,21,158,50,0,0           // vmovdqa       0x329e(%rip),%xmm10        # 4d80 <_sk_callback_hsw+0x68b>
   .byte  196,193,113,219,194                 // vpand         %xmm10,%xmm1,%xmm0
   .byte  196,226,125,51,200                  // vpmovzxwd     %xmm0,%ymm1
   .byte  196,65,37,118,219                   // vpcmpeqd      %ymm11,%ymm11,%ymm11
@@ -10439,7 +10515,7 @@ _sk_load_tables_u16_be_hsw:
   .byte  197,185,235,219                     // vpor          %xmm3,%xmm8,%xmm3
   .byte  196,226,125,51,219                  // vpmovzxwd     %xmm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,139,45,0,0          // vbroadcastss  0x2d8b(%rip),%ymm8        # 48dc <_sk_callback_hsw+0x264>
+  .byte  196,98,125,24,5,7,46,0,0            // vbroadcastss  0x2e07(%rip),%ymm8        # 4958 <_sk_callback_hsw+0x263>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -10499,7 +10575,7 @@ _sk_load_tables_rgb_u16_be_hsw:
   .byte  197,185,108,218                     // vpunpcklqdq   %xmm2,%xmm8,%xmm3
   .byte  197,185,109,210                     // vpunpckhqdq   %xmm2,%xmm8,%xmm2
   .byte  197,121,108,193                     // vpunpcklqdq   %xmm1,%xmm0,%xmm8
-  .byte  197,121,111,13,190,48,0,0           // vmovdqa       0x30be(%rip),%xmm9        # 4d10 <_sk_callback_hsw+0x698>
+  .byte  197,121,111,13,62,49,0,0            // vmovdqa       0x313e(%rip),%xmm9        # 4d90 <_sk_callback_hsw+0x69b>
   .byte  196,193,97,219,193                  // vpand         %xmm9,%xmm3,%xmm0
   .byte  196,226,125,51,200                  // vpmovzxwd     %xmm0,%ymm1
   .byte  197,229,118,219                     // vpcmpeqd      %ymm3,%ymm3,%ymm3
@@ -10516,7 +10592,7 @@ _sk_load_tables_rgb_u16_be_hsw:
   .byte  196,98,125,51,194                   // vpmovzxwd     %xmm2,%ymm8
   .byte  196,162,101,146,20,128              // vgatherdps    %ymm3,(%rax,%ymm8,4),%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,57,44,0,0         // vbroadcastss  0x2c39(%rip),%ymm3        # 48e0 <_sk_callback_hsw+0x268>
+  .byte  196,226,125,24,29,181,44,0,0        // vbroadcastss  0x2cb5(%rip),%ymm3        # 495c <_sk_callback_hsw+0x267>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,129,121,110,4,72                // vmovd         (%r8,%r9,2),%xmm0
   .byte  196,129,121,196,68,72,4,2           // vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
@@ -10563,7 +10639,7 @@ _sk_byte_tables_hsw:
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,119,43,0,0          // vbroadcastss  0x2b77(%rip),%ymm8        # 48e4 <_sk_callback_hsw+0x26c>
+  .byte  196,98,125,24,5,243,43,0,0          // vbroadcastss  0x2bf3(%rip),%ymm8        # 4960 <_sk_callback_hsw+0x26b>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
@@ -10600,7 +10676,7 @@ _sk_byte_tables_hsw:
   .byte  196,227,121,32,197,7                // vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,200,42,0,0         // vbroadcastss  0x2ac8(%rip),%ymm9        # 48e8 <_sk_callback_hsw+0x270>
+  .byte  196,98,125,24,13,68,43,0,0          // vbroadcastss  0x2b44(%rip),%ymm9        # 4964 <_sk_callback_hsw+0x26f>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -10761,7 +10837,7 @@ _sk_byte_tables_rgb_hsw:
   .byte  196,227,121,32,197,7                // vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,1,40,0,0           // vbroadcastss  0x2801(%rip),%ymm9        # 48ec <_sk_callback_hsw+0x274>
+  .byte  196,98,125,24,13,125,40,0,0         // vbroadcastss  0x287d(%rip),%ymm9        # 4968 <_sk_callback_hsw+0x273>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -10924,33 +11000,33 @@ _sk_parametric_r_hsw:
   .byte  196,66,125,168,211                  // vfmadd213ps   %ymm11,%ymm0,%ymm10
   .byte  196,226,125,24,0                    // vbroadcastss  (%rax),%ymm0
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,180,37,0,0         // vbroadcastss  0x25b4(%rip),%ymm12        # 48f0 <_sk_callback_hsw+0x278>
-  .byte  196,98,125,24,45,175,37,0,0         // vbroadcastss  0x25af(%rip),%ymm13        # 48f4 <_sk_callback_hsw+0x27c>
+  .byte  196,98,125,24,37,48,38,0,0          // vbroadcastss  0x2630(%rip),%ymm12        # 496c <_sk_callback_hsw+0x277>
+  .byte  196,98,125,24,45,43,38,0,0          // vbroadcastss  0x262b(%rip),%ymm13        # 4970 <_sk_callback_hsw+0x27b>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,165,37,0,0         // vbroadcastss  0x25a5(%rip),%ymm13        # 48f8 <_sk_callback_hsw+0x280>
+  .byte  196,98,125,24,45,33,38,0,0          // vbroadcastss  0x2621(%rip),%ymm13        # 4974 <_sk_callback_hsw+0x27f>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,155,37,0,0         // vbroadcastss  0x259b(%rip),%ymm13        # 48fc <_sk_callback_hsw+0x284>
+  .byte  196,98,125,24,45,23,38,0,0          // vbroadcastss  0x2617(%rip),%ymm13        # 4978 <_sk_callback_hsw+0x283>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,145,37,0,0         // vbroadcastss  0x2591(%rip),%ymm11        # 4900 <_sk_callback_hsw+0x288>
+  .byte  196,98,125,24,29,13,38,0,0          // vbroadcastss  0x260d(%rip),%ymm11        # 497c <_sk_callback_hsw+0x287>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,135,37,0,0         // vbroadcastss  0x2587(%rip),%ymm12        # 4904 <_sk_callback_hsw+0x28c>
+  .byte  196,98,125,24,37,3,38,0,0           // vbroadcastss  0x2603(%rip),%ymm12        # 4980 <_sk_callback_hsw+0x28b>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,125,37,0,0         // vbroadcastss  0x257d(%rip),%ymm12        # 4908 <_sk_callback_hsw+0x290>
+  .byte  196,98,125,24,37,249,37,0,0         // vbroadcastss  0x25f9(%rip),%ymm12        # 4984 <_sk_callback_hsw+0x28f>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  196,99,125,8,208,1                  // vroundps      $0x1,%ymm0,%ymm10
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,94,37,0,0          // vbroadcastss  0x255e(%rip),%ymm11        # 490c <_sk_callback_hsw+0x294>
+  .byte  196,98,125,24,29,218,37,0,0         // vbroadcastss  0x25da(%rip),%ymm11        # 4988 <_sk_callback_hsw+0x293>
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,84,37,0,0          // vbroadcastss  0x2554(%rip),%ymm11        # 4910 <_sk_callback_hsw+0x298>
+  .byte  196,98,125,24,29,208,37,0,0         // vbroadcastss  0x25d0(%rip),%ymm11        # 498c <_sk_callback_hsw+0x297>
   .byte  196,98,45,172,216                   // vfnmadd213ps  %ymm0,%ymm10,%ymm11
-  .byte  196,226,125,24,5,74,37,0,0          // vbroadcastss  0x254a(%rip),%ymm0        # 4914 <_sk_callback_hsw+0x29c>
+  .byte  196,226,125,24,5,198,37,0,0         // vbroadcastss  0x25c6(%rip),%ymm0        # 4990 <_sk_callback_hsw+0x29b>
   .byte  196,193,124,92,194                  // vsubps        %ymm10,%ymm0,%ymm0
-  .byte  196,98,125,24,21,64,37,0,0          // vbroadcastss  0x2540(%rip),%ymm10        # 4918 <_sk_callback_hsw+0x2a0>
+  .byte  196,98,125,24,21,188,37,0,0         // vbroadcastss  0x25bc(%rip),%ymm10        # 4994 <_sk_callback_hsw+0x29f>
   .byte  197,172,94,192                      // vdivps        %ymm0,%ymm10,%ymm0
   .byte  197,164,88,192                      // vaddps        %ymm0,%ymm11,%ymm0
-  .byte  196,98,125,24,21,51,37,0,0          // vbroadcastss  0x2533(%rip),%ymm10        # 491c <_sk_callback_hsw+0x2a4>
+  .byte  196,98,125,24,21,175,37,0,0         // vbroadcastss  0x25af(%rip),%ymm10        # 4998 <_sk_callback_hsw+0x2a3>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -10958,7 +11034,7 @@ _sk_parametric_r_hsw:
   .byte  196,195,125,74,193,128              // vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,10,37,0,0           // vbroadcastss  0x250a(%rip),%ymm8        # 4920 <_sk_callback_hsw+0x2a8>
+  .byte  196,98,125,24,5,134,37,0,0          // vbroadcastss  0x2586(%rip),%ymm8        # 499c <_sk_callback_hsw+0x2a7>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -10978,33 +11054,33 @@ _sk_parametric_g_hsw:
   .byte  196,66,117,168,211                  // vfmadd213ps   %ymm11,%ymm1,%ymm10
   .byte  196,226,125,24,8                    // vbroadcastss  (%rax),%ymm1
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,194,36,0,0         // vbroadcastss  0x24c2(%rip),%ymm12        # 4924 <_sk_callback_hsw+0x2ac>
-  .byte  196,98,125,24,45,189,36,0,0         // vbroadcastss  0x24bd(%rip),%ymm13        # 4928 <_sk_callback_hsw+0x2b0>
+  .byte  196,98,125,24,37,62,37,0,0          // vbroadcastss  0x253e(%rip),%ymm12        # 49a0 <_sk_callback_hsw+0x2ab>
+  .byte  196,98,125,24,45,57,37,0,0          // vbroadcastss  0x2539(%rip),%ymm13        # 49a4 <_sk_callback_hsw+0x2af>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,179,36,0,0         // vbroadcastss  0x24b3(%rip),%ymm13        # 492c <_sk_callback_hsw+0x2b4>
+  .byte  196,98,125,24,45,47,37,0,0          // vbroadcastss  0x252f(%rip),%ymm13        # 49a8 <_sk_callback_hsw+0x2b3>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,169,36,0,0         // vbroadcastss  0x24a9(%rip),%ymm13        # 4930 <_sk_callback_hsw+0x2b8>
+  .byte  196,98,125,24,45,37,37,0,0          // vbroadcastss  0x2525(%rip),%ymm13        # 49ac <_sk_callback_hsw+0x2b7>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,159,36,0,0         // vbroadcastss  0x249f(%rip),%ymm11        # 4934 <_sk_callback_hsw+0x2bc>
+  .byte  196,98,125,24,29,27,37,0,0          // vbroadcastss  0x251b(%rip),%ymm11        # 49b0 <_sk_callback_hsw+0x2bb>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,149,36,0,0         // vbroadcastss  0x2495(%rip),%ymm12        # 4938 <_sk_callback_hsw+0x2c0>
+  .byte  196,98,125,24,37,17,37,0,0          // vbroadcastss  0x2511(%rip),%ymm12        # 49b4 <_sk_callback_hsw+0x2bf>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,139,36,0,0         // vbroadcastss  0x248b(%rip),%ymm12        # 493c <_sk_callback_hsw+0x2c4>
+  .byte  196,98,125,24,37,7,37,0,0           // vbroadcastss  0x2507(%rip),%ymm12        # 49b8 <_sk_callback_hsw+0x2c3>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  196,99,125,8,209,1                  // vroundps      $0x1,%ymm1,%ymm10
   .byte  196,65,116,92,210                   // vsubps        %ymm10,%ymm1,%ymm10
-  .byte  196,98,125,24,29,108,36,0,0         // vbroadcastss  0x246c(%rip),%ymm11        # 4940 <_sk_callback_hsw+0x2c8>
+  .byte  196,98,125,24,29,232,36,0,0         // vbroadcastss  0x24e8(%rip),%ymm11        # 49bc <_sk_callback_hsw+0x2c7>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,98,36,0,0          // vbroadcastss  0x2462(%rip),%ymm11        # 4944 <_sk_callback_hsw+0x2cc>
+  .byte  196,98,125,24,29,222,36,0,0         // vbroadcastss  0x24de(%rip),%ymm11        # 49c0 <_sk_callback_hsw+0x2cb>
   .byte  196,98,45,172,217                   // vfnmadd213ps  %ymm1,%ymm10,%ymm11
-  .byte  196,226,125,24,13,88,36,0,0         // vbroadcastss  0x2458(%rip),%ymm1        # 4948 <_sk_callback_hsw+0x2d0>
+  .byte  196,226,125,24,13,212,36,0,0        // vbroadcastss  0x24d4(%rip),%ymm1        # 49c4 <_sk_callback_hsw+0x2cf>
   .byte  196,193,116,92,202                  // vsubps        %ymm10,%ymm1,%ymm1
-  .byte  196,98,125,24,21,78,36,0,0          // vbroadcastss  0x244e(%rip),%ymm10        # 494c <_sk_callback_hsw+0x2d4>
+  .byte  196,98,125,24,21,202,36,0,0         // vbroadcastss  0x24ca(%rip),%ymm10        # 49c8 <_sk_callback_hsw+0x2d3>
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  197,164,88,201                      // vaddps        %ymm1,%ymm11,%ymm1
-  .byte  196,98,125,24,21,65,36,0,0          // vbroadcastss  0x2441(%rip),%ymm10        # 4950 <_sk_callback_hsw+0x2d8>
+  .byte  196,98,125,24,21,189,36,0,0         // vbroadcastss  0x24bd(%rip),%ymm10        # 49cc <_sk_callback_hsw+0x2d7>
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -11012,7 +11088,7 @@ _sk_parametric_g_hsw:
   .byte  196,195,117,74,201,128              // vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,116,95,200                  // vmaxps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,5,24,36,0,0           // vbroadcastss  0x2418(%rip),%ymm8        # 4954 <_sk_callback_hsw+0x2dc>
+  .byte  196,98,125,24,5,148,36,0,0          // vbroadcastss  0x2494(%rip),%ymm8        # 49d0 <_sk_callback_hsw+0x2db>
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -11032,33 +11108,33 @@ _sk_parametric_b_hsw:
   .byte  196,66,109,168,211                  // vfmadd213ps   %ymm11,%ymm2,%ymm10
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,208,35,0,0         // vbroadcastss  0x23d0(%rip),%ymm12        # 4958 <_sk_callback_hsw+0x2e0>
-  .byte  196,98,125,24,45,203,35,0,0         // vbroadcastss  0x23cb(%rip),%ymm13        # 495c <_sk_callback_hsw+0x2e4>
+  .byte  196,98,125,24,37,76,36,0,0          // vbroadcastss  0x244c(%rip),%ymm12        # 49d4 <_sk_callback_hsw+0x2df>
+  .byte  196,98,125,24,45,71,36,0,0          // vbroadcastss  0x2447(%rip),%ymm13        # 49d8 <_sk_callback_hsw+0x2e3>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,193,35,0,0         // vbroadcastss  0x23c1(%rip),%ymm13        # 4960 <_sk_callback_hsw+0x2e8>
+  .byte  196,98,125,24,45,61,36,0,0          // vbroadcastss  0x243d(%rip),%ymm13        # 49dc <_sk_callback_hsw+0x2e7>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,183,35,0,0         // vbroadcastss  0x23b7(%rip),%ymm13        # 4964 <_sk_callback_hsw+0x2ec>
+  .byte  196,98,125,24,45,51,36,0,0          // vbroadcastss  0x2433(%rip),%ymm13        # 49e0 <_sk_callback_hsw+0x2eb>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,173,35,0,0         // vbroadcastss  0x23ad(%rip),%ymm11        # 4968 <_sk_callback_hsw+0x2f0>
+  .byte  196,98,125,24,29,41,36,0,0          // vbroadcastss  0x2429(%rip),%ymm11        # 49e4 <_sk_callback_hsw+0x2ef>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,163,35,0,0         // vbroadcastss  0x23a3(%rip),%ymm12        # 496c <_sk_callback_hsw+0x2f4>
+  .byte  196,98,125,24,37,31,36,0,0          // vbroadcastss  0x241f(%rip),%ymm12        # 49e8 <_sk_callback_hsw+0x2f3>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,153,35,0,0         // vbroadcastss  0x2399(%rip),%ymm12        # 4970 <_sk_callback_hsw+0x2f8>
+  .byte  196,98,125,24,37,21,36,0,0          // vbroadcastss  0x2415(%rip),%ymm12        # 49ec <_sk_callback_hsw+0x2f7>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  196,99,125,8,210,1                  // vroundps      $0x1,%ymm2,%ymm10
   .byte  196,65,108,92,210                   // vsubps        %ymm10,%ymm2,%ymm10
-  .byte  196,98,125,24,29,122,35,0,0         // vbroadcastss  0x237a(%rip),%ymm11        # 4974 <_sk_callback_hsw+0x2fc>
+  .byte  196,98,125,24,29,246,35,0,0         // vbroadcastss  0x23f6(%rip),%ymm11        # 49f0 <_sk_callback_hsw+0x2fb>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,112,35,0,0         // vbroadcastss  0x2370(%rip),%ymm11        # 4978 <_sk_callback_hsw+0x300>
+  .byte  196,98,125,24,29,236,35,0,0         // vbroadcastss  0x23ec(%rip),%ymm11        # 49f4 <_sk_callback_hsw+0x2ff>
   .byte  196,98,45,172,218                   // vfnmadd213ps  %ymm2,%ymm10,%ymm11
-  .byte  196,226,125,24,21,102,35,0,0        // vbroadcastss  0x2366(%rip),%ymm2        # 497c <_sk_callback_hsw+0x304>
+  .byte  196,226,125,24,21,226,35,0,0        // vbroadcastss  0x23e2(%rip),%ymm2        # 49f8 <_sk_callback_hsw+0x303>
   .byte  196,193,108,92,210                  // vsubps        %ymm10,%ymm2,%ymm2
-  .byte  196,98,125,24,21,92,35,0,0          // vbroadcastss  0x235c(%rip),%ymm10        # 4980 <_sk_callback_hsw+0x308>
+  .byte  196,98,125,24,21,216,35,0,0         // vbroadcastss  0x23d8(%rip),%ymm10        # 49fc <_sk_callback_hsw+0x307>
   .byte  197,172,94,210                      // vdivps        %ymm2,%ymm10,%ymm2
   .byte  197,164,88,210                      // vaddps        %ymm2,%ymm11,%ymm2
-  .byte  196,98,125,24,21,79,35,0,0          // vbroadcastss  0x234f(%rip),%ymm10        # 4984 <_sk_callback_hsw+0x30c>
+  .byte  196,98,125,24,21,203,35,0,0         // vbroadcastss  0x23cb(%rip),%ymm10        # 4a00 <_sk_callback_hsw+0x30b>
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  197,253,91,210                      // vcvtps2dq     %ymm2,%ymm2
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -11066,7 +11142,7 @@ _sk_parametric_b_hsw:
   .byte  196,195,109,74,209,128              // vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,38,35,0,0           // vbroadcastss  0x2326(%rip),%ymm8        # 4988 <_sk_callback_hsw+0x310>
+  .byte  196,98,125,24,5,162,35,0,0          // vbroadcastss  0x23a2(%rip),%ymm8        # 4a04 <_sk_callback_hsw+0x30f>
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -11086,33 +11162,33 @@ _sk_parametric_a_hsw:
   .byte  196,66,101,168,211                  // vfmadd213ps   %ymm11,%ymm3,%ymm10
   .byte  196,226,125,24,24                   // vbroadcastss  (%rax),%ymm3
   .byte  196,65,124,91,218                   // vcvtdq2ps     %ymm10,%ymm11
-  .byte  196,98,125,24,37,222,34,0,0         // vbroadcastss  0x22de(%rip),%ymm12        # 498c <_sk_callback_hsw+0x314>
-  .byte  196,98,125,24,45,217,34,0,0         // vbroadcastss  0x22d9(%rip),%ymm13        # 4990 <_sk_callback_hsw+0x318>
+  .byte  196,98,125,24,37,90,35,0,0          // vbroadcastss  0x235a(%rip),%ymm12        # 4a08 <_sk_callback_hsw+0x313>
+  .byte  196,98,125,24,45,85,35,0,0          // vbroadcastss  0x2355(%rip),%ymm13        # 4a0c <_sk_callback_hsw+0x317>
   .byte  196,65,44,84,213                    // vandps        %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,207,34,0,0         // vbroadcastss  0x22cf(%rip),%ymm13        # 4994 <_sk_callback_hsw+0x31c>
+  .byte  196,98,125,24,45,75,35,0,0          // vbroadcastss  0x234b(%rip),%ymm13        # 4a10 <_sk_callback_hsw+0x31b>
   .byte  196,65,44,86,213                    // vorps         %ymm13,%ymm10,%ymm10
-  .byte  196,98,125,24,45,197,34,0,0         // vbroadcastss  0x22c5(%rip),%ymm13        # 4998 <_sk_callback_hsw+0x320>
+  .byte  196,98,125,24,45,65,35,0,0          // vbroadcastss  0x2341(%rip),%ymm13        # 4a14 <_sk_callback_hsw+0x31f>
   .byte  196,66,37,184,236                   // vfmadd231ps   %ymm12,%ymm11,%ymm13
-  .byte  196,98,125,24,29,187,34,0,0         // vbroadcastss  0x22bb(%rip),%ymm11        # 499c <_sk_callback_hsw+0x324>
+  .byte  196,98,125,24,29,55,35,0,0          // vbroadcastss  0x2337(%rip),%ymm11        # 4a18 <_sk_callback_hsw+0x323>
   .byte  196,66,45,172,221                   // vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  .byte  196,98,125,24,37,177,34,0,0         // vbroadcastss  0x22b1(%rip),%ymm12        # 49a0 <_sk_callback_hsw+0x328>
+  .byte  196,98,125,24,37,45,35,0,0          // vbroadcastss  0x232d(%rip),%ymm12        # 4a1c <_sk_callback_hsw+0x327>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,167,34,0,0         // vbroadcastss  0x22a7(%rip),%ymm12        # 49a4 <_sk_callback_hsw+0x32c>
+  .byte  196,98,125,24,37,35,35,0,0          // vbroadcastss  0x2323(%rip),%ymm12        # 4a20 <_sk_callback_hsw+0x32b>
   .byte  196,65,28,94,210                    // vdivps        %ymm10,%ymm12,%ymm10
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  196,99,125,8,211,1                  // vroundps      $0x1,%ymm3,%ymm10
   .byte  196,65,100,92,210                   // vsubps        %ymm10,%ymm3,%ymm10
-  .byte  196,98,125,24,29,136,34,0,0         // vbroadcastss  0x2288(%rip),%ymm11        # 49a8 <_sk_callback_hsw+0x330>
+  .byte  196,98,125,24,29,4,35,0,0           // vbroadcastss  0x2304(%rip),%ymm11        # 4a24 <_sk_callback_hsw+0x32f>
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,126,34,0,0         // vbroadcastss  0x227e(%rip),%ymm11        # 49ac <_sk_callback_hsw+0x334>
+  .byte  196,98,125,24,29,250,34,0,0         // vbroadcastss  0x22fa(%rip),%ymm11        # 4a28 <_sk_callback_hsw+0x333>
   .byte  196,98,45,172,219                   // vfnmadd213ps  %ymm3,%ymm10,%ymm11
-  .byte  196,226,125,24,29,116,34,0,0        // vbroadcastss  0x2274(%rip),%ymm3        # 49b0 <_sk_callback_hsw+0x338>
+  .byte  196,226,125,24,29,240,34,0,0        // vbroadcastss  0x22f0(%rip),%ymm3        # 4a2c <_sk_callback_hsw+0x337>
   .byte  196,193,100,92,218                  // vsubps        %ymm10,%ymm3,%ymm3
-  .byte  196,98,125,24,21,106,34,0,0         // vbroadcastss  0x226a(%rip),%ymm10        # 49b4 <_sk_callback_hsw+0x33c>
+  .byte  196,98,125,24,21,230,34,0,0         // vbroadcastss  0x22e6(%rip),%ymm10        # 4a30 <_sk_callback_hsw+0x33b>
   .byte  197,172,94,219                      // vdivps        %ymm3,%ymm10,%ymm3
   .byte  197,164,88,219                      // vaddps        %ymm3,%ymm11,%ymm3
-  .byte  196,98,125,24,21,93,34,0,0          // vbroadcastss  0x225d(%rip),%ymm10        # 49b8 <_sk_callback_hsw+0x340>
+  .byte  196,98,125,24,21,217,34,0,0         // vbroadcastss  0x22d9(%rip),%ymm10        # 4a34 <_sk_callback_hsw+0x33f>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,253,91,219                      // vcvtps2dq     %ymm3,%ymm3
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -11120,7 +11196,7 @@ _sk_parametric_a_hsw:
   .byte  196,195,101,74,217,128              // vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,100,95,216                  // vmaxps        %ymm8,%ymm3,%ymm3
-  .byte  196,98,125,24,5,52,34,0,0           // vbroadcastss  0x2234(%rip),%ymm8        # 49bc <_sk_callback_hsw+0x344>
+  .byte  196,98,125,24,5,176,34,0,0          // vbroadcastss  0x22b0(%rip),%ymm8        # 4a38 <_sk_callback_hsw+0x343>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -11129,26 +11205,26 @@ HIDDEN _sk_lab_to_xyz_hsw
 .globl _sk_lab_to_xyz_hsw
 FUNCTION(_sk_lab_to_xyz_hsw)
 _sk_lab_to_xyz_hsw:
-  .byte  196,98,125,24,5,38,34,0,0           // vbroadcastss  0x2226(%rip),%ymm8        # 49c0 <_sk_callback_hsw+0x348>
-  .byte  196,98,125,24,13,33,34,0,0          // vbroadcastss  0x2221(%rip),%ymm9        # 49c4 <_sk_callback_hsw+0x34c>
-  .byte  196,98,125,24,21,28,34,0,0          // vbroadcastss  0x221c(%rip),%ymm10        # 49c8 <_sk_callback_hsw+0x350>
+  .byte  196,98,125,24,5,162,34,0,0          // vbroadcastss  0x22a2(%rip),%ymm8        # 4a3c <_sk_callback_hsw+0x347>
+  .byte  196,98,125,24,13,157,34,0,0         // vbroadcastss  0x229d(%rip),%ymm9        # 4a40 <_sk_callback_hsw+0x34b>
+  .byte  196,98,125,24,21,152,34,0,0         // vbroadcastss  0x2298(%rip),%ymm10        # 4a44 <_sk_callback_hsw+0x34f>
   .byte  196,194,53,168,202                  // vfmadd213ps   %ymm10,%ymm9,%ymm1
   .byte  196,194,53,168,210                  // vfmadd213ps   %ymm10,%ymm9,%ymm2
-  .byte  196,98,125,24,13,13,34,0,0          // vbroadcastss  0x220d(%rip),%ymm9        # 49cc <_sk_callback_hsw+0x354>
+  .byte  196,98,125,24,13,137,34,0,0         // vbroadcastss  0x2289(%rip),%ymm9        # 4a48 <_sk_callback_hsw+0x353>
   .byte  196,66,125,184,200                  // vfmadd231ps   %ymm8,%ymm0,%ymm9
-  .byte  196,226,125,24,5,3,34,0,0           // vbroadcastss  0x2203(%rip),%ymm0        # 49d0 <_sk_callback_hsw+0x358>
+  .byte  196,226,125,24,5,127,34,0,0         // vbroadcastss  0x227f(%rip),%ymm0        # 4a4c <_sk_callback_hsw+0x357>
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
-  .byte  196,98,125,24,5,250,33,0,0          // vbroadcastss  0x21fa(%rip),%ymm8        # 49d4 <_sk_callback_hsw+0x35c>
+  .byte  196,98,125,24,5,118,34,0,0          // vbroadcastss  0x2276(%rip),%ymm8        # 4a50 <_sk_callback_hsw+0x35b>
   .byte  196,98,117,168,192                  // vfmadd213ps   %ymm0,%ymm1,%ymm8
-  .byte  196,98,125,24,13,240,33,0,0         // vbroadcastss  0x21f0(%rip),%ymm9        # 49d8 <_sk_callback_hsw+0x360>
+  .byte  196,98,125,24,13,108,34,0,0         // vbroadcastss  0x226c(%rip),%ymm9        # 4a54 <_sk_callback_hsw+0x35f>
   .byte  196,98,109,172,200                  // vfnmadd213ps  %ymm0,%ymm2,%ymm9
   .byte  196,193,60,89,200                   // vmulps        %ymm8,%ymm8,%ymm1
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
-  .byte  196,226,125,24,21,221,33,0,0        // vbroadcastss  0x21dd(%rip),%ymm2        # 49dc <_sk_callback_hsw+0x364>
+  .byte  196,226,125,24,21,89,34,0,0         // vbroadcastss  0x2259(%rip),%ymm2        # 4a58 <_sk_callback_hsw+0x363>
   .byte  197,108,194,209,1                   // vcmpltps      %ymm1,%ymm2,%ymm10
-  .byte  196,98,125,24,29,211,33,0,0         // vbroadcastss  0x21d3(%rip),%ymm11        # 49e0 <_sk_callback_hsw+0x368>
+  .byte  196,98,125,24,29,79,34,0,0          // vbroadcastss  0x224f(%rip),%ymm11        # 4a5c <_sk_callback_hsw+0x367>
   .byte  196,65,60,88,195                    // vaddps        %ymm11,%ymm8,%ymm8
-  .byte  196,98,125,24,37,201,33,0,0         // vbroadcastss  0x21c9(%rip),%ymm12        # 49e4 <_sk_callback_hsw+0x36c>
+  .byte  196,98,125,24,37,69,34,0,0          // vbroadcastss  0x2245(%rip),%ymm12        # 4a60 <_sk_callback_hsw+0x36b>
   .byte  196,65,60,89,196                    // vmulps        %ymm12,%ymm8,%ymm8
   .byte  196,99,61,74,193,160                // vblendvps     %ymm10,%ymm1,%ymm8,%ymm8
   .byte  197,252,89,200                      // vmulps        %ymm0,%ymm0,%ymm1
@@ -11163,9 +11239,9 @@ _sk_lab_to_xyz_hsw:
   .byte  196,65,52,88,203                    // vaddps        %ymm11,%ymm9,%ymm9
   .byte  196,65,52,89,204                    // vmulps        %ymm12,%ymm9,%ymm9
   .byte  196,227,53,74,208,32                // vblendvps     %ymm2,%ymm0,%ymm9,%ymm2
-  .byte  196,226,125,24,5,126,33,0,0         // vbroadcastss  0x217e(%rip),%ymm0        # 49e8 <_sk_callback_hsw+0x370>
+  .byte  196,226,125,24,5,250,33,0,0         // vbroadcastss  0x21fa(%rip),%ymm0        # 4a64 <_sk_callback_hsw+0x36f>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,117,33,0,0          // vbroadcastss  0x2175(%rip),%ymm8        # 49ec <_sk_callback_hsw+0x374>
+  .byte  196,98,125,24,5,241,33,0,0          // vbroadcastss  0x21f1(%rip),%ymm8        # 4a68 <_sk_callback_hsw+0x373>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -11183,7 +11259,7 @@ _sk_load_a8_hsw:
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,74,33,0,0         // vbroadcastss  0x214a(%rip),%ymm1        # 49f0 <_sk_callback_hsw+0x378>
+  .byte  196,226,125,24,13,198,33,0,0        // vbroadcastss  0x21c6(%rip),%ymm1        # 4a6c <_sk_callback_hsw+0x377>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -11248,7 +11324,7 @@ _sk_gather_a8_hsw:
   .byte  196,227,121,32,192,7                // vpinsrb       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,85,32,0,0         // vbroadcastss  0x2055(%rip),%ymm1        # 49f4 <_sk_callback_hsw+0x37c>
+  .byte  196,226,125,24,13,209,32,0,0        // vbroadcastss  0x20d1(%rip),%ymm1        # 4a70 <_sk_callback_hsw+0x37b>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -11266,7 +11342,7 @@ FUNCTION(_sk_store_a8_hsw)
 _sk_store_a8_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,48,32,0,0           // vbroadcastss  0x2030(%rip),%ymm8        # 49f8 <_sk_callback_hsw+0x380>
+  .byte  196,98,125,24,5,172,32,0,0          // vbroadcastss  0x20ac(%rip),%ymm8        # 4a74 <_sk_callback_hsw+0x37f>
   .byte  196,65,100,89,192                   // vmulps        %ymm8,%ymm3,%ymm8
   .byte  196,65,125,91,192                   // vcvtps2dq     %ymm8,%ymm8
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
@@ -11333,10 +11409,10 @@ _sk_load_g8_hsw:
   .byte  197,250,126,0                       // vmovq         (%rax),%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,102,31,0,0        // vbroadcastss  0x1f66(%rip),%ymm1        # 49fc <_sk_callback_hsw+0x384>
+  .byte  196,226,125,24,13,226,31,0,0        // vbroadcastss  0x1fe2(%rip),%ymm1        # 4a78 <_sk_callback_hsw+0x383>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,91,31,0,0         // vbroadcastss  0x1f5b(%rip),%ymm3        # 4a00 <_sk_callback_hsw+0x388>
+  .byte  196,226,125,24,29,215,31,0,0        // vbroadcastss  0x1fd7(%rip),%ymm3        # 4a7c <_sk_callback_hsw+0x387>
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
@@ -11398,10 +11474,10 @@ _sk_gather_g8_hsw:
   .byte  196,227,121,32,192,7                // vpinsrb       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,49,192                  // vpmovzxbd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,112,30,0,0        // vbroadcastss  0x1e70(%rip),%ymm1        # 4a04 <_sk_callback_hsw+0x38c>
+  .byte  196,226,125,24,13,236,30,0,0        // vbroadcastss  0x1eec(%rip),%ymm1        # 4a80 <_sk_callback_hsw+0x38b>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,101,30,0,0        // vbroadcastss  0x1e65(%rip),%ymm3        # 4a08 <_sk_callback_hsw+0x390>
+  .byte  196,226,125,24,29,225,30,0,0        // vbroadcastss  0x1ee1(%rip),%ymm3        # 4a84 <_sk_callback_hsw+0x38f>
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  91                                  // pop           %rbx
@@ -11457,14 +11533,14 @@ _sk_gather_i8_hsw:
   .byte  73,139,64,8                         // mov           0x8(%r8),%rax
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,226,117,144,28,128              // vpgatherdd    %ymm1,(%rax,%ymm0,4),%ymm3
-  .byte  197,229,219,5,109,31,0,0            // vpand         0x1f6d(%rip),%ymm3,%ymm0        # 4be0 <_sk_callback_hsw+0x568>
+  .byte  197,229,219,5,237,31,0,0            // vpand         0x1fed(%rip),%ymm3,%ymm0        # 4c60 <_sk_callback_hsw+0x56b>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,140,29,0,0          // vbroadcastss  0x1d8c(%rip),%ymm8        # 4a0c <_sk_callback_hsw+0x394>
+  .byte  196,98,125,24,5,8,30,0,0            // vbroadcastss  0x1e08(%rip),%ymm8        # 4a88 <_sk_callback_hsw+0x393>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,226,101,0,13,114,31,0,0         // vpshufb       0x1f72(%rip),%ymm3,%ymm1        # 4c00 <_sk_callback_hsw+0x588>
+  .byte  196,226,101,0,13,242,31,0,0         // vpshufb       0x1ff2(%rip),%ymm3,%ymm1        # 4c80 <_sk_callback_hsw+0x58b>
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,226,101,0,21,128,31,0,0         // vpshufb       0x1f80(%rip),%ymm3,%ymm2        # 4c20 <_sk_callback_hsw+0x5a8>
+  .byte  196,226,101,0,21,0,32,0,0           // vpshufb       0x2000(%rip),%ymm3,%ymm2        # 4ca0 <_sk_callback_hsw+0x5ab>
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
@@ -11488,23 +11564,23 @@ _sk_load_565_hsw:
   .byte  117,114                             // jne           2d40 <_sk_load_565_hsw+0x7c>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  196,226,125,51,208                  // vpmovzxwd     %xmm0,%ymm2
-  .byte  196,226,125,88,5,46,29,0,0          // vpbroadcastd  0x1d2e(%rip),%ymm0        # 4a10 <_sk_callback_hsw+0x398>
+  .byte  196,226,125,88,5,170,29,0,0         // vpbroadcastd  0x1daa(%rip),%ymm0        # 4a8c <_sk_callback_hsw+0x397>
   .byte  197,237,219,192                     // vpand         %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,33,29,0,0         // vbroadcastss  0x1d21(%rip),%ymm1        # 4a14 <_sk_callback_hsw+0x39c>
+  .byte  196,226,125,24,13,157,29,0,0        // vbroadcastss  0x1d9d(%rip),%ymm1        # 4a90 <_sk_callback_hsw+0x39b>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,24,29,0,0         // vpbroadcastd  0x1d18(%rip),%ymm1        # 4a18 <_sk_callback_hsw+0x3a0>
+  .byte  196,226,125,88,13,148,29,0,0        // vpbroadcastd  0x1d94(%rip),%ymm1        # 4a94 <_sk_callback_hsw+0x39f>
   .byte  197,237,219,201                     // vpand         %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,11,29,0,0         // vbroadcastss  0x1d0b(%rip),%ymm3        # 4a1c <_sk_callback_hsw+0x3a4>
+  .byte  196,226,125,24,29,135,29,0,0        // vbroadcastss  0x1d87(%rip),%ymm3        # 4a98 <_sk_callback_hsw+0x3a3>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,88,29,2,29,0,0          // vpbroadcastd  0x1d02(%rip),%ymm3        # 4a20 <_sk_callback_hsw+0x3a8>
+  .byte  196,226,125,88,29,126,29,0,0        // vpbroadcastd  0x1d7e(%rip),%ymm3        # 4a9c <_sk_callback_hsw+0x3a7>
   .byte  197,237,219,211                     // vpand         %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,245,28,0,0        // vbroadcastss  0x1cf5(%rip),%ymm3        # 4a24 <_sk_callback_hsw+0x3ac>
+  .byte  196,226,125,24,29,113,29,0,0        // vbroadcastss  0x1d71(%rip),%ymm3        # 4aa0 <_sk_callback_hsw+0x3ab>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,234,28,0,0        // vbroadcastss  0x1cea(%rip),%ymm3        # 4a28 <_sk_callback_hsw+0x3b0>
+  .byte  196,226,125,24,29,102,29,0,0        // vbroadcastss  0x1d66(%rip),%ymm3        # 4aa4 <_sk_callback_hsw+0x3af>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
@@ -11595,23 +11671,23 @@ _sk_gather_565_hsw:
   .byte  65,15,183,4,88                      // movzwl        (%r8,%rbx,2),%eax
   .byte  197,249,196,192,7                   // vpinsrw       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,51,208                  // vpmovzxwd     %xmm0,%ymm2
-  .byte  196,226,125,88,5,173,27,0,0         // vpbroadcastd  0x1bad(%rip),%ymm0        # 4a2c <_sk_callback_hsw+0x3b4>
+  .byte  196,226,125,88,5,41,28,0,0          // vpbroadcastd  0x1c29(%rip),%ymm0        # 4aa8 <_sk_callback_hsw+0x3b3>
   .byte  197,237,219,192                     // vpand         %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,160,27,0,0        // vbroadcastss  0x1ba0(%rip),%ymm1        # 4a30 <_sk_callback_hsw+0x3b8>
+  .byte  196,226,125,24,13,28,28,0,0         // vbroadcastss  0x1c1c(%rip),%ymm1        # 4aac <_sk_callback_hsw+0x3b7>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,151,27,0,0        // vpbroadcastd  0x1b97(%rip),%ymm1        # 4a34 <_sk_callback_hsw+0x3bc>
+  .byte  196,226,125,88,13,19,28,0,0         // vpbroadcastd  0x1c13(%rip),%ymm1        # 4ab0 <_sk_callback_hsw+0x3bb>
   .byte  197,237,219,201                     // vpand         %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,138,27,0,0        // vbroadcastss  0x1b8a(%rip),%ymm3        # 4a38 <_sk_callback_hsw+0x3c0>
+  .byte  196,226,125,24,29,6,28,0,0          // vbroadcastss  0x1c06(%rip),%ymm3        # 4ab4 <_sk_callback_hsw+0x3bf>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,88,29,129,27,0,0        // vpbroadcastd  0x1b81(%rip),%ymm3        # 4a3c <_sk_callback_hsw+0x3c4>
+  .byte  196,226,125,88,29,253,27,0,0        // vpbroadcastd  0x1bfd(%rip),%ymm3        # 4ab8 <_sk_callback_hsw+0x3c3>
   .byte  197,237,219,211                     // vpand         %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,116,27,0,0        // vbroadcastss  0x1b74(%rip),%ymm3        # 4a40 <_sk_callback_hsw+0x3c8>
+  .byte  196,226,125,24,29,240,27,0,0        // vbroadcastss  0x1bf0(%rip),%ymm3        # 4abc <_sk_callback_hsw+0x3c7>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,105,27,0,0        // vbroadcastss  0x1b69(%rip),%ymm3        # 4a44 <_sk_callback_hsw+0x3cc>
+  .byte  196,226,125,24,29,229,27,0,0        // vbroadcastss  0x1be5(%rip),%ymm3        # 4ac0 <_sk_callback_hsw+0x3cb>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,94                               // pop           %r14
@@ -11624,11 +11700,11 @@ FUNCTION(_sk_store_565_hsw)
 _sk_store_565_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,86,27,0,0           // vbroadcastss  0x1b56(%rip),%ymm8        # 4a48 <_sk_callback_hsw+0x3d0>
+  .byte  196,98,125,24,5,210,27,0,0          // vbroadcastss  0x1bd2(%rip),%ymm8        # 4ac4 <_sk_callback_hsw+0x3cf>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,53,114,241,11               // vpslld        $0xb,%ymm9,%ymm9
-  .byte  196,98,125,24,21,65,27,0,0          // vbroadcastss  0x1b41(%rip),%ymm10        # 4a4c <_sk_callback_hsw+0x3d4>
+  .byte  196,98,125,24,21,189,27,0,0         // vbroadcastss  0x1bbd(%rip),%ymm10        # 4ac8 <_sk_callback_hsw+0x3d3>
   .byte  196,65,116,89,210                   // vmulps        %ymm10,%ymm1,%ymm10
   .byte  196,65,125,91,210                   // vcvtps2dq     %ymm10,%ymm10
   .byte  196,193,45,114,242,5                // vpslld        $0x5,%ymm10,%ymm10
@@ -11696,25 +11772,25 @@ _sk_load_4444_hsw:
   .byte  15,133,138,0,0,0                    // jne           305c <_sk_load_4444_hsw+0x98>
   .byte  196,193,122,111,4,122               // vmovdqu       (%r10,%rdi,2),%xmm0
   .byte  196,226,125,51,216                  // vpmovzxwd     %xmm0,%ymm3
-  .byte  196,226,125,88,5,106,26,0,0         // vpbroadcastd  0x1a6a(%rip),%ymm0        # 4a50 <_sk_callback_hsw+0x3d8>
+  .byte  196,226,125,88,5,230,26,0,0         // vpbroadcastd  0x1ae6(%rip),%ymm0        # 4acc <_sk_callback_hsw+0x3d7>
   .byte  197,229,219,192                     // vpand         %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,93,26,0,0         // vbroadcastss  0x1a5d(%rip),%ymm1        # 4a54 <_sk_callback_hsw+0x3dc>
+  .byte  196,226,125,24,13,217,26,0,0        // vbroadcastss  0x1ad9(%rip),%ymm1        # 4ad0 <_sk_callback_hsw+0x3db>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,84,26,0,0         // vpbroadcastd  0x1a54(%rip),%ymm1        # 4a58 <_sk_callback_hsw+0x3e0>
+  .byte  196,226,125,88,13,208,26,0,0        // vpbroadcastd  0x1ad0(%rip),%ymm1        # 4ad4 <_sk_callback_hsw+0x3df>
   .byte  197,229,219,201                     // vpand         %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,71,26,0,0         // vbroadcastss  0x1a47(%rip),%ymm2        # 4a5c <_sk_callback_hsw+0x3e4>
+  .byte  196,226,125,24,21,195,26,0,0        // vbroadcastss  0x1ac3(%rip),%ymm2        # 4ad8 <_sk_callback_hsw+0x3e3>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,88,21,62,26,0,0         // vpbroadcastd  0x1a3e(%rip),%ymm2        # 4a60 <_sk_callback_hsw+0x3e8>
+  .byte  196,226,125,88,21,186,26,0,0        // vpbroadcastd  0x1aba(%rip),%ymm2        # 4adc <_sk_callback_hsw+0x3e7>
   .byte  197,229,219,210                     // vpand         %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,49,26,0,0           // vbroadcastss  0x1a31(%rip),%ymm8        # 4a64 <_sk_callback_hsw+0x3ec>
+  .byte  196,98,125,24,5,173,26,0,0          // vbroadcastss  0x1aad(%rip),%ymm8        # 4ae0 <_sk_callback_hsw+0x3eb>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,88,5,39,26,0,0           // vpbroadcastd  0x1a27(%rip),%ymm8        # 4a68 <_sk_callback_hsw+0x3f0>
+  .byte  196,98,125,88,5,163,26,0,0          // vpbroadcastd  0x1aa3(%rip),%ymm8        # 4ae4 <_sk_callback_hsw+0x3ef>
   .byte  196,193,101,219,216                 // vpand         %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,25,26,0,0           // vbroadcastss  0x1a19(%rip),%ymm8        # 4a6c <_sk_callback_hsw+0x3f4>
+  .byte  196,98,125,24,5,149,26,0,0          // vbroadcastss  0x1a95(%rip),%ymm8        # 4ae8 <_sk_callback_hsw+0x3f3>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -11807,25 +11883,25 @@ _sk_gather_4444_hsw:
   .byte  65,15,183,4,88                      // movzwl        (%r8,%rbx,2),%eax
   .byte  197,249,196,192,7                   // vpinsrw       $0x7,%eax,%xmm0,%xmm0
   .byte  196,226,125,51,216                  // vpmovzxwd     %xmm0,%ymm3
-  .byte  196,226,125,88,5,209,24,0,0         // vpbroadcastd  0x18d1(%rip),%ymm0        # 4a70 <_sk_callback_hsw+0x3f8>
+  .byte  196,226,125,88,5,77,25,0,0          // vpbroadcastd  0x194d(%rip),%ymm0        # 4aec <_sk_callback_hsw+0x3f7>
   .byte  197,229,219,192                     // vpand         %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,196,24,0,0        // vbroadcastss  0x18c4(%rip),%ymm1        # 4a74 <_sk_callback_hsw+0x3fc>
+  .byte  196,226,125,24,13,64,25,0,0         // vbroadcastss  0x1940(%rip),%ymm1        # 4af0 <_sk_callback_hsw+0x3fb>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,88,13,187,24,0,0        // vpbroadcastd  0x18bb(%rip),%ymm1        # 4a78 <_sk_callback_hsw+0x400>
+  .byte  196,226,125,88,13,55,25,0,0         // vpbroadcastd  0x1937(%rip),%ymm1        # 4af4 <_sk_callback_hsw+0x3ff>
   .byte  197,229,219,201                     // vpand         %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,174,24,0,0        // vbroadcastss  0x18ae(%rip),%ymm2        # 4a7c <_sk_callback_hsw+0x404>
+  .byte  196,226,125,24,21,42,25,0,0         // vbroadcastss  0x192a(%rip),%ymm2        # 4af8 <_sk_callback_hsw+0x403>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,88,21,165,24,0,0        // vpbroadcastd  0x18a5(%rip),%ymm2        # 4a80 <_sk_callback_hsw+0x408>
+  .byte  196,226,125,88,21,33,25,0,0         // vpbroadcastd  0x1921(%rip),%ymm2        # 4afc <_sk_callback_hsw+0x407>
   .byte  197,229,219,210                     // vpand         %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,152,24,0,0          // vbroadcastss  0x1898(%rip),%ymm8        # 4a84 <_sk_callback_hsw+0x40c>
+  .byte  196,98,125,24,5,20,25,0,0           // vbroadcastss  0x1914(%rip),%ymm8        # 4b00 <_sk_callback_hsw+0x40b>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,88,5,142,24,0,0          // vpbroadcastd  0x188e(%rip),%ymm8        # 4a88 <_sk_callback_hsw+0x410>
+  .byte  196,98,125,88,5,10,25,0,0           // vpbroadcastd  0x190a(%rip),%ymm8        # 4b04 <_sk_callback_hsw+0x40f>
   .byte  196,193,101,219,216                 // vpand         %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,128,24,0,0          // vbroadcastss  0x1880(%rip),%ymm8        # 4a8c <_sk_callback_hsw+0x414>
+  .byte  196,98,125,24,5,252,24,0,0          // vbroadcastss  0x18fc(%rip),%ymm8        # 4b08 <_sk_callback_hsw+0x413>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -11840,7 +11916,7 @@ FUNCTION(_sk_store_4444_hsw)
 _sk_store_4444_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,102,24,0,0          // vbroadcastss  0x1866(%rip),%ymm8        # 4a90 <_sk_callback_hsw+0x418>
+  .byte  196,98,125,24,5,226,24,0,0          // vbroadcastss  0x18e2(%rip),%ymm8        # 4b0c <_sk_callback_hsw+0x417>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,53,114,241,12               // vpslld        $0xc,%ymm9,%ymm9
@@ -11916,14 +11992,14 @@ _sk_load_8888_hsw:
   .byte  77,133,192                          // test          %r8,%r8
   .byte  117,88                              // jne           3375 <_sk_load_8888_hsw+0x6d>
   .byte  196,193,126,111,25                  // vmovdqu       (%r9),%ymm3
-  .byte  197,229,219,5,22,25,0,0             // vpand         0x1916(%rip),%ymm3,%ymm0        # 4c40 <_sk_callback_hsw+0x5c8>
+  .byte  197,229,219,5,150,25,0,0            // vpand         0x1996(%rip),%ymm3,%ymm0        # 4cc0 <_sk_callback_hsw+0x5cb>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,93,23,0,0           // vbroadcastss  0x175d(%rip),%ymm8        # 4a94 <_sk_callback_hsw+0x41c>
+  .byte  196,98,125,24,5,217,23,0,0          // vbroadcastss  0x17d9(%rip),%ymm8        # 4b10 <_sk_callback_hsw+0x41b>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,226,101,0,13,27,25,0,0          // vpshufb       0x191b(%rip),%ymm3,%ymm1        # 4c60 <_sk_callback_hsw+0x5e8>
+  .byte  196,226,101,0,13,155,25,0,0         // vpshufb       0x199b(%rip),%ymm3,%ymm1        # 4ce0 <_sk_callback_hsw+0x5eb>
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,226,101,0,21,41,25,0,0          // vpshufb       0x1929(%rip),%ymm3,%ymm2        # 4c80 <_sk_callback_hsw+0x608>
+  .byte  196,226,101,0,21,169,25,0,0         // vpshufb       0x19a9(%rip),%ymm3,%ymm2        # 4d00 <_sk_callback_hsw+0x60b>
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
@@ -11955,14 +12031,14 @@ _sk_gather_8888_hsw:
   .byte  197,245,254,192                     // vpaddd        %ymm0,%ymm1,%ymm0
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,194,117,144,28,128              // vpgatherdd    %ymm1,(%r8,%ymm0,4),%ymm3
-  .byte  197,229,219,5,215,24,0,0            // vpand         0x18d7(%rip),%ymm3,%ymm0        # 4ca0 <_sk_callback_hsw+0x628>
+  .byte  197,229,219,5,87,25,0,0             // vpand         0x1957(%rip),%ymm3,%ymm0        # 4d20 <_sk_callback_hsw+0x62b>
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,194,22,0,0          // vbroadcastss  0x16c2(%rip),%ymm8        # 4a98 <_sk_callback_hsw+0x420>
+  .byte  196,98,125,24,5,62,23,0,0           // vbroadcastss  0x173e(%rip),%ymm8        # 4b14 <_sk_callback_hsw+0x41f>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,226,101,0,13,220,24,0,0         // vpshufb       0x18dc(%rip),%ymm3,%ymm1        # 4cc0 <_sk_callback_hsw+0x648>
+  .byte  196,226,101,0,13,92,25,0,0          // vpshufb       0x195c(%rip),%ymm3,%ymm1        # 4d40 <_sk_callback_hsw+0x64b>
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,226,101,0,21,234,24,0,0         // vpshufb       0x18ea(%rip),%ymm3,%ymm2        # 4ce0 <_sk_callback_hsw+0x668>
+  .byte  196,226,101,0,21,106,25,0,0         // vpshufb       0x196a(%rip),%ymm3,%ymm2        # 4d60 <_sk_callback_hsw+0x66b>
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,229,114,211,24                  // vpsrld        $0x18,%ymm3,%ymm3
@@ -11979,7 +12055,7 @@ _sk_store_8888_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,141,12,189,0,0,0,0               // lea           0x0(,%rdi,4),%r9
   .byte  76,3,8                              // add           (%rax),%r9
-  .byte  196,98,125,24,5,114,22,0,0          // vbroadcastss  0x1672(%rip),%ymm8        # 4a9c <_sk_callback_hsw+0x424>
+  .byte  196,98,125,24,5,238,22,0,0          // vbroadcastss  0x16ee(%rip),%ymm8        # 4b18 <_sk_callback_hsw+0x423>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,65,116,89,208                   // vmulps        %ymm8,%ymm1,%ymm10
@@ -12176,7 +12252,7 @@ _sk_load_u16_be_hsw:
   .byte  197,241,235,192                     // vpor          %xmm0,%xmm1,%xmm0
   .byte  196,226,125,51,192                  // vpmovzxwd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,21,105,19,0,0         // vbroadcastss  0x1369(%rip),%ymm10        # 4aa0 <_sk_callback_hsw+0x428>
+  .byte  196,98,125,24,21,229,19,0,0         // vbroadcastss  0x13e5(%rip),%ymm10        # 4b1c <_sk_callback_hsw+0x427>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -12260,7 +12336,7 @@ _sk_load_rgb_u16_be_hsw:
   .byte  197,241,235,192                     // vpor          %xmm0,%xmm1,%xmm0
   .byte  196,226,125,51,192                  // vpmovzxwd     %xmm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,21,250,17,0,0         // vbroadcastss  0x11fa(%rip),%ymm10        # 4aa4 <_sk_callback_hsw+0x42c>
+  .byte  196,98,125,24,21,118,18,0,0         // vbroadcastss  0x1276(%rip),%ymm10        # 4b20 <_sk_callback_hsw+0x42b>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -12277,7 +12353,7 @@ _sk_load_rgb_u16_be_hsw:
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,174,17,0,0        // vbroadcastss  0x11ae(%rip),%ymm3        # 4aa8 <_sk_callback_hsw+0x430>
+  .byte  196,226,125,24,29,42,18,0,0         // vbroadcastss  0x122a(%rip),%ymm3        # 4b24 <_sk_callback_hsw+0x42f>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,193,121,110,4,64                // vmovd         (%r8,%rax,2),%xmm0
   .byte  196,193,121,196,68,64,4,2           // vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
@@ -12320,7 +12396,7 @@ _sk_store_u16_be_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
-  .byte  196,98,125,24,5,235,16,0,0          // vbroadcastss  0x10eb(%rip),%ymm8        # 4aac <_sk_callback_hsw+0x434>
+  .byte  196,98,125,24,5,103,17,0,0          // vbroadcastss  0x1167(%rip),%ymm8        # 4b28 <_sk_callback_hsw+0x433>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,67,125,25,202,1                 // vextractf128  $0x1,%ymm9,%xmm10
@@ -12580,11 +12656,11 @@ HIDDEN _sk_luminance_to_alpha_hsw
 .globl _sk_luminance_to_alpha_hsw
 FUNCTION(_sk_luminance_to_alpha_hsw)
 _sk_luminance_to_alpha_hsw:
-  .byte  196,226,125,24,29,59,13,0,0         // vbroadcastss  0xd3b(%rip),%ymm3        # 4ab0 <_sk_callback_hsw+0x438>
-  .byte  196,98,125,24,5,54,13,0,0           // vbroadcastss  0xd36(%rip),%ymm8        # 4ab4 <_sk_callback_hsw+0x43c>
+  .byte  196,226,125,24,29,183,13,0,0        // vbroadcastss  0xdb7(%rip),%ymm3        # 4b2c <_sk_callback_hsw+0x437>
+  .byte  196,98,125,24,5,178,13,0,0          // vbroadcastss  0xdb2(%rip),%ymm8        # 4b30 <_sk_callback_hsw+0x43b>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  196,226,125,184,203                 // vfmadd231ps   %ymm3,%ymm0,%ymm1
-  .byte  196,226,125,24,29,39,13,0,0         // vbroadcastss  0xd27(%rip),%ymm3        # 4ab8 <_sk_callback_hsw+0x440>
+  .byte  196,226,125,24,29,163,13,0,0        // vbroadcastss  0xda3(%rip),%ymm3        # 4b34 <_sk_callback_hsw+0x43f>
   .byte  196,226,109,168,217                 // vfmadd213ps   %ymm1,%ymm2,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -12692,6 +12768,36 @@ _sk_matrix_4x5_hsw:
   .byte  197,124,41,219                      // vmovaps       %ymm11,%ymm3
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_matrix_4x3_hsw
+.globl _sk_matrix_4x3_hsw
+FUNCTION(_sk_matrix_4x3_hsw)
+_sk_matrix_4x3_hsw:
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
+  .byte  196,226,125,24,88,16                // vbroadcastss  0x10(%rax),%ymm3
+  .byte  196,98,125,24,64,32                 // vbroadcastss  0x20(%rax),%ymm8
+  .byte  196,98,117,184,195                  // vfmadd231ps   %ymm3,%ymm1,%ymm8
+  .byte  196,98,125,184,194                  // vfmadd231ps   %ymm2,%ymm0,%ymm8
+  .byte  196,226,125,24,80,4                 // vbroadcastss  0x4(%rax),%ymm2
+  .byte  196,226,125,24,88,20                // vbroadcastss  0x14(%rax),%ymm3
+  .byte  196,98,125,24,72,36                 // vbroadcastss  0x24(%rax),%ymm9
+  .byte  196,98,117,184,203                  // vfmadd231ps   %ymm3,%ymm1,%ymm9
+  .byte  196,98,125,184,202                  // vfmadd231ps   %ymm2,%ymm0,%ymm9
+  .byte  196,226,125,24,88,8                 // vbroadcastss  0x8(%rax),%ymm3
+  .byte  196,98,125,24,80,24                 // vbroadcastss  0x18(%rax),%ymm10
+  .byte  196,226,125,24,80,40                // vbroadcastss  0x28(%rax),%ymm2
+  .byte  196,194,117,184,210                 // vfmadd231ps   %ymm10,%ymm1,%ymm2
+  .byte  196,226,125,184,211                 // vfmadd231ps   %ymm3,%ymm0,%ymm2
+  .byte  196,98,125,24,80,12                 // vbroadcastss  0xc(%rax),%ymm10
+  .byte  196,98,125,24,88,28                 // vbroadcastss  0x1c(%rax),%ymm11
+  .byte  196,226,125,24,88,44                // vbroadcastss  0x2c(%rax),%ymm3
+  .byte  196,194,117,184,219                 // vfmadd231ps   %ymm11,%ymm1,%ymm3
+  .byte  196,194,125,184,218                 // vfmadd231ps   %ymm10,%ymm0,%ymm3
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  197,124,41,192                      // vmovaps       %ymm8,%ymm0
+  .byte  197,124,41,201                      // vmovaps       %ymm9,%ymm1
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_matrix_perspective_hsw
 .globl _sk_matrix_perspective_hsw
 FUNCTION(_sk_matrix_perspective_hsw)
@@ -12727,9 +12833,9 @@ _sk_evenly_spaced_gradient_hsw:
   .byte  76,139,64,8                         // mov           0x8(%rax),%r8
   .byte  77,137,202                          // mov           %r9,%r10
   .byte  73,255,202                          // dec           %r10
-  .byte  120,7                               // js            3fcc <_sk_evenly_spaced_gradient_hsw+0x18>
+  .byte  120,7                               // js            4049 <_sk_evenly_spaced_gradient_hsw+0x18>
   .byte  196,193,242,42,202                  // vcvtsi2ss     %r10,%xmm1,%xmm1
-  .byte  235,22                              // jmp           3fe2 <_sk_evenly_spaced_gradient_hsw+0x2e>
+  .byte  235,22                              // jmp           405f <_sk_evenly_spaced_gradient_hsw+0x2e>
   .byte  77,137,211                          // mov           %r10,%r11
   .byte  73,209,235                          // shr           %r11
   .byte  65,131,226,1                        // and           $0x1,%r10d
@@ -12740,7 +12846,7 @@ _sk_evenly_spaced_gradient_hsw:
   .byte  197,244,89,200                      // vmulps        %ymm0,%ymm1,%ymm1
   .byte  197,126,91,217                      // vcvttps2dq    %ymm1,%ymm11
   .byte  73,131,249,8                        // cmp           $0x8,%r9
-  .byte  119,70                              // ja            403b <_sk_evenly_spaced_gradient_hsw+0x87>
+  .byte  119,70                              // ja            40b8 <_sk_evenly_spaced_gradient_hsw+0x87>
   .byte  196,66,37,22,0                      // vpermps       (%r8),%ymm11,%ymm8
   .byte  76,139,64,40                        // mov           0x28(%rax),%r8
   .byte  196,66,37,22,8                      // vpermps       (%r8),%ymm11,%ymm9
@@ -12756,7 +12862,7 @@ _sk_evenly_spaced_gradient_hsw:
   .byte  196,194,37,22,24                    // vpermps       (%r8),%ymm11,%ymm3
   .byte  72,139,64,64                        // mov           0x40(%rax),%rax
   .byte  196,98,37,22,40                     // vpermps       (%rax),%ymm11,%ymm13
-  .byte  235,110                             // jmp           40a9 <_sk_evenly_spaced_gradient_hsw+0xf5>
+  .byte  235,110                             // jmp           4126 <_sk_evenly_spaced_gradient_hsw+0xf5>
   .byte  196,65,13,118,246                   // vpcmpeqd      %ymm14,%ymm14,%ymm14
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,2,117,146,4,152                 // vgatherdps    %ymm1,(%r8,%ymm11,4),%ymm8
@@ -12795,11 +12901,11 @@ _sk_gradient_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  73,131,248,1                        // cmp           $0x1,%r8
-  .byte  15,134,180,0,0,0                    // jbe           4188 <_sk_gradient_hsw+0xc3>
+  .byte  15,134,180,0,0,0                    // jbe           4205 <_sk_gradient_hsw+0xc3>
   .byte  76,139,72,72                        // mov           0x48(%rax),%r9
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  65,186,1,0,0,0                      // mov           $0x1,%r10d
-  .byte  196,226,125,24,21,209,9,0,0         // vbroadcastss  0x9d1(%rip),%ymm2        # 4abc <_sk_callback_hsw+0x444>
+  .byte  196,226,125,24,21,208,9,0,0         // vbroadcastss  0x9d0(%rip),%ymm2        # 4b38 <_sk_callback_hsw+0x443>
   .byte  196,65,53,239,201                   // vpxor         %ymm9,%ymm9,%ymm9
   .byte  196,130,125,24,28,145               // vbroadcastss  (%r9,%r10,4),%ymm3
   .byte  197,228,194,216,2                   // vcmpleps      %ymm0,%ymm3,%ymm3
@@ -12807,10 +12913,10 @@ _sk_gradient_hsw:
   .byte  196,65,101,254,201                  // vpaddd        %ymm9,%ymm3,%ymm9
   .byte  73,255,194                          // inc           %r10
   .byte  77,57,208                           // cmp           %r10,%r8
-  .byte  117,226                             // jne           40f0 <_sk_gradient_hsw+0x2b>
+  .byte  117,226                             // jne           416d <_sk_gradient_hsw+0x2b>
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  73,131,248,8                        // cmp           $0x8,%r8
-  .byte  118,121                             // jbe           4191 <_sk_gradient_hsw+0xcc>
+  .byte  118,121                             // jbe           420e <_sk_gradient_hsw+0xcc>
   .byte  196,65,13,118,246                   // vpcmpeqd      %ymm14,%ymm14,%ymm14
   .byte  197,245,118,201                     // vpcmpeqd      %ymm1,%ymm1,%ymm1
   .byte  196,2,117,146,4,137                 // vgatherdps    %ymm1,(%r9,%ymm9,4),%ymm8
@@ -12834,7 +12940,7 @@ _sk_gradient_hsw:
   .byte  196,130,21,146,28,136               // vgatherdps    %ymm13,(%r8,%ymm9,4),%ymm3
   .byte  72,139,64,64                        // mov           0x40(%rax),%rax
   .byte  196,34,13,146,44,136                // vgatherdps    %ymm14,(%rax,%ymm9,4),%ymm13
-  .byte  235,77                              // jmp           41d5 <_sk_gradient_hsw+0x110>
+  .byte  235,77                              // jmp           4252 <_sk_gradient_hsw+0x110>
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
   .byte  196,66,53,22,1                      // vpermps       (%r9),%ymm9,%ymm8
@@ -12894,24 +13000,24 @@ _sk_xy_to_unit_angle_hsw:
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,80,8,0,0           // vbroadcastss  0x850(%rip),%ymm13        # 4ac0 <_sk_callback_hsw+0x448>
-  .byte  196,98,125,24,53,75,8,0,0           // vbroadcastss  0x84b(%rip),%ymm14        # 4ac4 <_sk_callback_hsw+0x44c>
+  .byte  196,98,125,24,45,79,8,0,0           // vbroadcastss  0x84f(%rip),%ymm13        # 4b3c <_sk_callback_hsw+0x447>
+  .byte  196,98,125,24,53,74,8,0,0           // vbroadcastss  0x84a(%rip),%ymm14        # 4b40 <_sk_callback_hsw+0x44b>
   .byte  196,66,29,184,245                   // vfmadd231ps   %ymm13,%ymm12,%ymm14
-  .byte  196,98,125,24,45,65,8,0,0           // vbroadcastss  0x841(%rip),%ymm13        # 4ac8 <_sk_callback_hsw+0x450>
+  .byte  196,98,125,24,45,64,8,0,0           // vbroadcastss  0x840(%rip),%ymm13        # 4b44 <_sk_callback_hsw+0x44f>
   .byte  196,66,29,184,238                   // vfmadd231ps   %ymm14,%ymm12,%ymm13
-  .byte  196,98,125,24,53,55,8,0,0           // vbroadcastss  0x837(%rip),%ymm14        # 4acc <_sk_callback_hsw+0x454>
+  .byte  196,98,125,24,53,54,8,0,0           // vbroadcastss  0x836(%rip),%ymm14        # 4b48 <_sk_callback_hsw+0x453>
   .byte  196,66,29,184,245                   // vfmadd231ps   %ymm13,%ymm12,%ymm14
   .byte  196,65,36,89,222                    // vmulps        %ymm14,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,34,8,0,0           // vbroadcastss  0x822(%rip),%ymm10        # 4ad0 <_sk_callback_hsw+0x458>
+  .byte  196,98,125,24,21,33,8,0,0           // vbroadcastss  0x821(%rip),%ymm10        # 4b4c <_sk_callback_hsw+0x457>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,12,8,0,0           // vbroadcastss  0x80c(%rip),%ymm10        # 4ad4 <_sk_callback_hsw+0x45c>
+  .byte  196,98,125,24,21,11,8,0,0           // vbroadcastss  0x80b(%rip),%ymm10        # 4b50 <_sk_callback_hsw+0x45b>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,246,7,0,0          // vbroadcastss  0x7f6(%rip),%ymm10        # 4ad8 <_sk_callback_hsw+0x460>
+  .byte  196,98,125,24,21,245,7,0,0          // vbroadcastss  0x7f5(%rip),%ymm10        # 4b54 <_sk_callback_hsw+0x45f>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -12934,7 +13040,7 @@ HIDDEN _sk_save_xy_hsw
 FUNCTION(_sk_save_xy_hsw)
 _sk_save_xy_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,195,7,0,0           // vbroadcastss  0x7c3(%rip),%ymm8        # 4adc <_sk_callback_hsw+0x464>
+  .byte  196,98,125,24,5,194,7,0,0           // vbroadcastss  0x7c2(%rip),%ymm8        # 4b58 <_sk_callback_hsw+0x463>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -12968,9 +13074,9 @@ HIDDEN _sk_bilinear_nx_hsw
 FUNCTION(_sk_bilinear_nx_hsw)
 _sk_bilinear_nx_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,87,7,0,0           // vbroadcastss  0x757(%rip),%ymm0        # 4ae0 <_sk_callback_hsw+0x468>
+  .byte  196,226,125,24,5,86,7,0,0           // vbroadcastss  0x756(%rip),%ymm0        # 4b5c <_sk_callback_hsw+0x467>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,78,7,0,0            // vbroadcastss  0x74e(%rip),%ymm8        # 4ae4 <_sk_callback_hsw+0x46c>
+  .byte  196,98,125,24,5,77,7,0,0            // vbroadcastss  0x74d(%rip),%ymm8        # 4b60 <_sk_callback_hsw+0x46b>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -12981,7 +13087,7 @@ HIDDEN _sk_bilinear_px_hsw
 FUNCTION(_sk_bilinear_px_hsw)
 _sk_bilinear_px_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,54,7,0,0           // vbroadcastss  0x736(%rip),%ymm0        # 4ae8 <_sk_callback_hsw+0x470>
+  .byte  196,226,125,24,5,53,7,0,0           // vbroadcastss  0x735(%rip),%ymm0        # 4b64 <_sk_callback_hsw+0x46f>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -12993,9 +13099,9 @@ HIDDEN _sk_bilinear_ny_hsw
 FUNCTION(_sk_bilinear_ny_hsw)
 _sk_bilinear_ny_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,26,7,0,0          // vbroadcastss  0x71a(%rip),%ymm1        # 4aec <_sk_callback_hsw+0x474>
+  .byte  196,226,125,24,13,25,7,0,0          // vbroadcastss  0x719(%rip),%ymm1        # 4b68 <_sk_callback_hsw+0x473>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,16,7,0,0            // vbroadcastss  0x710(%rip),%ymm8        # 4af0 <_sk_callback_hsw+0x478>
+  .byte  196,98,125,24,5,15,7,0,0            // vbroadcastss  0x70f(%rip),%ymm8        # 4b6c <_sk_callback_hsw+0x477>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -13006,7 +13112,7 @@ HIDDEN _sk_bilinear_py_hsw
 FUNCTION(_sk_bilinear_py_hsw)
 _sk_bilinear_py_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,248,6,0,0         // vbroadcastss  0x6f8(%rip),%ymm1        # 4af4 <_sk_callback_hsw+0x47c>
+  .byte  196,226,125,24,13,247,6,0,0         // vbroadcastss  0x6f7(%rip),%ymm1        # 4b70 <_sk_callback_hsw+0x47b>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -13018,13 +13124,13 @@ HIDDEN _sk_bicubic_n3x_hsw
 FUNCTION(_sk_bicubic_n3x_hsw)
 _sk_bicubic_n3x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,219,6,0,0          // vbroadcastss  0x6db(%rip),%ymm0        # 4af8 <_sk_callback_hsw+0x480>
+  .byte  196,226,125,24,5,218,6,0,0          // vbroadcastss  0x6da(%rip),%ymm0        # 4b74 <_sk_callback_hsw+0x47f>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,210,6,0,0           // vbroadcastss  0x6d2(%rip),%ymm8        # 4afc <_sk_callback_hsw+0x484>
+  .byte  196,98,125,24,5,209,6,0,0           // vbroadcastss  0x6d1(%rip),%ymm8        # 4b78 <_sk_callback_hsw+0x483>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,195,6,0,0          // vbroadcastss  0x6c3(%rip),%ymm10        # 4b00 <_sk_callback_hsw+0x488>
-  .byte  196,98,125,24,29,190,6,0,0          // vbroadcastss  0x6be(%rip),%ymm11        # 4b04 <_sk_callback_hsw+0x48c>
+  .byte  196,98,125,24,21,194,6,0,0          // vbroadcastss  0x6c2(%rip),%ymm10        # 4b7c <_sk_callback_hsw+0x487>
+  .byte  196,98,125,24,29,189,6,0,0          // vbroadcastss  0x6bd(%rip),%ymm11        # 4b80 <_sk_callback_hsw+0x48b>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,36,89,193                    // vmulps        %ymm9,%ymm11,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -13036,16 +13142,16 @@ HIDDEN _sk_bicubic_n1x_hsw
 FUNCTION(_sk_bicubic_n1x_hsw)
 _sk_bicubic_n1x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,161,6,0,0          // vbroadcastss  0x6a1(%rip),%ymm0        # 4b08 <_sk_callback_hsw+0x490>
+  .byte  196,226,125,24,5,160,6,0,0          // vbroadcastss  0x6a0(%rip),%ymm0        # 4b84 <_sk_callback_hsw+0x48f>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,152,6,0,0           // vbroadcastss  0x698(%rip),%ymm8        # 4b0c <_sk_callback_hsw+0x494>
+  .byte  196,98,125,24,5,151,6,0,0           // vbroadcastss  0x697(%rip),%ymm8        # 4b88 <_sk_callback_hsw+0x493>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,142,6,0,0          // vbroadcastss  0x68e(%rip),%ymm9        # 4b10 <_sk_callback_hsw+0x498>
-  .byte  196,98,125,24,21,137,6,0,0          // vbroadcastss  0x689(%rip),%ymm10        # 4b14 <_sk_callback_hsw+0x49c>
+  .byte  196,98,125,24,13,141,6,0,0          // vbroadcastss  0x68d(%rip),%ymm9        # 4b8c <_sk_callback_hsw+0x497>
+  .byte  196,98,125,24,21,136,6,0,0          // vbroadcastss  0x688(%rip),%ymm10        # 4b90 <_sk_callback_hsw+0x49b>
   .byte  196,66,61,168,209                   // vfmadd213ps   %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,13,127,6,0,0          // vbroadcastss  0x67f(%rip),%ymm9        # 4b18 <_sk_callback_hsw+0x4a0>
+  .byte  196,98,125,24,13,126,6,0,0          // vbroadcastss  0x67e(%rip),%ymm9        # 4b94 <_sk_callback_hsw+0x49f>
   .byte  196,66,61,184,202                   // vfmadd231ps   %ymm10,%ymm8,%ymm9
-  .byte  196,98,125,24,21,117,6,0,0          // vbroadcastss  0x675(%rip),%ymm10        # 4b1c <_sk_callback_hsw+0x4a4>
+  .byte  196,98,125,24,21,116,6,0,0          // vbroadcastss  0x674(%rip),%ymm10        # 4b98 <_sk_callback_hsw+0x4a3>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  197,124,17,144,128,0,0,0            // vmovups       %ymm10,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -13056,14 +13162,14 @@ HIDDEN _sk_bicubic_p1x_hsw
 FUNCTION(_sk_bicubic_p1x_hsw)
 _sk_bicubic_p1x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,93,6,0,0            // vbroadcastss  0x65d(%rip),%ymm8        # 4b20 <_sk_callback_hsw+0x4a8>
+  .byte  196,98,125,24,5,92,6,0,0            // vbroadcastss  0x65c(%rip),%ymm8        # 4b9c <_sk_callback_hsw+0x4a7>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,72,64                    // vmovups       0x40(%rax),%ymm9
-  .byte  196,98,125,24,21,79,6,0,0           // vbroadcastss  0x64f(%rip),%ymm10        # 4b24 <_sk_callback_hsw+0x4ac>
-  .byte  196,98,125,24,29,74,6,0,0           // vbroadcastss  0x64a(%rip),%ymm11        # 4b28 <_sk_callback_hsw+0x4b0>
+  .byte  196,98,125,24,21,78,6,0,0           // vbroadcastss  0x64e(%rip),%ymm10        # 4ba0 <_sk_callback_hsw+0x4ab>
+  .byte  196,98,125,24,29,73,6,0,0           // vbroadcastss  0x649(%rip),%ymm11        # 4ba4 <_sk_callback_hsw+0x4af>
   .byte  196,66,53,168,218                   // vfmadd213ps   %ymm10,%ymm9,%ymm11
   .byte  196,66,53,168,216                   // vfmadd213ps   %ymm8,%ymm9,%ymm11
-  .byte  196,98,125,24,5,59,6,0,0            // vbroadcastss  0x63b(%rip),%ymm8        # 4b2c <_sk_callback_hsw+0x4b4>
+  .byte  196,98,125,24,5,58,6,0,0            // vbroadcastss  0x63a(%rip),%ymm8        # 4ba8 <_sk_callback_hsw+0x4b3>
   .byte  196,66,53,184,195                   // vfmadd231ps   %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -13074,12 +13180,12 @@ HIDDEN _sk_bicubic_p3x_hsw
 FUNCTION(_sk_bicubic_p3x_hsw)
 _sk_bicubic_p3x_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,35,6,0,0           // vbroadcastss  0x623(%rip),%ymm0        # 4b30 <_sk_callback_hsw+0x4b8>
+  .byte  196,226,125,24,5,34,6,0,0           // vbroadcastss  0x622(%rip),%ymm0        # 4bac <_sk_callback_hsw+0x4b7>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,16,6,0,0           // vbroadcastss  0x610(%rip),%ymm10        # 4b34 <_sk_callback_hsw+0x4bc>
-  .byte  196,98,125,24,29,11,6,0,0           // vbroadcastss  0x60b(%rip),%ymm11        # 4b38 <_sk_callback_hsw+0x4c0>
+  .byte  196,98,125,24,21,15,6,0,0           // vbroadcastss  0x60f(%rip),%ymm10        # 4bb0 <_sk_callback_hsw+0x4bb>
+  .byte  196,98,125,24,29,10,6,0,0           // vbroadcastss  0x60a(%rip),%ymm11        # 4bb4 <_sk_callback_hsw+0x4bf>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,52,89,195                    // vmulps        %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -13091,13 +13197,13 @@ HIDDEN _sk_bicubic_n3y_hsw
 FUNCTION(_sk_bicubic_n3y_hsw)
 _sk_bicubic_n3y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,238,5,0,0         // vbroadcastss  0x5ee(%rip),%ymm1        # 4b3c <_sk_callback_hsw+0x4c4>
+  .byte  196,226,125,24,13,237,5,0,0         // vbroadcastss  0x5ed(%rip),%ymm1        # 4bb8 <_sk_callback_hsw+0x4c3>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,228,5,0,0           // vbroadcastss  0x5e4(%rip),%ymm8        # 4b40 <_sk_callback_hsw+0x4c8>
+  .byte  196,98,125,24,5,227,5,0,0           // vbroadcastss  0x5e3(%rip),%ymm8        # 4bbc <_sk_callback_hsw+0x4c7>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,213,5,0,0          // vbroadcastss  0x5d5(%rip),%ymm10        # 4b44 <_sk_callback_hsw+0x4cc>
-  .byte  196,98,125,24,29,208,5,0,0          // vbroadcastss  0x5d0(%rip),%ymm11        # 4b48 <_sk_callback_hsw+0x4d0>
+  .byte  196,98,125,24,21,212,5,0,0          // vbroadcastss  0x5d4(%rip),%ymm10        # 4bc0 <_sk_callback_hsw+0x4cb>
+  .byte  196,98,125,24,29,207,5,0,0          // vbroadcastss  0x5cf(%rip),%ymm11        # 4bc4 <_sk_callback_hsw+0x4cf>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,36,89,193                    // vmulps        %ymm9,%ymm11,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -13109,16 +13215,16 @@ HIDDEN _sk_bicubic_n1y_hsw
 FUNCTION(_sk_bicubic_n1y_hsw)
 _sk_bicubic_n1y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,179,5,0,0         // vbroadcastss  0x5b3(%rip),%ymm1        # 4b4c <_sk_callback_hsw+0x4d4>
+  .byte  196,226,125,24,13,178,5,0,0         // vbroadcastss  0x5b2(%rip),%ymm1        # 4bc8 <_sk_callback_hsw+0x4d3>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,169,5,0,0           // vbroadcastss  0x5a9(%rip),%ymm8        # 4b50 <_sk_callback_hsw+0x4d8>
+  .byte  196,98,125,24,5,168,5,0,0           // vbroadcastss  0x5a8(%rip),%ymm8        # 4bcc <_sk_callback_hsw+0x4d7>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,159,5,0,0          // vbroadcastss  0x59f(%rip),%ymm9        # 4b54 <_sk_callback_hsw+0x4dc>
-  .byte  196,98,125,24,21,154,5,0,0          // vbroadcastss  0x59a(%rip),%ymm10        # 4b58 <_sk_callback_hsw+0x4e0>
+  .byte  196,98,125,24,13,158,5,0,0          // vbroadcastss  0x59e(%rip),%ymm9        # 4bd0 <_sk_callback_hsw+0x4db>
+  .byte  196,98,125,24,21,153,5,0,0          // vbroadcastss  0x599(%rip),%ymm10        # 4bd4 <_sk_callback_hsw+0x4df>
   .byte  196,66,61,168,209                   // vfmadd213ps   %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,13,144,5,0,0          // vbroadcastss  0x590(%rip),%ymm9        # 4b5c <_sk_callback_hsw+0x4e4>
+  .byte  196,98,125,24,13,143,5,0,0          // vbroadcastss  0x58f(%rip),%ymm9        # 4bd8 <_sk_callback_hsw+0x4e3>
   .byte  196,66,61,184,202                   // vfmadd231ps   %ymm10,%ymm8,%ymm9
-  .byte  196,98,125,24,21,134,5,0,0          // vbroadcastss  0x586(%rip),%ymm10        # 4b60 <_sk_callback_hsw+0x4e8>
+  .byte  196,98,125,24,21,133,5,0,0          // vbroadcastss  0x585(%rip),%ymm10        # 4bdc <_sk_callback_hsw+0x4e7>
   .byte  196,66,61,184,209                   // vfmadd231ps   %ymm9,%ymm8,%ymm10
   .byte  197,124,17,144,160,0,0,0            // vmovups       %ymm10,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -13129,14 +13235,14 @@ HIDDEN _sk_bicubic_p1y_hsw
 FUNCTION(_sk_bicubic_p1y_hsw)
 _sk_bicubic_p1y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,110,5,0,0           // vbroadcastss  0x56e(%rip),%ymm8        # 4b64 <_sk_callback_hsw+0x4ec>
+  .byte  196,98,125,24,5,109,5,0,0           // vbroadcastss  0x56d(%rip),%ymm8        # 4be0 <_sk_callback_hsw+0x4eb>
   .byte  197,188,88,72,32                    // vaddps        0x20(%rax),%ymm8,%ymm1
   .byte  197,124,16,72,96                    // vmovups       0x60(%rax),%ymm9
-  .byte  196,98,125,24,21,95,5,0,0           // vbroadcastss  0x55f(%rip),%ymm10        # 4b68 <_sk_callback_hsw+0x4f0>
-  .byte  196,98,125,24,29,90,5,0,0           // vbroadcastss  0x55a(%rip),%ymm11        # 4b6c <_sk_callback_hsw+0x4f4>
+  .byte  196,98,125,24,21,94,5,0,0           // vbroadcastss  0x55e(%rip),%ymm10        # 4be4 <_sk_callback_hsw+0x4ef>
+  .byte  196,98,125,24,29,89,5,0,0           // vbroadcastss  0x559(%rip),%ymm11        # 4be8 <_sk_callback_hsw+0x4f3>
   .byte  196,66,53,168,218                   // vfmadd213ps   %ymm10,%ymm9,%ymm11
   .byte  196,66,53,168,216                   // vfmadd213ps   %ymm8,%ymm9,%ymm11
-  .byte  196,98,125,24,5,75,5,0,0            // vbroadcastss  0x54b(%rip),%ymm8        # 4b70 <_sk_callback_hsw+0x4f8>
+  .byte  196,98,125,24,5,74,5,0,0            // vbroadcastss  0x54a(%rip),%ymm8        # 4bec <_sk_callback_hsw+0x4f7>
   .byte  196,66,53,184,195                   // vfmadd231ps   %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -13147,12 +13253,12 @@ HIDDEN _sk_bicubic_p3y_hsw
 FUNCTION(_sk_bicubic_p3y_hsw)
 _sk_bicubic_p3y_hsw:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,51,5,0,0          // vbroadcastss  0x533(%rip),%ymm1        # 4b74 <_sk_callback_hsw+0x4fc>
+  .byte  196,226,125,24,13,50,5,0,0          // vbroadcastss  0x532(%rip),%ymm1        # 4bf0 <_sk_callback_hsw+0x4fb>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,31,5,0,0           // vbroadcastss  0x51f(%rip),%ymm10        # 4b78 <_sk_callback_hsw+0x500>
-  .byte  196,98,125,24,29,26,5,0,0           // vbroadcastss  0x51a(%rip),%ymm11        # 4b7c <_sk_callback_hsw+0x504>
+  .byte  196,98,125,24,21,30,5,0,0           // vbroadcastss  0x51e(%rip),%ymm10        # 4bf4 <_sk_callback_hsw+0x4ff>
+  .byte  196,98,125,24,29,25,5,0,0           // vbroadcastss  0x519(%rip),%ymm11        # 4bf8 <_sk_callback_hsw+0x503>
   .byte  196,66,61,168,218                   // vfmadd213ps   %ymm10,%ymm8,%ymm11
   .byte  196,65,52,89,195                    // vmulps        %ymm11,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -13276,25 +13382,25 @@ BALIGN4
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 4851 <.literal4+0xb1>
+  .byte  71,225,61                           // rex.RXB       loope 48cd <.literal4+0xb1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 4861 <.literal4+0xc1>
+  .byte  71,225,61                           // rex.RXB       loope 48dd <.literal4+0xc1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 4871 <.literal4+0xd1>
+  .byte  71,225,61                           // rex.RXB       loope 48ed <.literal4+0xd1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 4881 <.literal4+0xe1>
+  .byte  71,225,61                           // rex.RXB       loope 48fd <.literal4+0xe1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -13341,7 +13447,7 @@ BALIGN4
   .byte  190,129,128,128,59                  // mov           $0x3b808081,%esi
   .byte  129,128,128,59,0,248,0,0,8,33       // addl          $0x21080000,-0x7ffc480(%rax)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        48d1 <.literal4+0x131>
+  .byte  224,7                               // loopne        494d <.literal4+0x131>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -13357,10 +13463,10 @@ BALIGN4
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
   .byte  0,52,255                            // add           %dh,(%rdi,%rdi,8)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            48f8 <.literal4+0x158>
+  .byte  127,0                               // jg            4974 <.literal4+0x158>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4971 <.literal4+0x1d1>
+  .byte  119,115                             // ja            49ed <.literal4+0x1d1>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -13374,10 +13480,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            492c <.literal4+0x18c>
+  .byte  127,0                               // jg            49a8 <.literal4+0x18c>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            49a5 <.literal4+0x205>
+  .byte  119,115                             // ja            4a21 <.literal4+0x205>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -13391,10 +13497,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4960 <.literal4+0x1c0>
+  .byte  127,0                               // jg            49dc <.literal4+0x1c0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            49d9 <.literal4+0x239>
+  .byte  119,115                             // ja            4a55 <.literal4+0x239>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -13408,10 +13514,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4994 <.literal4+0x1f4>
+  .byte  127,0                               // jg            4a10 <.literal4+0x1f4>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4a0d <.literal4+0x26d>
+  .byte  119,115                             // ja            4a89 <.literal4+0x26d>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -13424,7 +13530,7 @@ BALIGN4
   .byte  0,75,0                              // add           %cl,0x0(%rbx)
   .byte  0,128,63,0,0,200                    // add           %al,-0x37ffffc1(%rax)
   .byte  66,0,0                              // rex.X         add %al,(%rax)
-  .byte  127,67                              // jg            4a0b <.literal4+0x26b>
+  .byte  127,67                              // jg            4a87 <.literal4+0x26b>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -13436,10 +13542,10 @@ BALIGN4
   .byte  190,80,128,3,62                     // mov           $0x3e038050,%esi
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           4a2b <.literal4+0x28b>
+  .byte  118,63                              // jbe           4aa7 <.literal4+0x28b>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            4a3f <.literal4+0x29f>
+  .byte  127,67                              // jg            4abb <.literal4+0x29f>
   .byte  129,128,128,59,0,0,128,63,129,128   // addl          $0x80813f80,0x3b80(%rax)
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,128,63,129,128,128                // add           %al,-0x7f7f7ec1(%rax)
@@ -13448,7 +13554,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        4a21 <.literal4+0x281>
+  .byte  224,7                               // loopne        4a9d <.literal4+0x281>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -13460,7 +13566,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        4a3d <.literal4+0x29d>
+  .byte  224,7                               // loopne        4ab9 <.literal4+0x29d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -13471,7 +13577,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            4a92 <.literal4+0x2f2>
+  .byte  124,66                              // jl            4b0e <.literal4+0x2f2>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,55,0,15                 // mov           %ecx,0xf003788(%rax)
@@ -13489,9 +13595,9 @@ BALIGN4
   .byte  137,136,136,59,15,0                 // mov           %ecx,0xf3b88(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,61,0,0                  // mov           %ecx,0x3d88(%rax)
-  .byte  112,65                              // jo            4ad5 <.literal4+0x335>
+  .byte  112,65                              // jo            4b51 <.literal4+0x335>
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            4ae3 <.literal4+0x343>
+  .byte  127,67                              // jg            4b5f <.literal4+0x343>
   .byte  128,0,128                           // addb          $0x80,(%rax)
   .byte  55                                  // (bad)
   .byte  128,0,128                           // addb          $0x80,(%rax)
@@ -13499,7 +13605,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  255                                 // (bad)
-  .byte  127,71                              // jg            4af7 <.literal4+0x357>
+  .byte  127,71                              // jg            4b73 <.literal4+0x357>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,89                               // ds            pop %rcx
@@ -13599,16 +13705,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004ba8 <_sk_callback_hsw+0xa000530>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004c28 <_sk_callback_hsw+0xa000533>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004bb0 <_sk_callback_hsw+0x12000538>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004c30 <_sk_callback_hsw+0x1200053b>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004bb8 <_sk_callback_hsw+0x1a000540>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004c38 <_sk_callback_hsw+0x1a000543>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004bc0 <_sk_callback_hsw+0x3000548>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004c40 <_sk_callback_hsw+0x300054b>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -13651,16 +13757,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004c08 <_sk_callback_hsw+0xa000590>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004c88 <_sk_callback_hsw+0xa000593>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004c10 <_sk_callback_hsw+0x12000598>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004c90 <_sk_callback_hsw+0x1200059b>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004c18 <_sk_callback_hsw+0x1a0005a0>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004c98 <_sk_callback_hsw+0x1a0005a3>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004c20 <_sk_callback_hsw+0x30005a8>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004ca0 <_sk_callback_hsw+0x30005ab>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -13703,16 +13809,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004c68 <_sk_callback_hsw+0xa0005f0>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004ce8 <_sk_callback_hsw+0xa0005f3>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004c70 <_sk_callback_hsw+0x120005f8>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004cf0 <_sk_callback_hsw+0x120005fb>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004c78 <_sk_callback_hsw+0x1a000600>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004cf8 <_sk_callback_hsw+0x1a000603>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004c80 <_sk_callback_hsw+0x3000608>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004d00 <_sk_callback_hsw+0x300060b>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -13755,16 +13861,16 @@ BALIGN32
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004cc8 <_sk_callback_hsw+0xa000650>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004d48 <_sk_callback_hsw+0xa000653>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004cd0 <_sk_callback_hsw+0x12000658>
+  .byte  255,13,255,255,255,17               // decl          0x11ffffff(%rip)        # 12004d50 <_sk_callback_hsw+0x1200065b>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004cd8 <_sk_callback_hsw+0x1a000660>
+  .byte  255,21,255,255,255,25               // callq         *0x19ffffff(%rip)        # 1a004d58 <_sk_callback_hsw+0x1a000663>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004ce0 <_sk_callback_hsw+0x3000668>
+  .byte  255,29,255,255,255,2                // lcall         *0x2ffffff(%rip)        # 3004d60 <_sk_callback_hsw+0x300066b>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -13885,14 +13991,14 @@ _sk_seed_shader_avx:
   .byte  197,249,112,192,0                   // vpshufd       $0x0,%xmm0,%xmm0
   .byte  196,227,125,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,191,98,0,0        // vbroadcastss  0x62bf(%rip),%ymm1        # 6388 <_sk_callback_avx+0x126>
+  .byte  196,226,125,24,13,87,99,0,0         // vbroadcastss  0x6357(%rip),%ymm1        # 6420 <_sk_callback_avx+0x125>
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
   .byte  197,252,88,2                        // vaddps        (%rdx),%ymm0,%ymm0
   .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  197,236,88,201                      // vaddps        %ymm1,%ymm2,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,21,163,98,0,0        // vbroadcastss  0x62a3(%rip),%ymm2        # 638c <_sk_callback_avx+0x12a>
+  .byte  196,226,125,24,21,59,99,0,0         // vbroadcastss  0x633b(%rip),%ymm2        # 6424 <_sk_callback_avx+0x129>
   .byte  197,228,87,219                      // vxorps        %ymm3,%ymm3,%ymm3
   .byte  197,220,87,228                      // vxorps        %ymm4,%ymm4,%ymm4
   .byte  197,212,87,237                      // vxorps        %ymm5,%ymm5,%ymm5
@@ -13914,7 +14020,7 @@ _sk_dither_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  196,66,125,24,8                     // vbroadcastss  (%r8),%ymm9
   .byte  196,65,60,87,209                    // vxorps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,91,98,0,0          // vbroadcastss  0x625b(%rip),%ymm11        # 6390 <_sk_callback_avx+0x12e>
+  .byte  196,98,125,24,29,243,98,0,0         // vbroadcastss  0x62f3(%rip),%ymm11        # 6428 <_sk_callback_avx+0x12d>
   .byte  196,65,44,84,203                    // vandps        %ymm11,%ymm10,%ymm9
   .byte  196,193,25,114,241,5                // vpslld        $0x5,%xmm9,%xmm12
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
@@ -13925,8 +14031,8 @@ _sk_dither_avx:
   .byte  196,67,125,25,219,1                 // vextractf128  $0x1,%ymm11,%xmm11
   .byte  196,193,33,114,243,4                // vpslld        $0x4,%xmm11,%xmm11
   .byte  196,67,29,24,219,1                  // vinsertf128   $0x1,%xmm11,%ymm12,%ymm11
-  .byte  196,98,125,24,37,28,98,0,0          // vbroadcastss  0x621c(%rip),%ymm12        # 6394 <_sk_callback_avx+0x132>
-  .byte  196,98,125,24,45,23,98,0,0          // vbroadcastss  0x6217(%rip),%ymm13        # 6398 <_sk_callback_avx+0x136>
+  .byte  196,98,125,24,37,180,98,0,0         // vbroadcastss  0x62b4(%rip),%ymm12        # 642c <_sk_callback_avx+0x131>
+  .byte  196,98,125,24,45,175,98,0,0         // vbroadcastss  0x62af(%rip),%ymm13        # 6430 <_sk_callback_avx+0x135>
   .byte  196,65,44,84,245                    // vandps        %ymm13,%ymm10,%ymm14
   .byte  196,193,1,114,246,2                 // vpslld        $0x2,%xmm14,%xmm15
   .byte  196,67,125,25,246,1                 // vextractf128  $0x1,%ymm14,%xmm14
@@ -13953,9 +14059,9 @@ _sk_dither_avx:
   .byte  196,65,60,86,193                    // vorps         %ymm9,%ymm8,%ymm8
   .byte  196,65,60,86,194                    // vorps         %ymm10,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,130,97,0,0         // vbroadcastss  0x6182(%rip),%ymm9        # 639c <_sk_callback_avx+0x13a>
+  .byte  196,98,125,24,13,26,98,0,0          // vbroadcastss  0x621a(%rip),%ymm9        # 6434 <_sk_callback_avx+0x139>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,120,97,0,0         // vbroadcastss  0x6178(%rip),%ymm9        # 63a0 <_sk_callback_avx+0x13e>
+  .byte  196,98,125,24,13,16,98,0,0          // vbroadcastss  0x6210(%rip),%ymm9        # 6438 <_sk_callback_avx+0x13d>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  196,98,125,24,72,8                  // vbroadcastss  0x8(%rax),%ymm9
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
@@ -14024,7 +14130,7 @@ HIDDEN _sk_srcatop_avx
 FUNCTION(_sk_srcatop_avx)
 _sk_srcatop_avx:
   .byte  197,252,89,199                      // vmulps        %ymm7,%ymm0,%ymm0
-  .byte  196,98,125,24,5,207,96,0,0          // vbroadcastss  0x60cf(%rip),%ymm8        # 63a4 <_sk_callback_avx+0x142>
+  .byte  196,98,125,24,5,103,97,0,0          // vbroadcastss  0x6167(%rip),%ymm8        # 643c <_sk_callback_avx+0x141>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -14045,7 +14151,7 @@ HIDDEN _sk_dstatop_avx
 FUNCTION(_sk_dstatop_avx)
 _sk_dstatop_avx:
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
-  .byte  196,98,125,24,13,145,96,0,0         // vbroadcastss  0x6091(%rip),%ymm9        # 63a8 <_sk_callback_avx+0x146>
+  .byte  196,98,125,24,13,41,97,0,0          // vbroadcastss  0x6129(%rip),%ymm9        # 6440 <_sk_callback_avx+0x145>
   .byte  197,52,92,207                       // vsubps        %ymm7,%ymm9,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,188,88,192                      // vaddps        %ymm0,%ymm8,%ymm0
@@ -14087,7 +14193,7 @@ HIDDEN _sk_srcout_avx
 .globl _sk_srcout_avx
 FUNCTION(_sk_srcout_avx)
 _sk_srcout_avx:
-  .byte  196,98,125,24,5,48,96,0,0           // vbroadcastss  0x6030(%rip),%ymm8        # 63ac <_sk_callback_avx+0x14a>
+  .byte  196,98,125,24,5,200,96,0,0          // vbroadcastss  0x60c8(%rip),%ymm8        # 6444 <_sk_callback_avx+0x149>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -14100,7 +14206,7 @@ HIDDEN _sk_dstout_avx
 .globl _sk_dstout_avx
 FUNCTION(_sk_dstout_avx)
 _sk_dstout_avx:
-  .byte  196,226,125,24,5,19,96,0,0          // vbroadcastss  0x6013(%rip),%ymm0        # 63b0 <_sk_callback_avx+0x14e>
+  .byte  196,226,125,24,5,171,96,0,0         // vbroadcastss  0x60ab(%rip),%ymm0        # 6448 <_sk_callback_avx+0x14d>
   .byte  197,252,92,219                      // vsubps        %ymm3,%ymm0,%ymm3
   .byte  197,228,89,196                      // vmulps        %ymm4,%ymm3,%ymm0
   .byte  197,228,89,205                      // vmulps        %ymm5,%ymm3,%ymm1
@@ -14113,7 +14219,7 @@ HIDDEN _sk_srcover_avx
 .globl _sk_srcover_avx
 FUNCTION(_sk_srcover_avx)
 _sk_srcover_avx:
-  .byte  196,98,125,24,5,246,95,0,0          // vbroadcastss  0x5ff6(%rip),%ymm8        # 63b4 <_sk_callback_avx+0x152>
+  .byte  196,98,125,24,5,142,96,0,0          // vbroadcastss  0x608e(%rip),%ymm8        # 644c <_sk_callback_avx+0x151>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,204                       // vmulps        %ymm4,%ymm8,%ymm9
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -14130,7 +14236,7 @@ HIDDEN _sk_dstover_avx
 .globl _sk_dstover_avx
 FUNCTION(_sk_dstover_avx)
 _sk_dstover_avx:
-  .byte  196,98,125,24,5,201,95,0,0          // vbroadcastss  0x5fc9(%rip),%ymm8        # 63b8 <_sk_callback_avx+0x156>
+  .byte  196,98,125,24,5,97,96,0,0           // vbroadcastss  0x6061(%rip),%ymm8        # 6450 <_sk_callback_avx+0x155>
   .byte  197,60,92,199                       // vsubps        %ymm7,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,252,88,196                      // vaddps        %ymm4,%ymm0,%ymm0
@@ -14158,7 +14264,7 @@ HIDDEN _sk_multiply_avx
 .globl _sk_multiply_avx
 FUNCTION(_sk_multiply_avx)
 _sk_multiply_avx:
-  .byte  196,98,125,24,5,136,95,0,0          // vbroadcastss  0x5f88(%rip),%ymm8        # 63bc <_sk_callback_avx+0x15a>
+  .byte  196,98,125,24,5,32,96,0,0           // vbroadcastss  0x6020(%rip),%ymm8        # 6454 <_sk_callback_avx+0x159>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,208                       // vmulps        %ymm0,%ymm9,%ymm10
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14218,7 +14324,7 @@ HIDDEN _sk_xor__avx
 .globl _sk_xor__avx
 FUNCTION(_sk_xor__avx)
 _sk_xor__avx:
-  .byte  196,98,125,24,5,215,94,0,0          // vbroadcastss  0x5ed7(%rip),%ymm8        # 63c0 <_sk_callback_avx+0x15e>
+  .byte  196,98,125,24,5,111,95,0,0          // vbroadcastss  0x5f6f(%rip),%ymm8        # 6458 <_sk_callback_avx+0x15d>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,192                      // vmulps        %ymm0,%ymm9,%ymm0
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14255,7 +14361,7 @@ _sk_darken_avx:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,95,209                  // vmaxps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,87,94,0,0           // vbroadcastss  0x5e57(%rip),%ymm8        # 63c4 <_sk_callback_avx+0x162>
+  .byte  196,98,125,24,5,239,94,0,0          // vbroadcastss  0x5eef(%rip),%ymm8        # 645c <_sk_callback_avx+0x161>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14281,7 +14387,7 @@ _sk_lighten_avx:
   .byte  197,100,89,206                      // vmulps        %ymm6,%ymm3,%ymm9
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,3,94,0,0            // vbroadcastss  0x5e03(%rip),%ymm8        # 63c8 <_sk_callback_avx+0x166>
+  .byte  196,98,125,24,5,155,94,0,0          // vbroadcastss  0x5e9b(%rip),%ymm8        # 6460 <_sk_callback_avx+0x165>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14310,7 +14416,7 @@ _sk_difference_avx:
   .byte  196,193,108,93,209                  // vminps        %ymm9,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,163,93,0,0          // vbroadcastss  0x5da3(%rip),%ymm8        # 63cc <_sk_callback_avx+0x16a>
+  .byte  196,98,125,24,5,59,94,0,0           // vbroadcastss  0x5e3b(%rip),%ymm8        # 6464 <_sk_callback_avx+0x169>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14333,7 +14439,7 @@ _sk_exclusion_avx:
   .byte  197,236,89,214                      // vmulps        %ymm6,%ymm2,%ymm2
   .byte  197,236,88,210                      // vaddps        %ymm2,%ymm2,%ymm2
   .byte  197,188,92,210                      // vsubps        %ymm2,%ymm8,%ymm2
-  .byte  196,98,125,24,5,94,93,0,0           // vbroadcastss  0x5d5e(%rip),%ymm8        # 63d0 <_sk_callback_avx+0x16e>
+  .byte  196,98,125,24,5,246,93,0,0          // vbroadcastss  0x5df6(%rip),%ymm8        # 6468 <_sk_callback_avx+0x16d>
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
   .byte  197,60,89,199                       // vmulps        %ymm7,%ymm8,%ymm8
   .byte  197,188,88,219                      // vaddps        %ymm3,%ymm8,%ymm3
@@ -14344,7 +14450,7 @@ HIDDEN _sk_colorburn_avx
 .globl _sk_colorburn_avx
 FUNCTION(_sk_colorburn_avx)
 _sk_colorburn_avx:
-  .byte  196,98,125,24,5,73,93,0,0           // vbroadcastss  0x5d49(%rip),%ymm8        # 63d4 <_sk_callback_avx+0x172>
+  .byte  196,98,125,24,5,225,93,0,0          // vbroadcastss  0x5de1(%rip),%ymm8        # 646c <_sk_callback_avx+0x171>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,52,89,216                       // vmulps        %ymm0,%ymm9,%ymm11
   .byte  196,65,44,87,210                    // vxorps        %ymm10,%ymm10,%ymm10
@@ -14406,7 +14512,7 @@ HIDDEN _sk_colordodge_avx
 FUNCTION(_sk_colordodge_avx)
 _sk_colordodge_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
-  .byte  196,98,125,24,13,69,92,0,0          // vbroadcastss  0x5c45(%rip),%ymm9        # 63d8 <_sk_callback_avx+0x176>
+  .byte  196,98,125,24,13,221,92,0,0         // vbroadcastss  0x5cdd(%rip),%ymm9        # 6470 <_sk_callback_avx+0x175>
   .byte  197,52,92,215                       // vsubps        %ymm7,%ymm9,%ymm10
   .byte  197,44,89,216                       // vmulps        %ymm0,%ymm10,%ymm11
   .byte  197,52,92,203                       // vsubps        %ymm3,%ymm9,%ymm9
@@ -14463,7 +14569,7 @@ HIDDEN _sk_hardlight_avx
 .globl _sk_hardlight_avx
 FUNCTION(_sk_hardlight_avx)
 _sk_hardlight_avx:
-  .byte  196,98,125,24,5,87,91,0,0           // vbroadcastss  0x5b57(%rip),%ymm8        # 63dc <_sk_callback_avx+0x17a>
+  .byte  196,98,125,24,5,239,91,0,0          // vbroadcastss  0x5bef(%rip),%ymm8        # 6474 <_sk_callback_avx+0x179>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14518,7 +14624,7 @@ HIDDEN _sk_overlay_avx
 .globl _sk_overlay_avx
 FUNCTION(_sk_overlay_avx)
 _sk_overlay_avx:
-  .byte  196,98,125,24,5,128,90,0,0          // vbroadcastss  0x5a80(%rip),%ymm8        # 63e0 <_sk_callback_avx+0x17e>
+  .byte  196,98,125,24,5,24,91,0,0           // vbroadcastss  0x5b18(%rip),%ymm8        # 6478 <_sk_callback_avx+0x17d>
   .byte  197,60,92,215                       // vsubps        %ymm7,%ymm8,%ymm10
   .byte  197,44,89,200                       // vmulps        %ymm0,%ymm10,%ymm9
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14584,10 +14690,10 @@ _sk_softlight_avx:
   .byte  196,65,60,88,192                    // vaddps        %ymm8,%ymm8,%ymm8
   .byte  196,65,60,89,216                    // vmulps        %ymm8,%ymm8,%ymm11
   .byte  196,65,60,88,195                    // vaddps        %ymm11,%ymm8,%ymm8
-  .byte  196,98,125,24,29,119,89,0,0         // vbroadcastss  0x5977(%rip),%ymm11        # 63e8 <_sk_callback_avx+0x186>
+  .byte  196,98,125,24,29,15,90,0,0          // vbroadcastss  0x5a0f(%rip),%ymm11        # 6480 <_sk_callback_avx+0x185>
   .byte  196,65,28,88,235                    // vaddps        %ymm11,%ymm12,%ymm13
   .byte  196,65,20,89,192                    // vmulps        %ymm8,%ymm13,%ymm8
-  .byte  196,98,125,24,45,104,89,0,0         // vbroadcastss  0x5968(%rip),%ymm13        # 63ec <_sk_callback_avx+0x18a>
+  .byte  196,98,125,24,45,0,90,0,0           // vbroadcastss  0x5a00(%rip),%ymm13        # 6484 <_sk_callback_avx+0x189>
   .byte  196,65,28,89,245                    // vmulps        %ymm13,%ymm12,%ymm14
   .byte  196,65,12,88,192                    // vaddps        %ymm8,%ymm14,%ymm8
   .byte  196,65,124,82,244                   // vrsqrtps      %ymm12,%ymm14
@@ -14598,7 +14704,7 @@ _sk_softlight_avx:
   .byte  197,4,194,255,2                     // vcmpleps      %ymm7,%ymm15,%ymm15
   .byte  196,67,13,74,240,240                // vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   .byte  197,116,88,249                      // vaddps        %ymm1,%ymm1,%ymm15
-  .byte  196,98,125,24,5,38,89,0,0           // vbroadcastss  0x5926(%rip),%ymm8        # 63e4 <_sk_callback_avx+0x182>
+  .byte  196,98,125,24,5,190,89,0,0          // vbroadcastss  0x59be(%rip),%ymm8        # 647c <_sk_callback_avx+0x181>
   .byte  196,65,60,92,228                    // vsubps        %ymm12,%ymm8,%ymm12
   .byte  197,132,92,195                      // vsubps        %ymm3,%ymm15,%ymm0
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
@@ -14725,12 +14831,12 @@ _sk_hue_avx:
   .byte  196,65,28,89,219                    // vmulps        %ymm11,%ymm12,%ymm11
   .byte  196,65,36,94,222                    // vdivps        %ymm14,%ymm11,%ymm11
   .byte  196,67,37,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  .byte  196,98,125,24,53,245,86,0,0         // vbroadcastss  0x56f5(%rip),%ymm14        # 63f0 <_sk_callback_avx+0x18e>
+  .byte  196,98,125,24,53,141,87,0,0         // vbroadcastss  0x578d(%rip),%ymm14        # 6488 <_sk_callback_avx+0x18d>
   .byte  196,65,92,89,222                    // vmulps        %ymm14,%ymm4,%ymm11
-  .byte  196,98,125,24,61,235,86,0,0         // vbroadcastss  0x56eb(%rip),%ymm15        # 63f4 <_sk_callback_avx+0x192>
+  .byte  196,98,125,24,61,131,87,0,0         // vbroadcastss  0x5783(%rip),%ymm15        # 648c <_sk_callback_avx+0x191>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
-  .byte  196,226,125,24,5,220,86,0,0         // vbroadcastss  0x56dc(%rip),%ymm0        # 63f8 <_sk_callback_avx+0x196>
+  .byte  196,226,125,24,5,116,87,0,0         // vbroadcastss  0x5774(%rip),%ymm0        # 6490 <_sk_callback_avx+0x195>
   .byte  197,76,89,232                       // vmulps        %ymm0,%ymm6,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
   .byte  196,65,52,89,238                    // vmulps        %ymm14,%ymm9,%ymm13
@@ -14791,7 +14897,7 @@ _sk_hue_avx:
   .byte  196,65,36,95,208                    // vmaxps        %ymm8,%ymm11,%ymm10
   .byte  196,195,109,74,209,240              // vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,181,85,0,0          // vbroadcastss  0x55b5(%rip),%ymm8        # 63fc <_sk_callback_avx+0x19a>
+  .byte  196,98,125,24,5,77,86,0,0           // vbroadcastss  0x564d(%rip),%ymm8        # 6494 <_sk_callback_avx+0x199>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,201                      // vmulps        %ymm1,%ymm9,%ymm1
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14848,12 +14954,12 @@ _sk_saturation_avx:
   .byte  196,65,28,89,219                    // vmulps        %ymm11,%ymm12,%ymm11
   .byte  196,65,36,94,222                    // vdivps        %ymm14,%ymm11,%ymm11
   .byte  196,67,37,74,224,240                // vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  .byte  196,98,125,24,53,195,84,0,0         // vbroadcastss  0x54c3(%rip),%ymm14        # 6400 <_sk_callback_avx+0x19e>
+  .byte  196,98,125,24,53,91,85,0,0          // vbroadcastss  0x555b(%rip),%ymm14        # 6498 <_sk_callback_avx+0x19d>
   .byte  196,65,92,89,222                    // vmulps        %ymm14,%ymm4,%ymm11
-  .byte  196,98,125,24,61,185,84,0,0         // vbroadcastss  0x54b9(%rip),%ymm15        # 6404 <_sk_callback_avx+0x1a2>
+  .byte  196,98,125,24,61,81,85,0,0          // vbroadcastss  0x5551(%rip),%ymm15        # 649c <_sk_callback_avx+0x1a1>
   .byte  196,65,84,89,239                    // vmulps        %ymm15,%ymm5,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
-  .byte  196,226,125,24,5,170,84,0,0         // vbroadcastss  0x54aa(%rip),%ymm0        # 6408 <_sk_callback_avx+0x1a6>
+  .byte  196,226,125,24,5,66,85,0,0          // vbroadcastss  0x5542(%rip),%ymm0        # 64a0 <_sk_callback_avx+0x1a5>
   .byte  197,76,89,232                       // vmulps        %ymm0,%ymm6,%ymm13
   .byte  196,65,36,88,221                    // vaddps        %ymm13,%ymm11,%ymm11
   .byte  196,65,52,89,238                    // vmulps        %ymm14,%ymm9,%ymm13
@@ -14914,7 +15020,7 @@ _sk_saturation_avx:
   .byte  196,65,36,95,208                    // vmaxps        %ymm8,%ymm11,%ymm10
   .byte  196,195,109,74,209,240              // vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,131,83,0,0          // vbroadcastss  0x5383(%rip),%ymm8        # 640c <_sk_callback_avx+0x1aa>
+  .byte  196,98,125,24,5,27,84,0,0           // vbroadcastss  0x541b(%rip),%ymm8        # 64a4 <_sk_callback_avx+0x1a9>
   .byte  197,60,92,207                       // vsubps        %ymm7,%ymm8,%ymm9
   .byte  197,180,89,201                      // vmulps        %ymm1,%ymm9,%ymm1
   .byte  197,60,92,195                       // vsubps        %ymm3,%ymm8,%ymm8
@@ -14943,12 +15049,12 @@ _sk_color_avx:
   .byte  197,252,17,68,36,168                // vmovups       %ymm0,-0x58(%rsp)
   .byte  197,124,89,199                      // vmulps        %ymm7,%ymm0,%ymm8
   .byte  197,116,89,207                      // vmulps        %ymm7,%ymm1,%ymm9
-  .byte  196,98,125,24,45,25,83,0,0          // vbroadcastss  0x5319(%rip),%ymm13        # 6410 <_sk_callback_avx+0x1ae>
+  .byte  196,98,125,24,45,177,83,0,0         // vbroadcastss  0x53b1(%rip),%ymm13        # 64a8 <_sk_callback_avx+0x1ad>
   .byte  196,65,92,89,213                    // vmulps        %ymm13,%ymm4,%ymm10
-  .byte  196,98,125,24,53,15,83,0,0          // vbroadcastss  0x530f(%rip),%ymm14        # 6414 <_sk_callback_avx+0x1b2>
+  .byte  196,98,125,24,53,167,83,0,0         // vbroadcastss  0x53a7(%rip),%ymm14        # 64ac <_sk_callback_avx+0x1b1>
   .byte  196,65,84,89,222                    // vmulps        %ymm14,%ymm5,%ymm11
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,61,0,83,0,0           // vbroadcastss  0x5300(%rip),%ymm15        # 6418 <_sk_callback_avx+0x1b6>
+  .byte  196,98,125,24,61,152,83,0,0         // vbroadcastss  0x5398(%rip),%ymm15        # 64b0 <_sk_callback_avx+0x1b5>
   .byte  196,65,76,89,223                    // vmulps        %ymm15,%ymm6,%ymm11
   .byte  196,193,44,88,195                   // vaddps        %ymm11,%ymm10,%ymm0
   .byte  196,65,60,89,221                    // vmulps        %ymm13,%ymm8,%ymm11
@@ -15011,7 +15117,7 @@ _sk_color_avx:
   .byte  196,65,44,95,207                    // vmaxps        %ymm15,%ymm10,%ymm9
   .byte  196,195,37,74,192,0                 // vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   .byte  196,65,124,95,199                   // vmaxps        %ymm15,%ymm0,%ymm8
-  .byte  196,226,125,24,5,199,81,0,0         // vbroadcastss  0x51c7(%rip),%ymm0        # 641c <_sk_callback_avx+0x1ba>
+  .byte  196,226,125,24,5,95,82,0,0          // vbroadcastss  0x525f(%rip),%ymm0        # 64b4 <_sk_callback_avx+0x1b9>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,84,36,168                // vmulps        -0x58(%rsp),%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -15041,12 +15147,12 @@ _sk_luminosity_avx:
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  197,100,89,196                      // vmulps        %ymm4,%ymm3,%ymm8
   .byte  197,100,89,205                      // vmulps        %ymm5,%ymm3,%ymm9
-  .byte  196,98,125,24,45,89,81,0,0          // vbroadcastss  0x5159(%rip),%ymm13        # 6420 <_sk_callback_avx+0x1be>
+  .byte  196,98,125,24,45,241,81,0,0         // vbroadcastss  0x51f1(%rip),%ymm13        # 64b8 <_sk_callback_avx+0x1bd>
   .byte  196,65,108,89,213                   // vmulps        %ymm13,%ymm2,%ymm10
-  .byte  196,98,125,24,53,79,81,0,0          // vbroadcastss  0x514f(%rip),%ymm14        # 6424 <_sk_callback_avx+0x1c2>
+  .byte  196,98,125,24,53,231,81,0,0         // vbroadcastss  0x51e7(%rip),%ymm14        # 64bc <_sk_callback_avx+0x1c1>
   .byte  196,65,116,89,222                   // vmulps        %ymm14,%ymm1,%ymm11
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,61,64,81,0,0          // vbroadcastss  0x5140(%rip),%ymm15        # 6428 <_sk_callback_avx+0x1c6>
+  .byte  196,98,125,24,61,216,81,0,0         // vbroadcastss  0x51d8(%rip),%ymm15        # 64c0 <_sk_callback_avx+0x1c5>
   .byte  196,65,28,89,223                    // vmulps        %ymm15,%ymm12,%ymm11
   .byte  196,193,44,88,195                   // vaddps        %ymm11,%ymm10,%ymm0
   .byte  196,65,60,89,221                    // vmulps        %ymm13,%ymm8,%ymm11
@@ -15109,7 +15215,7 @@ _sk_luminosity_avx:
   .byte  196,65,44,95,207                    // vmaxps        %ymm15,%ymm10,%ymm9
   .byte  196,195,37,74,192,0                 // vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   .byte  196,65,124,95,199                   // vmaxps        %ymm15,%ymm0,%ymm8
-  .byte  196,226,125,24,5,7,80,0,0           // vbroadcastss  0x5007(%rip),%ymm0        # 642c <_sk_callback_avx+0x1ca>
+  .byte  196,226,125,24,5,159,80,0,0         // vbroadcastss  0x509f(%rip),%ymm0        # 64c4 <_sk_callback_avx+0x1c9>
   .byte  197,124,92,215                      // vsubps        %ymm7,%ymm0,%ymm10
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  197,124,92,219                      // vsubps        %ymm3,%ymm0,%ymm11
@@ -15145,7 +15251,7 @@ HIDDEN _sk_clamp_1_avx
 .globl _sk_clamp_1_avx
 FUNCTION(_sk_clamp_1_avx)
 _sk_clamp_1_avx:
-  .byte  196,98,125,24,5,154,79,0,0          // vbroadcastss  0x4f9a(%rip),%ymm8        # 6430 <_sk_callback_avx+0x1ce>
+  .byte  196,98,125,24,5,50,80,0,0           // vbroadcastss  0x5032(%rip),%ymm8        # 64c8 <_sk_callback_avx+0x1cd>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
@@ -15157,7 +15263,7 @@ HIDDEN _sk_clamp_a_avx
 .globl _sk_clamp_a_avx
 FUNCTION(_sk_clamp_a_avx)
 _sk_clamp_a_avx:
-  .byte  196,98,125,24,5,125,79,0,0          // vbroadcastss  0x4f7d(%rip),%ymm8        # 6434 <_sk_callback_avx+0x1d2>
+  .byte  196,98,125,24,5,21,80,0,0           // vbroadcastss  0x5015(%rip),%ymm8        # 64cc <_sk_callback_avx+0x1d1>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  197,252,93,195                      // vminps        %ymm3,%ymm0,%ymm0
   .byte  197,244,93,203                      // vminps        %ymm3,%ymm1,%ymm1
@@ -15243,7 +15349,7 @@ FUNCTION(_sk_unpremul_avx)
 _sk_unpremul_avx:
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,65,100,194,200,0                // vcmpeqps      %ymm8,%ymm3,%ymm9
-  .byte  196,98,125,24,21,197,78,0,0         // vbroadcastss  0x4ec5(%rip),%ymm10        # 6438 <_sk_callback_avx+0x1d6>
+  .byte  196,98,125,24,21,93,79,0,0          // vbroadcastss  0x4f5d(%rip),%ymm10        # 64d0 <_sk_callback_avx+0x1d5>
   .byte  197,44,94,211                       // vdivps        %ymm3,%ymm10,%ymm10
   .byte  196,67,45,74,192,144                // vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
@@ -15256,17 +15362,17 @@ HIDDEN _sk_from_srgb_avx
 .globl _sk_from_srgb_avx
 FUNCTION(_sk_from_srgb_avx)
 _sk_from_srgb_avx:
-  .byte  196,98,125,24,5,166,78,0,0          // vbroadcastss  0x4ea6(%rip),%ymm8        # 643c <_sk_callback_avx+0x1da>
+  .byte  196,98,125,24,5,62,79,0,0           // vbroadcastss  0x4f3e(%rip),%ymm8        # 64d4 <_sk_callback_avx+0x1d9>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  197,124,89,208                      // vmulps        %ymm0,%ymm0,%ymm10
-  .byte  196,98,125,24,29,152,78,0,0         // vbroadcastss  0x4e98(%rip),%ymm11        # 6440 <_sk_callback_avx+0x1de>
+  .byte  196,98,125,24,29,48,79,0,0          // vbroadcastss  0x4f30(%rip),%ymm11        # 64d8 <_sk_callback_avx+0x1dd>
   .byte  196,65,124,89,227                   // vmulps        %ymm11,%ymm0,%ymm12
-  .byte  196,98,125,24,45,142,78,0,0         // vbroadcastss  0x4e8e(%rip),%ymm13        # 6444 <_sk_callback_avx+0x1e2>
+  .byte  196,98,125,24,45,38,79,0,0          // vbroadcastss  0x4f26(%rip),%ymm13        # 64dc <_sk_callback_avx+0x1e1>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,44,89,212                    // vmulps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,37,127,78,0,0         // vbroadcastss  0x4e7f(%rip),%ymm12        # 6448 <_sk_callback_avx+0x1e6>
+  .byte  196,98,125,24,37,23,79,0,0          // vbroadcastss  0x4f17(%rip),%ymm12        # 64e0 <_sk_callback_avx+0x1e5>
   .byte  196,65,44,88,212                    // vaddps        %ymm12,%ymm10,%ymm10
-  .byte  196,98,125,24,53,117,78,0,0         // vbroadcastss  0x4e75(%rip),%ymm14        # 644c <_sk_callback_avx+0x1ea>
+  .byte  196,98,125,24,53,13,79,0,0          // vbroadcastss  0x4f0d(%rip),%ymm14        # 64e4 <_sk_callback_avx+0x1e9>
   .byte  196,193,124,194,198,1               // vcmpltps      %ymm14,%ymm0,%ymm0
   .byte  196,195,45,74,193,0                 // vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   .byte  196,65,116,89,200                   // vmulps        %ymm8,%ymm1,%ymm9
@@ -15293,20 +15399,20 @@ HIDDEN _sk_to_srgb_avx
 FUNCTION(_sk_to_srgb_avx)
 _sk_to_srgb_avx:
   .byte  197,124,82,200                      // vrsqrtps      %ymm0,%ymm9
-  .byte  196,98,125,24,5,10,78,0,0           // vbroadcastss  0x4e0a(%rip),%ymm8        # 6450 <_sk_callback_avx+0x1ee>
+  .byte  196,98,125,24,5,162,78,0,0          // vbroadcastss  0x4ea2(%rip),%ymm8        # 64e8 <_sk_callback_avx+0x1ed>
   .byte  196,65,124,89,208                   // vmulps        %ymm8,%ymm0,%ymm10
-  .byte  196,98,125,24,29,0,78,0,0           // vbroadcastss  0x4e00(%rip),%ymm11        # 6454 <_sk_callback_avx+0x1f2>
+  .byte  196,98,125,24,29,152,78,0,0         // vbroadcastss  0x4e98(%rip),%ymm11        # 64ec <_sk_callback_avx+0x1f1>
   .byte  196,65,52,89,227                    // vmulps        %ymm11,%ymm9,%ymm12
-  .byte  196,98,125,24,45,246,77,0,0         // vbroadcastss  0x4df6(%rip),%ymm13        # 6458 <_sk_callback_avx+0x1f6>
+  .byte  196,98,125,24,45,142,78,0,0         // vbroadcastss  0x4e8e(%rip),%ymm13        # 64f0 <_sk_callback_avx+0x1f5>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,52,89,228                    // vmulps        %ymm12,%ymm9,%ymm12
-  .byte  196,98,125,24,53,231,77,0,0         // vbroadcastss  0x4de7(%rip),%ymm14        # 645c <_sk_callback_avx+0x1fa>
+  .byte  196,98,125,24,53,127,78,0,0         // vbroadcastss  0x4e7f(%rip),%ymm14        # 64f4 <_sk_callback_avx+0x1f9>
   .byte  196,65,28,88,230                    // vaddps        %ymm14,%ymm12,%ymm12
-  .byte  196,98,125,24,61,221,77,0,0         // vbroadcastss  0x4ddd(%rip),%ymm15        # 6460 <_sk_callback_avx+0x1fe>
+  .byte  196,98,125,24,61,117,78,0,0         // vbroadcastss  0x4e75(%rip),%ymm15        # 64f8 <_sk_callback_avx+0x1fd>
   .byte  196,65,52,88,207                    // vaddps        %ymm15,%ymm9,%ymm9
   .byte  196,65,124,83,201                   // vrcpps        %ymm9,%ymm9
   .byte  196,65,52,89,204                    // vmulps        %ymm12,%ymm9,%ymm9
-  .byte  196,98,125,24,37,201,77,0,0         // vbroadcastss  0x4dc9(%rip),%ymm12        # 6464 <_sk_callback_avx+0x202>
+  .byte  196,98,125,24,37,97,78,0,0          // vbroadcastss  0x4e61(%rip),%ymm12        # 64fc <_sk_callback_avx+0x201>
   .byte  196,193,124,194,196,1               // vcmpltps      %ymm12,%ymm0,%ymm0
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  197,124,82,201                      // vrsqrtps      %ymm1,%ymm9
@@ -15343,7 +15449,7 @@ _sk_rgb_to_hsl_avx:
   .byte  197,124,93,201                      // vminps        %ymm1,%ymm0,%ymm9
   .byte  197,52,93,202                       // vminps        %ymm2,%ymm9,%ymm9
   .byte  196,65,60,92,209                    // vsubps        %ymm9,%ymm8,%ymm10
-  .byte  196,98,125,24,29,47,77,0,0          // vbroadcastss  0x4d2f(%rip),%ymm11        # 6468 <_sk_callback_avx+0x206>
+  .byte  196,98,125,24,29,199,77,0,0         // vbroadcastss  0x4dc7(%rip),%ymm11        # 6500 <_sk_callback_avx+0x205>
   .byte  196,65,36,94,218                    // vdivps        %ymm10,%ymm11,%ymm11
   .byte  197,116,92,226                      // vsubps        %ymm2,%ymm1,%ymm12
   .byte  196,65,28,89,227                    // vmulps        %ymm11,%ymm12,%ymm12
@@ -15353,19 +15459,19 @@ _sk_rgb_to_hsl_avx:
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  197,252,92,201                      // vsubps        %ymm1,%ymm0,%ymm1
   .byte  196,193,116,89,203                  // vmulps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,8,77,0,0           // vbroadcastss  0x4d08(%rip),%ymm11        # 6474 <_sk_callback_avx+0x212>
+  .byte  196,98,125,24,29,160,77,0,0         // vbroadcastss  0x4da0(%rip),%ymm11        # 650c <_sk_callback_avx+0x211>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,246,76,0,0         // vbroadcastss  0x4cf6(%rip),%ymm11        # 6470 <_sk_callback_avx+0x20e>
+  .byte  196,98,125,24,29,142,77,0,0         // vbroadcastss  0x4d8e(%rip),%ymm11        # 6508 <_sk_callback_avx+0x20d>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,227,117,74,202,224              // vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,222,76,0,0        // vbroadcastss  0x4cde(%rip),%ymm2        # 646c <_sk_callback_avx+0x20a>
+  .byte  196,226,125,24,21,118,77,0,0        // vbroadcastss  0x4d76(%rip),%ymm2        # 6504 <_sk_callback_avx+0x209>
   .byte  196,65,12,87,246                    // vxorps        %ymm14,%ymm14,%ymm14
   .byte  196,227,13,74,210,208               // vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   .byte  197,188,194,192,0                   // vcmpeqps      %ymm0,%ymm8,%ymm0
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
   .byte  196,227,117,74,194,0                // vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   .byte  196,193,60,88,201                   // vaddps        %ymm9,%ymm8,%ymm1
-  .byte  196,98,125,24,37,197,76,0,0         // vbroadcastss  0x4cc5(%rip),%ymm12        # 647c <_sk_callback_avx+0x21a>
+  .byte  196,98,125,24,37,93,77,0,0          // vbroadcastss  0x4d5d(%rip),%ymm12        # 6514 <_sk_callback_avx+0x219>
   .byte  196,193,116,89,212                  // vmulps        %ymm12,%ymm1,%ymm2
   .byte  197,28,194,226,1                    // vcmpltps      %ymm2,%ymm12,%ymm12
   .byte  196,65,36,92,216                    // vsubps        %ymm8,%ymm11,%ymm11
@@ -15375,7 +15481,7 @@ _sk_rgb_to_hsl_avx:
   .byte  197,172,94,201                      // vdivps        %ymm1,%ymm10,%ymm1
   .byte  196,195,125,74,198,128              // vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   .byte  196,195,117,74,206,128              // vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  .byte  196,98,125,24,5,136,76,0,0          // vbroadcastss  0x4c88(%rip),%ymm8        # 6478 <_sk_callback_avx+0x216>
+  .byte  196,98,125,24,5,32,77,0,0           // vbroadcastss  0x4d20(%rip),%ymm8        # 6510 <_sk_callback_avx+0x215>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -15392,7 +15498,7 @@ _sk_hsl_to_rgb_avx:
   .byte  197,252,17,92,36,128                // vmovups       %ymm3,-0x80(%rsp)
   .byte  197,252,40,225                      // vmovaps       %ymm1,%ymm4
   .byte  197,252,40,216                      // vmovaps       %ymm0,%ymm3
-  .byte  196,98,125,24,5,85,76,0,0           // vbroadcastss  0x4c55(%rip),%ymm8        # 6480 <_sk_callback_avx+0x21e>
+  .byte  196,98,125,24,5,237,76,0,0          // vbroadcastss  0x4ced(%rip),%ymm8        # 6518 <_sk_callback_avx+0x21d>
   .byte  197,60,194,202,2                    // vcmpleps      %ymm2,%ymm8,%ymm9
   .byte  197,92,89,210                       // vmulps        %ymm2,%ymm4,%ymm10
   .byte  196,65,92,92,218                    // vsubps        %ymm10,%ymm4,%ymm11
@@ -15400,23 +15506,23 @@ _sk_hsl_to_rgb_avx:
   .byte  197,52,88,210                       // vaddps        %ymm2,%ymm9,%ymm10
   .byte  197,108,88,202                      // vaddps        %ymm2,%ymm2,%ymm9
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,29,47,76,0,0          // vbroadcastss  0x4c2f(%rip),%ymm11        # 6484 <_sk_callback_avx+0x222>
+  .byte  196,98,125,24,29,199,76,0,0         // vbroadcastss  0x4cc7(%rip),%ymm11        # 651c <_sk_callback_avx+0x221>
   .byte  196,65,100,88,219                   // vaddps        %ymm11,%ymm3,%ymm11
   .byte  196,67,125,8,227,1                  // vroundps      $0x1,%ymm11,%ymm12
   .byte  196,65,36,92,252                    // vsubps        %ymm12,%ymm11,%ymm15
   .byte  196,65,44,92,217                    // vsubps        %ymm9,%ymm10,%ymm11
-  .byte  196,98,125,24,37,25,76,0,0          // vbroadcastss  0x4c19(%rip),%ymm12        # 648c <_sk_callback_avx+0x22a>
+  .byte  196,98,125,24,37,177,76,0,0         // vbroadcastss  0x4cb1(%rip),%ymm12        # 6524 <_sk_callback_avx+0x229>
   .byte  196,193,4,89,196                    // vmulps        %ymm12,%ymm15,%ymm0
-  .byte  196,98,125,24,45,15,76,0,0          // vbroadcastss  0x4c0f(%rip),%ymm13        # 6490 <_sk_callback_avx+0x22e>
+  .byte  196,98,125,24,45,167,76,0,0         // vbroadcastss  0x4ca7(%rip),%ymm13        # 6528 <_sk_callback_avx+0x22d>
   .byte  197,20,92,240                       // vsubps        %ymm0,%ymm13,%ymm14
   .byte  196,65,36,89,246                    // vmulps        %ymm14,%ymm11,%ymm14
   .byte  196,65,52,88,246                    // vaddps        %ymm14,%ymm9,%ymm14
-  .byte  196,226,125,24,13,240,75,0,0        // vbroadcastss  0x4bf0(%rip),%ymm1        # 6488 <_sk_callback_avx+0x226>
+  .byte  196,226,125,24,13,136,76,0,0        // vbroadcastss  0x4c88(%rip),%ymm1        # 6520 <_sk_callback_avx+0x225>
   .byte  196,193,116,194,255,2               // vcmpleps      %ymm15,%ymm1,%ymm7
   .byte  196,195,13,74,249,112               // vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   .byte  196,65,60,194,247,2                 // vcmpleps      %ymm15,%ymm8,%ymm14
   .byte  196,227,45,74,255,224               // vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  .byte  196,98,125,24,53,219,75,0,0         // vbroadcastss  0x4bdb(%rip),%ymm14        # 6494 <_sk_callback_avx+0x232>
+  .byte  196,98,125,24,53,115,76,0,0         // vbroadcastss  0x4c73(%rip),%ymm14        # 652c <_sk_callback_avx+0x231>
   .byte  196,65,12,194,255,2                 // vcmpleps      %ymm15,%ymm14,%ymm15
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,180,88,192                      // vaddps        %ymm0,%ymm9,%ymm0
@@ -15435,7 +15541,7 @@ _sk_hsl_to_rgb_avx:
   .byte  197,164,89,247                      // vmulps        %ymm7,%ymm11,%ymm6
   .byte  197,180,88,246                      // vaddps        %ymm6,%ymm9,%ymm6
   .byte  196,227,77,74,237,0                 // vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  .byte  196,226,125,24,5,125,75,0,0         // vbroadcastss  0x4b7d(%rip),%ymm0        # 6498 <_sk_callback_avx+0x236>
+  .byte  196,226,125,24,5,21,76,0,0          // vbroadcastss  0x4c15(%rip),%ymm0        # 6530 <_sk_callback_avx+0x235>
   .byte  197,228,88,192                      // vaddps        %ymm0,%ymm3,%ymm0
   .byte  196,227,125,8,216,1                 // vroundps      $0x1,%ymm0,%ymm3
   .byte  197,252,92,195                      // vsubps        %ymm3,%ymm0,%ymm0
@@ -15494,7 +15600,7 @@ _sk_scale_u8_avx:
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,166,74,0,0         // vbroadcastss  0x4aa6(%rip),%ymm9        # 649c <_sk_callback_avx+0x23a>
+  .byte  196,98,125,24,13,62,75,0,0          // vbroadcastss  0x4b3e(%rip),%ymm9        # 6534 <_sk_callback_avx+0x239>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
@@ -15553,7 +15659,7 @@ _sk_lerp_u8_avx:
   .byte  196,66,121,49,192                   // vpmovzxbd     %xmm8,%xmm8
   .byte  196,67,53,24,192,1                  // vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,13,242,73,0,0         // vbroadcastss  0x49f2(%rip),%ymm9        # 64a0 <_sk_callback_avx+0x23e>
+  .byte  196,98,125,24,13,138,74,0,0         // vbroadcastss  0x4a8a(%rip),%ymm9        # 6538 <_sk_callback_avx+0x23d>
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
@@ -15596,20 +15702,20 @@ _sk_lerp_565_avx:
   .byte  196,65,57,105,201                   // vpunpckhwd    %xmm9,%xmm8,%xmm9
   .byte  196,66,121,51,192                   // vpmovzxwd     %xmm8,%xmm8
   .byte  196,67,61,24,193,1                  // vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,92,73,0,0          // vbroadcastss  0x495c(%rip),%ymm9        # 64a4 <_sk_callback_avx+0x242>
+  .byte  196,98,125,24,13,244,73,0,0         // vbroadcastss  0x49f4(%rip),%ymm9        # 653c <_sk_callback_avx+0x241>
   .byte  196,65,60,84,201                    // vandps        %ymm9,%ymm8,%ymm9
   .byte  196,65,124,91,201                   // vcvtdq2ps     %ymm9,%ymm9
-  .byte  196,98,125,24,21,77,73,0,0          // vbroadcastss  0x494d(%rip),%ymm10        # 64a8 <_sk_callback_avx+0x246>
+  .byte  196,98,125,24,21,229,73,0,0         // vbroadcastss  0x49e5(%rip),%ymm10        # 6540 <_sk_callback_avx+0x245>
   .byte  196,65,52,89,202                    // vmulps        %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,67,73,0,0          // vbroadcastss  0x4943(%rip),%ymm10        # 64ac <_sk_callback_avx+0x24a>
+  .byte  196,98,125,24,21,219,73,0,0         // vbroadcastss  0x49db(%rip),%ymm10        # 6544 <_sk_callback_avx+0x249>
   .byte  196,65,60,84,210                    // vandps        %ymm10,%ymm8,%ymm10
   .byte  196,65,124,91,210                   // vcvtdq2ps     %ymm10,%ymm10
-  .byte  196,98,125,24,29,52,73,0,0          // vbroadcastss  0x4934(%rip),%ymm11        # 64b0 <_sk_callback_avx+0x24e>
+  .byte  196,98,125,24,29,204,73,0,0         // vbroadcastss  0x49cc(%rip),%ymm11        # 6548 <_sk_callback_avx+0x24d>
   .byte  196,65,44,89,211                    // vmulps        %ymm11,%ymm10,%ymm10
-  .byte  196,98,125,24,29,42,73,0,0          // vbroadcastss  0x492a(%rip),%ymm11        # 64b4 <_sk_callback_avx+0x252>
+  .byte  196,98,125,24,29,194,73,0,0         // vbroadcastss  0x49c2(%rip),%ymm11        # 654c <_sk_callback_avx+0x251>
   .byte  196,65,60,84,195                    // vandps        %ymm11,%ymm8,%ymm8
   .byte  196,65,124,91,192                   // vcvtdq2ps     %ymm8,%ymm8
-  .byte  196,98,125,24,29,27,73,0,0          // vbroadcastss  0x491b(%rip),%ymm11        # 64b8 <_sk_callback_avx+0x256>
+  .byte  196,98,125,24,29,179,73,0,0         // vbroadcastss  0x49b3(%rip),%ymm11        # 6550 <_sk_callback_avx+0x255>
   .byte  196,65,60,89,195                    // vmulps        %ymm11,%ymm8,%ymm8
   .byte  197,252,92,196                      // vsubps        %ymm4,%ymm0,%ymm0
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
@@ -15691,7 +15797,7 @@ _sk_load_tables_avx:
   .byte  65,85                               // push          %r13
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
-  .byte  197,124,40,13,250,74,0,0            // vmovaps       0x4afa(%rip),%ymm9        # 67a0 <_sk_callback_avx+0x53e>
+  .byte  197,124,40,13,154,75,0,0            // vmovaps       0x4b9a(%rip),%ymm9        # 6840 <_sk_callback_avx+0x545>
   .byte  196,193,60,84,193                   // vandps        %ymm9,%ymm8,%ymm0
   .byte  196,193,249,126,193                 // vmovq         %xmm0,%r9
   .byte  69,137,203                          // mov           %r9d,%r11d
@@ -15783,7 +15889,7 @@ _sk_load_tables_avx:
   .byte  196,193,97,114,210,24               // vpsrld        $0x18,%xmm10,%xmm3
   .byte  196,227,61,24,219,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,39,70,0,0           // vbroadcastss  0x4627(%rip),%ymm8        # 64bc <_sk_callback_avx+0x25a>
+  .byte  196,98,125,24,5,191,70,0,0          // vbroadcastss  0x46bf(%rip),%ymm8        # 6554 <_sk_callback_avx+0x259>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -15875,7 +15981,7 @@ _sk_load_tables_u16_be_avx:
   .byte  197,177,108,208                     // vpunpcklqdq   %xmm0,%xmm9,%xmm2
   .byte  197,177,109,200                     // vpunpckhqdq   %xmm0,%xmm9,%xmm1
   .byte  196,65,57,108,212                   // vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  .byte  197,121,111,29,58,72,0,0            // vmovdqa       0x483a(%rip),%xmm11        # 6820 <_sk_callback_avx+0x5be>
+  .byte  197,121,111,29,218,72,0,0           // vmovdqa       0x48da(%rip),%xmm11        # 68c0 <_sk_callback_avx+0x5c5>
   .byte  196,193,105,219,195                 // vpand         %xmm11,%xmm2,%xmm0
   .byte  196,65,49,239,201                   // vpxor         %xmm9,%xmm9,%xmm9
   .byte  196,193,121,105,209                 // vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -15974,7 +16080,7 @@ _sk_load_tables_u16_be_avx:
   .byte  196,226,121,51,219                  // vpmovzxwd     %xmm3,%xmm3
   .byte  196,195,101,24,216,1                // vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,216,66,0,0          // vbroadcastss  0x42d8(%rip),%ymm8        # 64c0 <_sk_callback_avx+0x25e>
+  .byte  196,98,125,24,5,112,67,0,0          // vbroadcastss  0x4370(%rip),%ymm8        # 6558 <_sk_callback_avx+0x25d>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -16046,7 +16152,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  197,185,108,202                     // vpunpcklqdq   %xmm2,%xmm8,%xmm1
   .byte  197,185,109,210                     // vpunpckhqdq   %xmm2,%xmm8,%xmm2
   .byte  197,121,108,195                     // vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  .byte  197,121,111,13,51,69,0,0            // vmovdqa       0x4533(%rip),%xmm9        # 6830 <_sk_callback_avx+0x5ce>
+  .byte  197,121,111,13,211,69,0,0           // vmovdqa       0x45d3(%rip),%xmm9        # 68d0 <_sk_callback_avx+0x5d5>
   .byte  196,193,113,219,193                 // vpand         %xmm9,%xmm1,%xmm0
   .byte  196,65,41,239,210                   // vpxor         %xmm10,%xmm10,%xmm10
   .byte  196,193,121,105,202                 // vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -16138,7 +16244,7 @@ _sk_load_tables_rgb_u16_be_avx:
   .byte  196,227,105,33,211,48               // vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   .byte  196,195,109,24,208,1                // vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,234,63,0,0        // vbroadcastss  0x3fea(%rip),%ymm3        # 64c4 <_sk_callback_avx+0x262>
+  .byte  196,226,125,24,29,130,64,0,0        // vbroadcastss  0x4082(%rip),%ymm3        # 655c <_sk_callback_avx+0x261>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,93                               // pop           %r13
@@ -16191,7 +16297,7 @@ _sk_byte_tables_avx:
   .byte  65,84                               // push          %r12
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,30,63,0,0           // vbroadcastss  0x3f1e(%rip),%ymm8        # 64c8 <_sk_callback_avx+0x266>
+  .byte  196,98,125,24,5,182,63,0,0          // vbroadcastss  0x3fb6(%rip),%ymm8        # 6560 <_sk_callback_avx+0x265>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,195,249,22,192,1                // vpextrq       $0x1,%xmm0,%r8
@@ -16228,7 +16334,7 @@ _sk_byte_tables_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,108,62,0,0         // vbroadcastss  0x3e6c(%rip),%ymm9        # 64cc <_sk_callback_avx+0x26a>
+  .byte  196,98,125,24,13,4,63,0,0           // vbroadcastss  0x3f04(%rip),%ymm9        # 6564 <_sk_callback_avx+0x269>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -16390,7 +16496,7 @@ _sk_byte_tables_rgb_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,53,24,192,1                 // vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,146,59,0,0         // vbroadcastss  0x3b92(%rip),%ymm9        # 64d0 <_sk_callback_avx+0x26e>
+  .byte  196,98,125,24,13,42,60,0,0          // vbroadcastss  0x3c2a(%rip),%ymm9        # 6568 <_sk_callback_avx+0x26d>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  197,188,89,201                      // vmulps        %ymm1,%ymm8,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
@@ -16687,36 +16793,36 @@ _sk_parametric_r_avx:
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,216                      // vcvtdq2ps     %ymm0,%ymm11
-  .byte  196,98,125,24,37,240,54,0,0         // vbroadcastss  0x36f0(%rip),%ymm12        # 64d4 <_sk_callback_avx+0x272>
+  .byte  196,98,125,24,37,136,55,0,0         // vbroadcastss  0x3788(%rip),%ymm12        # 656c <_sk_callback_avx+0x271>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,230,54,0,0         // vbroadcastss  0x36e6(%rip),%ymm12        # 64d8 <_sk_callback_avx+0x276>
+  .byte  196,98,125,24,37,126,55,0,0         // vbroadcastss  0x377e(%rip),%ymm12        # 6570 <_sk_callback_avx+0x275>
   .byte  196,193,124,84,196                  // vandps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,220,54,0,0         // vbroadcastss  0x36dc(%rip),%ymm12        # 64dc <_sk_callback_avx+0x27a>
+  .byte  196,98,125,24,37,116,55,0,0         // vbroadcastss  0x3774(%rip),%ymm12        # 6574 <_sk_callback_avx+0x279>
   .byte  196,193,124,86,196                  // vorps         %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,210,54,0,0         // vbroadcastss  0x36d2(%rip),%ymm12        # 64e0 <_sk_callback_avx+0x27e>
+  .byte  196,98,125,24,37,106,55,0,0         // vbroadcastss  0x376a(%rip),%ymm12        # 6578 <_sk_callback_avx+0x27d>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,200,54,0,0         // vbroadcastss  0x36c8(%rip),%ymm12        # 64e4 <_sk_callback_avx+0x282>
+  .byte  196,98,125,24,37,96,55,0,0          // vbroadcastss  0x3760(%rip),%ymm12        # 657c <_sk_callback_avx+0x281>
   .byte  196,65,124,89,228                   // vmulps        %ymm12,%ymm0,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,185,54,0,0         // vbroadcastss  0x36b9(%rip),%ymm12        # 64e8 <_sk_callback_avx+0x286>
+  .byte  196,98,125,24,37,81,55,0,0          // vbroadcastss  0x3751(%rip),%ymm12        # 6580 <_sk_callback_avx+0x285>
   .byte  196,193,124,88,196                  // vaddps        %ymm12,%ymm0,%ymm0
-  .byte  196,98,125,24,37,175,54,0,0         // vbroadcastss  0x36af(%rip),%ymm12        # 64ec <_sk_callback_avx+0x28a>
+  .byte  196,98,125,24,37,71,55,0,0          // vbroadcastss  0x3747(%rip),%ymm12        # 6584 <_sk_callback_avx+0x289>
   .byte  197,156,94,192                      // vdivps        %ymm0,%ymm12,%ymm0
   .byte  197,164,92,192                      // vsubps        %ymm0,%ymm11,%ymm0
   .byte  197,172,89,192                      // vmulps        %ymm0,%ymm10,%ymm0
   .byte  196,99,125,8,208,1                  // vroundps      $0x1,%ymm0,%ymm10
   .byte  196,65,124,92,210                   // vsubps        %ymm10,%ymm0,%ymm10
-  .byte  196,98,125,24,29,147,54,0,0         // vbroadcastss  0x3693(%rip),%ymm11        # 64f0 <_sk_callback_avx+0x28e>
+  .byte  196,98,125,24,29,43,55,0,0          // vbroadcastss  0x372b(%rip),%ymm11        # 6588 <_sk_callback_avx+0x28d>
   .byte  196,193,124,88,195                  // vaddps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,137,54,0,0         // vbroadcastss  0x3689(%rip),%ymm11        # 64f4 <_sk_callback_avx+0x292>
+  .byte  196,98,125,24,29,33,55,0,0          // vbroadcastss  0x3721(%rip),%ymm11        # 658c <_sk_callback_avx+0x291>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,124,92,195                  // vsubps        %ymm11,%ymm0,%ymm0
-  .byte  196,98,125,24,29,122,54,0,0         // vbroadcastss  0x367a(%rip),%ymm11        # 64f8 <_sk_callback_avx+0x296>
+  .byte  196,98,125,24,29,18,55,0,0          // vbroadcastss  0x3712(%rip),%ymm11        # 6590 <_sk_callback_avx+0x295>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,112,54,0,0         // vbroadcastss  0x3670(%rip),%ymm11        # 64fc <_sk_callback_avx+0x29a>
+  .byte  196,98,125,24,29,8,55,0,0           // vbroadcastss  0x3708(%rip),%ymm11        # 6594 <_sk_callback_avx+0x299>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,124,88,194                  // vaddps        %ymm10,%ymm0,%ymm0
-  .byte  196,98,125,24,21,97,54,0,0          // vbroadcastss  0x3661(%rip),%ymm10        # 6500 <_sk_callback_avx+0x29e>
+  .byte  196,98,125,24,21,249,54,0,0         // vbroadcastss  0x36f9(%rip),%ymm10        # 6598 <_sk_callback_avx+0x29d>
   .byte  196,193,124,89,194                  // vmulps        %ymm10,%ymm0,%ymm0
   .byte  197,253,91,192                      // vcvtps2dq     %ymm0,%ymm0
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16724,7 +16830,7 @@ _sk_parametric_r_avx:
   .byte  196,195,125,74,193,128              // vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,124,95,192                  // vmaxps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,56,54,0,0           // vbroadcastss  0x3638(%rip),%ymm8        # 6504 <_sk_callback_avx+0x2a2>
+  .byte  196,98,125,24,5,208,54,0,0          // vbroadcastss  0x36d0(%rip),%ymm8        # 659c <_sk_callback_avx+0x2a1>
   .byte  196,193,124,93,192                  // vminps        %ymm8,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16746,36 +16852,36 @@ _sk_parametric_g_avx:
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,217                      // vcvtdq2ps     %ymm1,%ymm11
-  .byte  196,98,125,24,37,233,53,0,0         // vbroadcastss  0x35e9(%rip),%ymm12        # 6508 <_sk_callback_avx+0x2a6>
+  .byte  196,98,125,24,37,129,54,0,0         // vbroadcastss  0x3681(%rip),%ymm12        # 65a0 <_sk_callback_avx+0x2a5>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,223,53,0,0         // vbroadcastss  0x35df(%rip),%ymm12        # 650c <_sk_callback_avx+0x2aa>
+  .byte  196,98,125,24,37,119,54,0,0         // vbroadcastss  0x3677(%rip),%ymm12        # 65a4 <_sk_callback_avx+0x2a9>
   .byte  196,193,116,84,204                  // vandps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,213,53,0,0         // vbroadcastss  0x35d5(%rip),%ymm12        # 6510 <_sk_callback_avx+0x2ae>
+  .byte  196,98,125,24,37,109,54,0,0         // vbroadcastss  0x366d(%rip),%ymm12        # 65a8 <_sk_callback_avx+0x2ad>
   .byte  196,193,116,86,204                  // vorps         %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,203,53,0,0         // vbroadcastss  0x35cb(%rip),%ymm12        # 6514 <_sk_callback_avx+0x2b2>
+  .byte  196,98,125,24,37,99,54,0,0          // vbroadcastss  0x3663(%rip),%ymm12        # 65ac <_sk_callback_avx+0x2b1>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,193,53,0,0         // vbroadcastss  0x35c1(%rip),%ymm12        # 6518 <_sk_callback_avx+0x2b6>
+  .byte  196,98,125,24,37,89,54,0,0          // vbroadcastss  0x3659(%rip),%ymm12        # 65b0 <_sk_callback_avx+0x2b5>
   .byte  196,65,116,89,228                   // vmulps        %ymm12,%ymm1,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,178,53,0,0         // vbroadcastss  0x35b2(%rip),%ymm12        # 651c <_sk_callback_avx+0x2ba>
+  .byte  196,98,125,24,37,74,54,0,0          // vbroadcastss  0x364a(%rip),%ymm12        # 65b4 <_sk_callback_avx+0x2b9>
   .byte  196,193,116,88,204                  // vaddps        %ymm12,%ymm1,%ymm1
-  .byte  196,98,125,24,37,168,53,0,0         // vbroadcastss  0x35a8(%rip),%ymm12        # 6520 <_sk_callback_avx+0x2be>
+  .byte  196,98,125,24,37,64,54,0,0          // vbroadcastss  0x3640(%rip),%ymm12        # 65b8 <_sk_callback_avx+0x2bd>
   .byte  197,156,94,201                      // vdivps        %ymm1,%ymm12,%ymm1
   .byte  197,164,92,201                      // vsubps        %ymm1,%ymm11,%ymm1
   .byte  197,172,89,201                      // vmulps        %ymm1,%ymm10,%ymm1
   .byte  196,99,125,8,209,1                  // vroundps      $0x1,%ymm1,%ymm10
   .byte  196,65,116,92,210                   // vsubps        %ymm10,%ymm1,%ymm10
-  .byte  196,98,125,24,29,140,53,0,0         // vbroadcastss  0x358c(%rip),%ymm11        # 6524 <_sk_callback_avx+0x2c2>
+  .byte  196,98,125,24,29,36,54,0,0          // vbroadcastss  0x3624(%rip),%ymm11        # 65bc <_sk_callback_avx+0x2c1>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,130,53,0,0         // vbroadcastss  0x3582(%rip),%ymm11        # 6528 <_sk_callback_avx+0x2c6>
+  .byte  196,98,125,24,29,26,54,0,0          // vbroadcastss  0x361a(%rip),%ymm11        # 65c0 <_sk_callback_avx+0x2c5>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,116,92,203                  // vsubps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,29,115,53,0,0         // vbroadcastss  0x3573(%rip),%ymm11        # 652c <_sk_callback_avx+0x2ca>
+  .byte  196,98,125,24,29,11,54,0,0          // vbroadcastss  0x360b(%rip),%ymm11        # 65c4 <_sk_callback_avx+0x2c9>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,105,53,0,0         // vbroadcastss  0x3569(%rip),%ymm11        # 6530 <_sk_callback_avx+0x2ce>
+  .byte  196,98,125,24,29,1,54,0,0           // vbroadcastss  0x3601(%rip),%ymm11        # 65c8 <_sk_callback_avx+0x2cd>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,116,88,202                  // vaddps        %ymm10,%ymm1,%ymm1
-  .byte  196,98,125,24,21,90,53,0,0          // vbroadcastss  0x355a(%rip),%ymm10        # 6534 <_sk_callback_avx+0x2d2>
+  .byte  196,98,125,24,21,242,53,0,0         // vbroadcastss  0x35f2(%rip),%ymm10        # 65cc <_sk_callback_avx+0x2d1>
   .byte  196,193,116,89,202                  // vmulps        %ymm10,%ymm1,%ymm1
   .byte  197,253,91,201                      // vcvtps2dq     %ymm1,%ymm1
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16783,7 +16889,7 @@ _sk_parametric_g_avx:
   .byte  196,195,117,74,201,128              // vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,116,95,200                  // vmaxps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,5,49,53,0,0           // vbroadcastss  0x3531(%rip),%ymm8        # 6538 <_sk_callback_avx+0x2d6>
+  .byte  196,98,125,24,5,201,53,0,0          // vbroadcastss  0x35c9(%rip),%ymm8        # 65d0 <_sk_callback_avx+0x2d5>
   .byte  196,193,116,93,200                  // vminps        %ymm8,%ymm1,%ymm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16805,36 +16911,36 @@ _sk_parametric_b_avx:
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,218                      // vcvtdq2ps     %ymm2,%ymm11
-  .byte  196,98,125,24,37,226,52,0,0         // vbroadcastss  0x34e2(%rip),%ymm12        # 653c <_sk_callback_avx+0x2da>
+  .byte  196,98,125,24,37,122,53,0,0         // vbroadcastss  0x357a(%rip),%ymm12        # 65d4 <_sk_callback_avx+0x2d9>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,216,52,0,0         // vbroadcastss  0x34d8(%rip),%ymm12        # 6540 <_sk_callback_avx+0x2de>
+  .byte  196,98,125,24,37,112,53,0,0         // vbroadcastss  0x3570(%rip),%ymm12        # 65d8 <_sk_callback_avx+0x2dd>
   .byte  196,193,108,84,212                  // vandps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,206,52,0,0         // vbroadcastss  0x34ce(%rip),%ymm12        # 6544 <_sk_callback_avx+0x2e2>
+  .byte  196,98,125,24,37,102,53,0,0         // vbroadcastss  0x3566(%rip),%ymm12        # 65dc <_sk_callback_avx+0x2e1>
   .byte  196,193,108,86,212                  // vorps         %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,196,52,0,0         // vbroadcastss  0x34c4(%rip),%ymm12        # 6548 <_sk_callback_avx+0x2e6>
+  .byte  196,98,125,24,37,92,53,0,0          // vbroadcastss  0x355c(%rip),%ymm12        # 65e0 <_sk_callback_avx+0x2e5>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,186,52,0,0         // vbroadcastss  0x34ba(%rip),%ymm12        # 654c <_sk_callback_avx+0x2ea>
+  .byte  196,98,125,24,37,82,53,0,0          // vbroadcastss  0x3552(%rip),%ymm12        # 65e4 <_sk_callback_avx+0x2e9>
   .byte  196,65,108,89,228                   // vmulps        %ymm12,%ymm2,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,171,52,0,0         // vbroadcastss  0x34ab(%rip),%ymm12        # 6550 <_sk_callback_avx+0x2ee>
+  .byte  196,98,125,24,37,67,53,0,0          // vbroadcastss  0x3543(%rip),%ymm12        # 65e8 <_sk_callback_avx+0x2ed>
   .byte  196,193,108,88,212                  // vaddps        %ymm12,%ymm2,%ymm2
-  .byte  196,98,125,24,37,161,52,0,0         // vbroadcastss  0x34a1(%rip),%ymm12        # 6554 <_sk_callback_avx+0x2f2>
+  .byte  196,98,125,24,37,57,53,0,0          // vbroadcastss  0x3539(%rip),%ymm12        # 65ec <_sk_callback_avx+0x2f1>
   .byte  197,156,94,210                      // vdivps        %ymm2,%ymm12,%ymm2
   .byte  197,164,92,210                      // vsubps        %ymm2,%ymm11,%ymm2
   .byte  197,172,89,210                      // vmulps        %ymm2,%ymm10,%ymm2
   .byte  196,99,125,8,210,1                  // vroundps      $0x1,%ymm2,%ymm10
   .byte  196,65,108,92,210                   // vsubps        %ymm10,%ymm2,%ymm10
-  .byte  196,98,125,24,29,133,52,0,0         // vbroadcastss  0x3485(%rip),%ymm11        # 6558 <_sk_callback_avx+0x2f6>
+  .byte  196,98,125,24,29,29,53,0,0          // vbroadcastss  0x351d(%rip),%ymm11        # 65f0 <_sk_callback_avx+0x2f5>
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,123,52,0,0         // vbroadcastss  0x347b(%rip),%ymm11        # 655c <_sk_callback_avx+0x2fa>
+  .byte  196,98,125,24,29,19,53,0,0          // vbroadcastss  0x3513(%rip),%ymm11        # 65f4 <_sk_callback_avx+0x2f9>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,108,92,211                  // vsubps        %ymm11,%ymm2,%ymm2
-  .byte  196,98,125,24,29,108,52,0,0         // vbroadcastss  0x346c(%rip),%ymm11        # 6560 <_sk_callback_avx+0x2fe>
+  .byte  196,98,125,24,29,4,53,0,0           // vbroadcastss  0x3504(%rip),%ymm11        # 65f8 <_sk_callback_avx+0x2fd>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,98,52,0,0          // vbroadcastss  0x3462(%rip),%ymm11        # 6564 <_sk_callback_avx+0x302>
+  .byte  196,98,125,24,29,250,52,0,0         // vbroadcastss  0x34fa(%rip),%ymm11        # 65fc <_sk_callback_avx+0x301>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,108,88,210                  // vaddps        %ymm10,%ymm2,%ymm2
-  .byte  196,98,125,24,21,83,52,0,0          // vbroadcastss  0x3453(%rip),%ymm10        # 6568 <_sk_callback_avx+0x306>
+  .byte  196,98,125,24,21,235,52,0,0         // vbroadcastss  0x34eb(%rip),%ymm10        # 6600 <_sk_callback_avx+0x305>
   .byte  196,193,108,89,210                  // vmulps        %ymm10,%ymm2,%ymm2
   .byte  197,253,91,210                      // vcvtps2dq     %ymm2,%ymm2
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16842,7 +16948,7 @@ _sk_parametric_b_avx:
   .byte  196,195,109,74,209,128              // vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,108,95,208                  // vmaxps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,42,52,0,0           // vbroadcastss  0x342a(%rip),%ymm8        # 656c <_sk_callback_avx+0x30a>
+  .byte  196,98,125,24,5,194,52,0,0          // vbroadcastss  0x34c2(%rip),%ymm8        # 6604 <_sk_callback_avx+0x309>
   .byte  196,193,108,93,208                  // vminps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16864,36 +16970,36 @@ _sk_parametric_a_avx:
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
   .byte  196,98,125,24,16                    // vbroadcastss  (%rax),%ymm10
   .byte  197,124,91,219                      // vcvtdq2ps     %ymm3,%ymm11
-  .byte  196,98,125,24,37,219,51,0,0         // vbroadcastss  0x33db(%rip),%ymm12        # 6570 <_sk_callback_avx+0x30e>
+  .byte  196,98,125,24,37,115,52,0,0         // vbroadcastss  0x3473(%rip),%ymm12        # 6608 <_sk_callback_avx+0x30d>
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,209,51,0,0         // vbroadcastss  0x33d1(%rip),%ymm12        # 6574 <_sk_callback_avx+0x312>
+  .byte  196,98,125,24,37,105,52,0,0         // vbroadcastss  0x3469(%rip),%ymm12        # 660c <_sk_callback_avx+0x311>
   .byte  196,193,100,84,220                  // vandps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,199,51,0,0         // vbroadcastss  0x33c7(%rip),%ymm12        # 6578 <_sk_callback_avx+0x316>
+  .byte  196,98,125,24,37,95,52,0,0          // vbroadcastss  0x345f(%rip),%ymm12        # 6610 <_sk_callback_avx+0x315>
   .byte  196,193,100,86,220                  // vorps         %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,189,51,0,0         // vbroadcastss  0x33bd(%rip),%ymm12        # 657c <_sk_callback_avx+0x31a>
+  .byte  196,98,125,24,37,85,52,0,0          // vbroadcastss  0x3455(%rip),%ymm12        # 6614 <_sk_callback_avx+0x319>
   .byte  196,65,36,88,220                    // vaddps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,179,51,0,0         // vbroadcastss  0x33b3(%rip),%ymm12        # 6580 <_sk_callback_avx+0x31e>
+  .byte  196,98,125,24,37,75,52,0,0          // vbroadcastss  0x344b(%rip),%ymm12        # 6618 <_sk_callback_avx+0x31d>
   .byte  196,65,100,89,228                   // vmulps        %ymm12,%ymm3,%ymm12
   .byte  196,65,36,92,220                    // vsubps        %ymm12,%ymm11,%ymm11
-  .byte  196,98,125,24,37,164,51,0,0         // vbroadcastss  0x33a4(%rip),%ymm12        # 6584 <_sk_callback_avx+0x322>
+  .byte  196,98,125,24,37,60,52,0,0          // vbroadcastss  0x343c(%rip),%ymm12        # 661c <_sk_callback_avx+0x321>
   .byte  196,193,100,88,220                  // vaddps        %ymm12,%ymm3,%ymm3
-  .byte  196,98,125,24,37,154,51,0,0         // vbroadcastss  0x339a(%rip),%ymm12        # 6588 <_sk_callback_avx+0x326>
+  .byte  196,98,125,24,37,50,52,0,0          // vbroadcastss  0x3432(%rip),%ymm12        # 6620 <_sk_callback_avx+0x325>
   .byte  197,156,94,219                      // vdivps        %ymm3,%ymm12,%ymm3
   .byte  197,164,92,219                      // vsubps        %ymm3,%ymm11,%ymm3
   .byte  197,172,89,219                      // vmulps        %ymm3,%ymm10,%ymm3
   .byte  196,99,125,8,211,1                  // vroundps      $0x1,%ymm3,%ymm10
   .byte  196,65,100,92,210                   // vsubps        %ymm10,%ymm3,%ymm10
-  .byte  196,98,125,24,29,126,51,0,0         // vbroadcastss  0x337e(%rip),%ymm11        # 658c <_sk_callback_avx+0x32a>
+  .byte  196,98,125,24,29,22,52,0,0          // vbroadcastss  0x3416(%rip),%ymm11        # 6624 <_sk_callback_avx+0x329>
   .byte  196,193,100,88,219                  // vaddps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,116,51,0,0         // vbroadcastss  0x3374(%rip),%ymm11        # 6590 <_sk_callback_avx+0x32e>
+  .byte  196,98,125,24,29,12,52,0,0          // vbroadcastss  0x340c(%rip),%ymm11        # 6628 <_sk_callback_avx+0x32d>
   .byte  196,65,44,89,219                    // vmulps        %ymm11,%ymm10,%ymm11
   .byte  196,193,100,92,219                  // vsubps        %ymm11,%ymm3,%ymm3
-  .byte  196,98,125,24,29,101,51,0,0         // vbroadcastss  0x3365(%rip),%ymm11        # 6594 <_sk_callback_avx+0x332>
+  .byte  196,98,125,24,29,253,51,0,0         // vbroadcastss  0x33fd(%rip),%ymm11        # 662c <_sk_callback_avx+0x331>
   .byte  196,65,36,92,210                    // vsubps        %ymm10,%ymm11,%ymm10
-  .byte  196,98,125,24,29,91,51,0,0          // vbroadcastss  0x335b(%rip),%ymm11        # 6598 <_sk_callback_avx+0x336>
+  .byte  196,98,125,24,29,243,51,0,0         // vbroadcastss  0x33f3(%rip),%ymm11        # 6630 <_sk_callback_avx+0x335>
   .byte  196,65,36,94,210                    // vdivps        %ymm10,%ymm11,%ymm10
   .byte  196,193,100,88,218                  // vaddps        %ymm10,%ymm3,%ymm3
-  .byte  196,98,125,24,21,76,51,0,0          // vbroadcastss  0x334c(%rip),%ymm10        # 659c <_sk_callback_avx+0x33a>
+  .byte  196,98,125,24,21,228,51,0,0         // vbroadcastss  0x33e4(%rip),%ymm10        # 6634 <_sk_callback_avx+0x339>
   .byte  196,193,100,89,218                  // vmulps        %ymm10,%ymm3,%ymm3
   .byte  197,253,91,219                      // vcvtps2dq     %ymm3,%ymm3
   .byte  196,98,125,24,80,20                 // vbroadcastss  0x14(%rax),%ymm10
@@ -16901,7 +17007,7 @@ _sk_parametric_a_avx:
   .byte  196,195,101,74,217,128              // vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   .byte  196,65,60,87,192                    // vxorps        %ymm8,%ymm8,%ymm8
   .byte  196,193,100,95,216                  // vmaxps        %ymm8,%ymm3,%ymm3
-  .byte  196,98,125,24,5,35,51,0,0           // vbroadcastss  0x3323(%rip),%ymm8        # 65a0 <_sk_callback_avx+0x33e>
+  .byte  196,98,125,24,5,187,51,0,0          // vbroadcastss  0x33bb(%rip),%ymm8        # 6638 <_sk_callback_avx+0x33d>
   .byte  196,193,100,93,216                  // vminps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16910,31 +17016,31 @@ HIDDEN _sk_lab_to_xyz_avx
 .globl _sk_lab_to_xyz_avx
 FUNCTION(_sk_lab_to_xyz_avx)
 _sk_lab_to_xyz_avx:
-  .byte  196,98,125,24,5,21,51,0,0           // vbroadcastss  0x3315(%rip),%ymm8        # 65a4 <_sk_callback_avx+0x342>
+  .byte  196,98,125,24,5,173,51,0,0          // vbroadcastss  0x33ad(%rip),%ymm8        # 663c <_sk_callback_avx+0x341>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,11,51,0,0           // vbroadcastss  0x330b(%rip),%ymm8        # 65a8 <_sk_callback_avx+0x346>
+  .byte  196,98,125,24,5,163,51,0,0          // vbroadcastss  0x33a3(%rip),%ymm8        # 6640 <_sk_callback_avx+0x345>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
-  .byte  196,98,125,24,13,1,51,0,0           // vbroadcastss  0x3301(%rip),%ymm9        # 65ac <_sk_callback_avx+0x34a>
+  .byte  196,98,125,24,13,153,51,0,0         // vbroadcastss  0x3399(%rip),%ymm9        # 6644 <_sk_callback_avx+0x349>
   .byte  196,193,116,88,201                  // vaddps        %ymm9,%ymm1,%ymm1
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  196,193,108,88,209                  // vaddps        %ymm9,%ymm2,%ymm2
-  .byte  196,98,125,24,5,237,50,0,0          // vbroadcastss  0x32ed(%rip),%ymm8        # 65b0 <_sk_callback_avx+0x34e>
+  .byte  196,98,125,24,5,133,51,0,0          // vbroadcastss  0x3385(%rip),%ymm8        # 6648 <_sk_callback_avx+0x34d>
   .byte  196,193,124,88,192                  // vaddps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,227,50,0,0          // vbroadcastss  0x32e3(%rip),%ymm8        # 65b4 <_sk_callback_avx+0x352>
+  .byte  196,98,125,24,5,123,51,0,0          // vbroadcastss  0x337b(%rip),%ymm8        # 664c <_sk_callback_avx+0x351>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,5,217,50,0,0          // vbroadcastss  0x32d9(%rip),%ymm8        # 65b8 <_sk_callback_avx+0x356>
+  .byte  196,98,125,24,5,113,51,0,0          // vbroadcastss  0x3371(%rip),%ymm8        # 6650 <_sk_callback_avx+0x355>
   .byte  196,193,116,89,200                  // vmulps        %ymm8,%ymm1,%ymm1
   .byte  197,252,88,201                      // vaddps        %ymm1,%ymm0,%ymm1
-  .byte  196,98,125,24,5,203,50,0,0          // vbroadcastss  0x32cb(%rip),%ymm8        # 65bc <_sk_callback_avx+0x35a>
+  .byte  196,98,125,24,5,99,51,0,0           // vbroadcastss  0x3363(%rip),%ymm8        # 6654 <_sk_callback_avx+0x359>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  197,252,92,210                      // vsubps        %ymm2,%ymm0,%ymm2
   .byte  197,116,89,193                      // vmulps        %ymm1,%ymm1,%ymm8
   .byte  196,65,116,89,192                   // vmulps        %ymm8,%ymm1,%ymm8
-  .byte  196,98,125,24,13,180,50,0,0         // vbroadcastss  0x32b4(%rip),%ymm9        # 65c0 <_sk_callback_avx+0x35e>
+  .byte  196,98,125,24,13,76,51,0,0          // vbroadcastss  0x334c(%rip),%ymm9        # 6658 <_sk_callback_avx+0x35d>
   .byte  196,65,52,194,208,1                 // vcmpltps      %ymm8,%ymm9,%ymm10
-  .byte  196,98,125,24,29,169,50,0,0         // vbroadcastss  0x32a9(%rip),%ymm11        # 65c4 <_sk_callback_avx+0x362>
+  .byte  196,98,125,24,29,65,51,0,0          // vbroadcastss  0x3341(%rip),%ymm11        # 665c <_sk_callback_avx+0x361>
   .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
-  .byte  196,98,125,24,37,159,50,0,0         // vbroadcastss  0x329f(%rip),%ymm12        # 65c8 <_sk_callback_avx+0x366>
+  .byte  196,98,125,24,37,55,51,0,0          // vbroadcastss  0x3337(%rip),%ymm12        # 6660 <_sk_callback_avx+0x365>
   .byte  196,193,116,89,204                  // vmulps        %ymm12,%ymm1,%ymm1
   .byte  196,67,117,74,192,160               // vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   .byte  197,252,89,200                      // vmulps        %ymm0,%ymm0,%ymm1
@@ -16949,9 +17055,9 @@ _sk_lab_to_xyz_avx:
   .byte  196,193,108,88,211                  // vaddps        %ymm11,%ymm2,%ymm2
   .byte  196,193,108,89,212                  // vmulps        %ymm12,%ymm2,%ymm2
   .byte  196,227,109,74,208,144              // vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  .byte  196,226,125,24,5,85,50,0,0          // vbroadcastss  0x3255(%rip),%ymm0        # 65cc <_sk_callback_avx+0x36a>
+  .byte  196,226,125,24,5,237,50,0,0         // vbroadcastss  0x32ed(%rip),%ymm0        # 6664 <_sk_callback_avx+0x369>
   .byte  197,188,89,192                      // vmulps        %ymm0,%ymm8,%ymm0
-  .byte  196,98,125,24,5,76,50,0,0           // vbroadcastss  0x324c(%rip),%ymm8        # 65d0 <_sk_callback_avx+0x36e>
+  .byte  196,98,125,24,5,228,50,0,0          // vbroadcastss  0x32e4(%rip),%ymm8        # 6668 <_sk_callback_avx+0x36d>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -16972,7 +17078,7 @@ _sk_load_a8_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,16,50,0,0         // vbroadcastss  0x3210(%rip),%ymm1        # 65d4 <_sk_callback_avx+0x372>
+  .byte  196,226,125,24,13,168,50,0,0        // vbroadcastss  0x32a8(%rip),%ymm1        # 666c <_sk_callback_avx+0x371>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -17041,7 +17147,7 @@ _sk_gather_a8_avx:
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,5,49,0,0          // vbroadcastss  0x3105(%rip),%ymm1        # 65d8 <_sk_callback_avx+0x376>
+  .byte  196,226,125,24,13,157,49,0,0        // vbroadcastss  0x319d(%rip),%ymm1        # 6670 <_sk_callback_avx+0x375>
   .byte  197,252,89,217                      // vmulps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  197,252,87,192                      // vxorps        %ymm0,%ymm0,%ymm0
@@ -17059,7 +17165,7 @@ FUNCTION(_sk_store_a8_avx)
 _sk_store_a8_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,224,48,0,0          // vbroadcastss  0x30e0(%rip),%ymm8        # 65dc <_sk_callback_avx+0x37a>
+  .byte  196,98,125,24,5,120,49,0,0          // vbroadcastss  0x3178(%rip),%ymm8        # 6674 <_sk_callback_avx+0x379>
   .byte  196,65,100,89,192                   // vmulps        %ymm8,%ymm3,%ymm8
   .byte  196,65,125,91,192                   // vcvtps2dq     %ymm8,%ymm8
   .byte  196,67,125,25,193,1                 // vextractf128  $0x1,%ymm8,%xmm9
@@ -17129,10 +17235,10 @@ _sk_load_g8_avx:
   .byte  196,226,121,49,192                  // vpmovzxbd     %xmm0,%xmm0
   .byte  196,227,117,24,192,1                // vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,5,48,0,0          // vbroadcastss  0x3005(%rip),%ymm1        # 65e0 <_sk_callback_avx+0x37e>
+  .byte  196,226,125,24,13,157,48,0,0        // vbroadcastss  0x309d(%rip),%ymm1        # 6678 <_sk_callback_avx+0x37d>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,250,47,0,0        // vbroadcastss  0x2ffa(%rip),%ymm3        # 65e4 <_sk_callback_avx+0x382>
+  .byte  196,226,125,24,29,146,48,0,0        // vbroadcastss  0x3092(%rip),%ymm3        # 667c <_sk_callback_avx+0x381>
   .byte  76,137,193                          // mov           %r8,%rcx
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
@@ -17198,10 +17304,10 @@ _sk_gather_g8_avx:
   .byte  196,226,121,49,201                  // vpmovzxbd     %xmm1,%xmm1
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,249,46,0,0        // vbroadcastss  0x2ef9(%rip),%ymm1        # 65e8 <_sk_callback_avx+0x386>
+  .byte  196,226,125,24,13,145,47,0,0        // vbroadcastss  0x2f91(%rip),%ymm1        # 6680 <_sk_callback_avx+0x385>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,238,46,0,0        // vbroadcastss  0x2eee(%rip),%ymm3        # 65ec <_sk_callback_avx+0x38a>
+  .byte  196,226,125,24,29,134,47,0,0        // vbroadcastss  0x2f86(%rip),%ymm3        # 6684 <_sk_callback_avx+0x389>
   .byte  197,252,40,200                      // vmovaps       %ymm0,%ymm1
   .byte  197,252,40,208                      // vmovaps       %ymm0,%ymm2
   .byte  91                                  // pop           %rbx
@@ -17281,10 +17387,10 @@ _sk_gather_i8_avx:
   .byte  196,163,121,34,4,163,2              // vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   .byte  196,163,121,34,28,19,3              // vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,118,47,0,0            // vmovaps       0x2f76(%rip),%ymm10        # 67c0 <_sk_callback_avx+0x55e>
+  .byte  197,124,40,21,22,48,0,0             // vmovaps       0x3016(%rip),%ymm10        # 6860 <_sk_callback_avx+0x565>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,148,45,0,0         // vbroadcastss  0x2d94(%rip),%ymm9        # 65f0 <_sk_callback_avx+0x38e>
+  .byte  196,98,125,24,13,44,46,0,0          // vbroadcastss  0x2e2c(%rip),%ymm9        # 6688 <_sk_callback_avx+0x38d>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -17324,23 +17430,23 @@ _sk_load_565_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,254,44,0,0         // vbroadcastss  0x2cfe(%rip),%ymm0        # 65f4 <_sk_callback_avx+0x392>
+  .byte  196,226,125,24,5,150,45,0,0         // vbroadcastss  0x2d96(%rip),%ymm0        # 668c <_sk_callback_avx+0x391>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,241,44,0,0        // vbroadcastss  0x2cf1(%rip),%ymm1        # 65f8 <_sk_callback_avx+0x396>
+  .byte  196,226,125,24,13,137,45,0,0        // vbroadcastss  0x2d89(%rip),%ymm1        # 6690 <_sk_callback_avx+0x395>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,232,44,0,0        // vbroadcastss  0x2ce8(%rip),%ymm1        # 65fc <_sk_callback_avx+0x39a>
+  .byte  196,226,125,24,13,128,45,0,0        // vbroadcastss  0x2d80(%rip),%ymm1        # 6694 <_sk_callback_avx+0x399>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,219,44,0,0        // vbroadcastss  0x2cdb(%rip),%ymm3        # 6600 <_sk_callback_avx+0x39e>
+  .byte  196,226,125,24,29,115,45,0,0        // vbroadcastss  0x2d73(%rip),%ymm3        # 6698 <_sk_callback_avx+0x39d>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,210,44,0,0        // vbroadcastss  0x2cd2(%rip),%ymm3        # 6604 <_sk_callback_avx+0x3a2>
+  .byte  196,226,125,24,29,106,45,0,0        // vbroadcastss  0x2d6a(%rip),%ymm3        # 669c <_sk_callback_avx+0x3a1>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,197,44,0,0        // vbroadcastss  0x2cc5(%rip),%ymm3        # 6608 <_sk_callback_avx+0x3a6>
+  .byte  196,226,125,24,29,93,45,0,0         // vbroadcastss  0x2d5d(%rip),%ymm3        # 66a0 <_sk_callback_avx+0x3a5>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,186,44,0,0        // vbroadcastss  0x2cba(%rip),%ymm3        # 660c <_sk_callback_avx+0x3aa>
+  .byte  196,226,125,24,29,82,45,0,0         // vbroadcastss  0x2d52(%rip),%ymm3        # 66a4 <_sk_callback_avx+0x3a9>
   .byte  255,224                             // jmpq          *%rax
   .byte  65,137,200                          // mov           %ecx,%r8d
   .byte  65,128,224,7                        // and           $0x7,%r8b
@@ -17439,23 +17545,23 @@ _sk_gather_565_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,209,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  .byte  196,226,125,24,5,90,43,0,0          // vbroadcastss  0x2b5a(%rip),%ymm0        # 6610 <_sk_callback_avx+0x3ae>
+  .byte  196,226,125,24,5,242,43,0,0         // vbroadcastss  0x2bf2(%rip),%ymm0        # 66a8 <_sk_callback_avx+0x3ad>
   .byte  197,236,84,192                      // vandps        %ymm0,%ymm2,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,77,43,0,0         // vbroadcastss  0x2b4d(%rip),%ymm1        # 6614 <_sk_callback_avx+0x3b2>
+  .byte  196,226,125,24,13,229,43,0,0        // vbroadcastss  0x2be5(%rip),%ymm1        # 66ac <_sk_callback_avx+0x3b1>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,68,43,0,0         // vbroadcastss  0x2b44(%rip),%ymm1        # 6618 <_sk_callback_avx+0x3b6>
+  .byte  196,226,125,24,13,220,43,0,0        // vbroadcastss  0x2bdc(%rip),%ymm1        # 66b0 <_sk_callback_avx+0x3b5>
   .byte  197,236,84,201                      // vandps        %ymm1,%ymm2,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,29,55,43,0,0         // vbroadcastss  0x2b37(%rip),%ymm3        # 661c <_sk_callback_avx+0x3ba>
+  .byte  196,226,125,24,29,207,43,0,0        // vbroadcastss  0x2bcf(%rip),%ymm3        # 66b4 <_sk_callback_avx+0x3b9>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
-  .byte  196,226,125,24,29,46,43,0,0         // vbroadcastss  0x2b2e(%rip),%ymm3        # 6620 <_sk_callback_avx+0x3be>
+  .byte  196,226,125,24,29,198,43,0,0        // vbroadcastss  0x2bc6(%rip),%ymm3        # 66b8 <_sk_callback_avx+0x3bd>
   .byte  197,236,84,211                      // vandps        %ymm3,%ymm2,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,226,125,24,29,33,43,0,0         // vbroadcastss  0x2b21(%rip),%ymm3        # 6624 <_sk_callback_avx+0x3c2>
+  .byte  196,226,125,24,29,185,43,0,0        // vbroadcastss  0x2bb9(%rip),%ymm3        # 66bc <_sk_callback_avx+0x3c1>
   .byte  197,236,89,211                      // vmulps        %ymm3,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,22,43,0,0         // vbroadcastss  0x2b16(%rip),%ymm3        # 6628 <_sk_callback_avx+0x3c6>
+  .byte  196,226,125,24,29,174,43,0,0        // vbroadcastss  0x2bae(%rip),%ymm3        # 66c0 <_sk_callback_avx+0x3c5>
   .byte  91                                  // pop           %rbx
   .byte  65,92                               // pop           %r12
   .byte  65,94                               // pop           %r14
@@ -17469,14 +17575,14 @@ FUNCTION(_sk_store_565_avx)
 _sk_store_565_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,2,43,0,0            // vbroadcastss  0x2b02(%rip),%ymm8        # 662c <_sk_callback_avx+0x3ca>
+  .byte  196,98,125,24,5,154,43,0,0          // vbroadcastss  0x2b9a(%rip),%ymm8        # 66c4 <_sk_callback_avx+0x3c9>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,11               // vpslld        $0xb,%xmm9,%xmm10
   .byte  196,67,125,25,201,1                 // vextractf128  $0x1,%ymm9,%xmm9
   .byte  196,193,49,114,241,11               // vpslld        $0xb,%xmm9,%xmm9
   .byte  196,67,45,24,201,1                  // vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  .byte  196,98,125,24,21,219,42,0,0         // vbroadcastss  0x2adb(%rip),%ymm10        # 6630 <_sk_callback_avx+0x3ce>
+  .byte  196,98,125,24,21,115,43,0,0         // vbroadcastss  0x2b73(%rip),%ymm10        # 66c8 <_sk_callback_avx+0x3cd>
   .byte  196,65,116,89,210                   // vmulps        %ymm10,%ymm1,%ymm10
   .byte  196,65,125,91,210                   // vcvtps2dq     %ymm10,%ymm10
   .byte  196,193,33,114,242,5                // vpslld        $0x5,%xmm10,%xmm11
@@ -17550,25 +17656,25 @@ _sk_load_4444_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,228,41,0,0         // vbroadcastss  0x29e4(%rip),%ymm0        # 6634 <_sk_callback_avx+0x3d2>
+  .byte  196,226,125,24,5,124,42,0,0         // vbroadcastss  0x2a7c(%rip),%ymm0        # 66cc <_sk_callback_avx+0x3d1>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,215,41,0,0        // vbroadcastss  0x29d7(%rip),%ymm1        # 6638 <_sk_callback_avx+0x3d6>
+  .byte  196,226,125,24,13,111,42,0,0        // vbroadcastss  0x2a6f(%rip),%ymm1        # 66d0 <_sk_callback_avx+0x3d5>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,206,41,0,0        // vbroadcastss  0x29ce(%rip),%ymm1        # 663c <_sk_callback_avx+0x3da>
+  .byte  196,226,125,24,13,102,42,0,0        // vbroadcastss  0x2a66(%rip),%ymm1        # 66d4 <_sk_callback_avx+0x3d9>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,193,41,0,0        // vbroadcastss  0x29c1(%rip),%ymm2        # 6640 <_sk_callback_avx+0x3de>
+  .byte  196,226,125,24,21,89,42,0,0         // vbroadcastss  0x2a59(%rip),%ymm2        # 66d8 <_sk_callback_avx+0x3dd>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,184,41,0,0        // vbroadcastss  0x29b8(%rip),%ymm2        # 6644 <_sk_callback_avx+0x3e2>
+  .byte  196,226,125,24,21,80,42,0,0         // vbroadcastss  0x2a50(%rip),%ymm2        # 66dc <_sk_callback_avx+0x3e1>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,171,41,0,0          // vbroadcastss  0x29ab(%rip),%ymm8        # 6648 <_sk_callback_avx+0x3e6>
+  .byte  196,98,125,24,5,67,42,0,0           // vbroadcastss  0x2a43(%rip),%ymm8        # 66e0 <_sk_callback_avx+0x3e5>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,161,41,0,0          // vbroadcastss  0x29a1(%rip),%ymm8        # 664c <_sk_callback_avx+0x3ea>
+  .byte  196,98,125,24,5,57,42,0,0           // vbroadcastss  0x2a39(%rip),%ymm8        # 66e4 <_sk_callback_avx+0x3e9>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,147,41,0,0          // vbroadcastss  0x2993(%rip),%ymm8        # 6650 <_sk_callback_avx+0x3ee>
+  .byte  196,98,125,24,5,43,42,0,0           // vbroadcastss  0x2a2b(%rip),%ymm8        # 66e8 <_sk_callback_avx+0x3ed>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -17670,25 +17776,25 @@ _sk_gather_4444_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,217,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  .byte  196,226,125,24,5,42,40,0,0          // vbroadcastss  0x282a(%rip),%ymm0        # 6654 <_sk_callback_avx+0x3f2>
+  .byte  196,226,125,24,5,194,40,0,0         // vbroadcastss  0x28c2(%rip),%ymm0        # 66ec <_sk_callback_avx+0x3f1>
   .byte  197,228,84,192                      // vandps        %ymm0,%ymm3,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,226,125,24,13,29,40,0,0         // vbroadcastss  0x281d(%rip),%ymm1        # 6658 <_sk_callback_avx+0x3f6>
+  .byte  196,226,125,24,13,181,40,0,0        // vbroadcastss  0x28b5(%rip),%ymm1        # 66f0 <_sk_callback_avx+0x3f5>
   .byte  197,252,89,193                      // vmulps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,20,40,0,0         // vbroadcastss  0x2814(%rip),%ymm1        # 665c <_sk_callback_avx+0x3fa>
+  .byte  196,226,125,24,13,172,40,0,0        // vbroadcastss  0x28ac(%rip),%ymm1        # 66f4 <_sk_callback_avx+0x3f9>
   .byte  197,228,84,201                      // vandps        %ymm1,%ymm3,%ymm1
   .byte  197,252,91,201                      // vcvtdq2ps     %ymm1,%ymm1
-  .byte  196,226,125,24,21,7,40,0,0          // vbroadcastss  0x2807(%rip),%ymm2        # 6660 <_sk_callback_avx+0x3fe>
+  .byte  196,226,125,24,21,159,40,0,0        // vbroadcastss  0x289f(%rip),%ymm2        # 66f8 <_sk_callback_avx+0x3fd>
   .byte  197,244,89,202                      // vmulps        %ymm2,%ymm1,%ymm1
-  .byte  196,226,125,24,21,254,39,0,0        // vbroadcastss  0x27fe(%rip),%ymm2        # 6664 <_sk_callback_avx+0x402>
+  .byte  196,226,125,24,21,150,40,0,0        // vbroadcastss  0x2896(%rip),%ymm2        # 66fc <_sk_callback_avx+0x401>
   .byte  197,228,84,210                      // vandps        %ymm2,%ymm3,%ymm2
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
-  .byte  196,98,125,24,5,241,39,0,0          // vbroadcastss  0x27f1(%rip),%ymm8        # 6668 <_sk_callback_avx+0x406>
+  .byte  196,98,125,24,5,137,40,0,0          // vbroadcastss  0x2889(%rip),%ymm8        # 6700 <_sk_callback_avx+0x405>
   .byte  196,193,108,89,208                  // vmulps        %ymm8,%ymm2,%ymm2
-  .byte  196,98,125,24,5,231,39,0,0          // vbroadcastss  0x27e7(%rip),%ymm8        # 666c <_sk_callback_avx+0x40a>
+  .byte  196,98,125,24,5,127,40,0,0          // vbroadcastss  0x287f(%rip),%ymm8        # 6704 <_sk_callback_avx+0x409>
   .byte  196,193,100,84,216                  // vandps        %ymm8,%ymm3,%ymm3
   .byte  197,252,91,219                      // vcvtdq2ps     %ymm3,%ymm3
-  .byte  196,98,125,24,5,217,39,0,0          // vbroadcastss  0x27d9(%rip),%ymm8        # 6670 <_sk_callback_avx+0x40e>
+  .byte  196,98,125,24,5,113,40,0,0          // vbroadcastss  0x2871(%rip),%ymm8        # 6708 <_sk_callback_avx+0x40d>
   .byte  196,193,100,89,216                  // vmulps        %ymm8,%ymm3,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  91                                  // pop           %rbx
@@ -17704,7 +17810,7 @@ FUNCTION(_sk_store_4444_avx)
 _sk_store_4444_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,190,39,0,0          // vbroadcastss  0x27be(%rip),%ymm8        # 6674 <_sk_callback_avx+0x412>
+  .byte  196,98,125,24,5,86,40,0,0           // vbroadcastss  0x2856(%rip),%ymm8        # 670c <_sk_callback_avx+0x411>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,193,41,114,241,12               // vpslld        $0xc,%xmm9,%xmm10
@@ -17785,10 +17891,10 @@ _sk_load_8888_avx:
   .byte  72,133,201                          // test          %rcx,%rcx
   .byte  15,133,135,0,0,0                    // jne           405d <_sk_load_8888_avx+0x95>
   .byte  196,65,124,16,12,186                // vmovups       (%r10,%rdi,4),%ymm9
-  .byte  197,124,40,21,252,39,0,0            // vmovaps       0x27fc(%rip),%ymm10        # 67e0 <_sk_callback_avx+0x57e>
+  .byte  197,124,40,21,156,40,0,0            // vmovaps       0x289c(%rip),%ymm10        # 6880 <_sk_callback_avx+0x585>
   .byte  196,193,52,84,194                   // vandps        %ymm10,%ymm9,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,5,130,38,0,0          // vbroadcastss  0x2682(%rip),%ymm8        # 6678 <_sk_callback_avx+0x416>
+  .byte  196,98,125,24,5,26,39,0,0           // vbroadcastss  0x271a(%rip),%ymm8        # 6710 <_sk_callback_avx+0x415>
   .byte  196,193,124,89,192                  // vmulps        %ymm8,%ymm0,%ymm0
   .byte  196,193,113,114,209,8               // vpsrld        $0x8,%xmm9,%xmm1
   .byte  196,99,125,25,203,1                 // vextractf128  $0x1,%ymm9,%xmm3
@@ -17903,10 +18009,10 @@ _sk_gather_8888_avx:
   .byte  196,131,121,34,4,152,2              // vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   .byte  196,131,121,34,28,144,3             // vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   .byte  196,227,61,24,195,1                 // vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  .byte  197,124,40,21,38,38,0,0             // vmovaps       0x2626(%rip),%ymm10        # 6800 <_sk_callback_avx+0x59e>
+  .byte  197,124,40,21,198,38,0,0            // vmovaps       0x26c6(%rip),%ymm10        # 68a0 <_sk_callback_avx+0x5a5>
   .byte  196,193,124,84,194                  // vandps        %ymm10,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,13,144,36,0,0         // vbroadcastss  0x2490(%rip),%ymm9        # 667c <_sk_callback_avx+0x41a>
+  .byte  196,98,125,24,13,40,37,0,0          // vbroadcastss  0x2528(%rip),%ymm9        # 6714 <_sk_callback_avx+0x419>
   .byte  196,193,124,89,193                  // vmulps        %ymm9,%ymm0,%ymm0
   .byte  196,193,113,114,208,8               // vpsrld        $0x8,%xmm8,%xmm1
   .byte  197,233,114,211,8                   // vpsrld        $0x8,%xmm3,%xmm2
@@ -17938,7 +18044,7 @@ FUNCTION(_sk_store_8888_avx)
 _sk_store_8888_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,16                           // mov           (%rax),%r10
-  .byte  196,98,125,24,5,30,36,0,0           // vbroadcastss  0x241e(%rip),%ymm8        # 6680 <_sk_callback_avx+0x41e>
+  .byte  196,98,125,24,5,182,36,0,0          // vbroadcastss  0x24b6(%rip),%ymm8        # 6718 <_sk_callback_avx+0x41d>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,65,116,89,208                   // vmulps        %ymm8,%ymm1,%ymm10
@@ -18043,13 +18149,13 @@ _sk_load_f16_avx:
   .byte  197,249,105,201                     // vpunpckhwd    %xmm1,%xmm0,%xmm1
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  .byte  196,98,125,24,37,133,34,0,0         // vbroadcastss  0x2285(%rip),%ymm12        # 6684 <_sk_callback_avx+0x422>
+  .byte  196,98,125,24,37,29,35,0,0          // vbroadcastss  0x231d(%rip),%ymm12        # 671c <_sk_callback_avx+0x421>
   .byte  196,193,124,84,204                  // vandps        %ymm12,%ymm0,%ymm1
   .byte  197,252,87,193                      // vxorps        %ymm1,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,113,34,0,0         // vbroadcastss  0x2271(%rip),%xmm11        # 6688 <_sk_callback_avx+0x426>
+  .byte  196,98,121,24,29,9,35,0,0           // vbroadcastss  0x2309(%rip),%xmm11        # 6720 <_sk_callback_avx+0x425>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,103,34,0,0         // vbroadcastss  0x2267(%rip),%xmm13        # 668c <_sk_callback_avx+0x42a>
+  .byte  196,98,121,24,45,255,34,0,0         // vbroadcastss  0x22ff(%rip),%xmm13        # 6724 <_sk_callback_avx+0x429>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -18063,7 +18169,7 @@ _sk_load_f16_avx:
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,193                      // vorps         %ymm1,%ymm0,%ymm0
   .byte  196,227,125,25,193,1                // vextractf128  $0x1,%ymm0,%xmm1
-  .byte  196,226,121,24,29,29,34,0,0         // vbroadcastss  0x221d(%rip),%xmm3        # 6690 <_sk_callback_avx+0x42e>
+  .byte  196,226,121,24,29,181,34,0,0        // vbroadcastss  0x22b5(%rip),%xmm3        # 6728 <_sk_callback_avx+0x42d>
   .byte  197,241,254,203                     // vpaddd        %xmm3,%xmm1,%xmm1
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -18242,13 +18348,13 @@ _sk_gather_f16_avx:
   .byte  197,249,105,210                     // vpunpckhwd    %xmm2,%xmm0,%xmm2
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  .byte  196,98,125,24,37,225,30,0,0         // vbroadcastss  0x1ee1(%rip),%ymm12        # 6694 <_sk_callback_avx+0x432>
+  .byte  196,98,125,24,37,121,31,0,0         // vbroadcastss  0x1f79(%rip),%ymm12        # 672c <_sk_callback_avx+0x431>
   .byte  196,193,124,84,212                  // vandps        %ymm12,%ymm0,%ymm2
   .byte  197,252,87,194                      // vxorps        %ymm2,%ymm0,%ymm0
   .byte  196,195,125,25,198,1                // vextractf128  $0x1,%ymm0,%xmm14
-  .byte  196,98,121,24,29,205,30,0,0         // vbroadcastss  0x1ecd(%rip),%xmm11        # 6698 <_sk_callback_avx+0x436>
+  .byte  196,98,121,24,29,101,31,0,0         // vbroadcastss  0x1f65(%rip),%xmm11        # 6730 <_sk_callback_avx+0x435>
   .byte  196,193,8,87,219                    // vxorps        %xmm11,%xmm14,%xmm3
-  .byte  196,98,121,24,45,195,30,0,0         // vbroadcastss  0x1ec3(%rip),%xmm13        # 669c <_sk_callback_avx+0x43a>
+  .byte  196,98,121,24,45,91,31,0,0          // vbroadcastss  0x1f5b(%rip),%xmm13        # 6734 <_sk_callback_avx+0x439>
   .byte  197,145,102,219                     // vpcmpgtd      %xmm3,%xmm13,%xmm3
   .byte  196,65,120,87,211                   // vxorps        %xmm11,%xmm0,%xmm10
   .byte  196,65,17,102,210                   // vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -18262,7 +18368,7 @@ _sk_gather_f16_avx:
   .byte  196,227,125,24,195,1                // vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   .byte  197,252,86,194                      // vorps         %ymm2,%ymm0,%ymm0
   .byte  196,227,125,25,194,1                // vextractf128  $0x1,%ymm0,%xmm2
-  .byte  196,226,121,24,29,121,30,0,0        // vbroadcastss  0x1e79(%rip),%xmm3        # 66a0 <_sk_callback_avx+0x43e>
+  .byte  196,226,121,24,29,17,31,0,0         // vbroadcastss  0x1f11(%rip),%xmm3        # 6738 <_sk_callback_avx+0x43d>
   .byte  197,233,254,211                     // vpaddd        %xmm3,%xmm2,%xmm2
   .byte  197,249,254,195                     // vpaddd        %xmm3,%xmm0,%xmm0
   .byte  196,227,125,24,194,1                // vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -18366,12 +18472,12 @@ _sk_store_f16_avx:
   .byte  197,252,17,52,36                    // vmovups       %ymm6,(%rsp)
   .byte  197,252,17,108,36,224               // vmovups       %ymm5,-0x20(%rsp)
   .byte  197,252,17,100,36,192               // vmovups       %ymm4,-0x40(%rsp)
-  .byte  196,98,125,24,13,146,28,0,0         // vbroadcastss  0x1c92(%rip),%ymm9        # 66a4 <_sk_callback_avx+0x442>
+  .byte  196,98,125,24,13,42,29,0,0          // vbroadcastss  0x1d2a(%rip),%ymm9        # 673c <_sk_callback_avx+0x441>
   .byte  196,65,124,84,209                   // vandps        %ymm9,%ymm0,%ymm10
   .byte  197,252,17,68,36,128                // vmovups       %ymm0,-0x80(%rsp)
   .byte  196,65,124,87,218                   // vxorps        %ymm10,%ymm0,%ymm11
   .byte  196,67,125,25,220,1                 // vextractf128  $0x1,%ymm11,%xmm12
-  .byte  196,98,121,24,5,119,28,0,0          // vbroadcastss  0x1c77(%rip),%xmm8        # 66a8 <_sk_callback_avx+0x446>
+  .byte  196,98,121,24,5,15,29,0,0           // vbroadcastss  0x1d0f(%rip),%xmm8        # 6740 <_sk_callback_avx+0x445>
   .byte  196,65,57,102,236                   // vpcmpgtd      %xmm12,%xmm8,%xmm13
   .byte  196,65,57,102,243                   // vpcmpgtd      %xmm11,%xmm8,%xmm14
   .byte  196,67,13,24,237,1                  // vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -18381,7 +18487,7 @@ _sk_store_f16_avx:
   .byte  196,67,13,24,242,1                  // vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   .byte  196,193,33,114,211,13               // vpsrld        $0xd,%xmm11,%xmm11
   .byte  196,193,25,114,212,13               // vpsrld        $0xd,%xmm12,%xmm12
-  .byte  196,98,125,24,21,62,28,0,0          // vbroadcastss  0x1c3e(%rip),%ymm10        # 66ac <_sk_callback_avx+0x44a>
+  .byte  196,98,125,24,21,214,28,0,0         // vbroadcastss  0x1cd6(%rip),%ymm10        # 6744 <_sk_callback_avx+0x449>
   .byte  196,65,12,86,242                    // vorps         %ymm10,%ymm14,%ymm14
   .byte  196,67,125,25,247,1                 // vextractf128  $0x1,%ymm14,%xmm15
   .byte  196,65,1,254,228                    // vpaddd        %xmm12,%xmm15,%xmm12
@@ -18526,7 +18632,7 @@ _sk_load_u16_be_avx:
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,150,25,0,0         // vbroadcastss  0x1996(%rip),%ymm11        # 66b0 <_sk_callback_avx+0x44e>
+  .byte  196,98,125,24,29,46,26,0,0          // vbroadcastss  0x1a2e(%rip),%ymm11        # 6748 <_sk_callback_avx+0x44d>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,177,109,202                     // vpunpckhqdq   %xmm2,%xmm9,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -18619,7 +18725,7 @@ _sk_load_rgb_u16_be_avx:
   .byte  196,226,121,51,192                  // vpmovzxwd     %xmm0,%xmm0
   .byte  196,227,125,24,193,1                // vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   .byte  197,252,91,192                      // vcvtdq2ps     %ymm0,%ymm0
-  .byte  196,98,125,24,29,246,23,0,0         // vbroadcastss  0x17f6(%rip),%ymm11        # 66b4 <_sk_callback_avx+0x452>
+  .byte  196,98,125,24,29,142,24,0,0         // vbroadcastss  0x188e(%rip),%ymm11        # 674c <_sk_callback_avx+0x451>
   .byte  196,193,124,89,195                  // vmulps        %ymm11,%ymm0,%ymm0
   .byte  197,185,109,202                     // vpunpckhqdq   %xmm2,%xmm8,%xmm1
   .byte  197,233,113,241,8                   // vpsllw        $0x8,%xmm1,%xmm2
@@ -18640,7 +18746,7 @@ _sk_load_rgb_u16_be_avx:
   .byte  197,252,91,210                      // vcvtdq2ps     %ymm2,%ymm2
   .byte  196,193,108,89,211                  // vmulps        %ymm11,%ymm2,%ymm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,29,147,23,0,0        // vbroadcastss  0x1793(%rip),%ymm3        # 66b8 <_sk_callback_avx+0x456>
+  .byte  196,226,125,24,29,43,24,0,0         // vbroadcastss  0x182b(%rip),%ymm3        # 6750 <_sk_callback_avx+0x455>
   .byte  255,224                             // jmpq          *%rax
   .byte  196,193,121,110,4,64                // vmovd         (%r8,%rax,2),%xmm0
   .byte  196,193,121,196,68,64,4,2           // vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
@@ -18683,7 +18789,7 @@ _sk_store_u16_be_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  72,141,4,189,0,0,0,0                // lea           0x0(,%rdi,4),%rax
-  .byte  196,98,125,24,5,208,22,0,0          // vbroadcastss  0x16d0(%rip),%ymm8        # 66bc <_sk_callback_avx+0x45a>
+  .byte  196,98,125,24,5,104,23,0,0          // vbroadcastss  0x1768(%rip),%ymm8        # 6754 <_sk_callback_avx+0x459>
   .byte  196,65,124,89,200                   // vmulps        %ymm8,%ymm0,%ymm9
   .byte  196,65,125,91,201                   // vcvtps2dq     %ymm9,%ymm9
   .byte  196,67,125,25,202,1                 // vextractf128  $0x1,%ymm9,%xmm10
@@ -18949,12 +19055,12 @@ HIDDEN _sk_luminance_to_alpha_avx
 .globl _sk_luminance_to_alpha_avx
 FUNCTION(_sk_luminance_to_alpha_avx)
 _sk_luminance_to_alpha_avx:
-  .byte  196,226,125,24,29,247,18,0,0        // vbroadcastss  0x12f7(%rip),%ymm3        # 66c0 <_sk_callback_avx+0x45e>
+  .byte  196,226,125,24,29,143,19,0,0        // vbroadcastss  0x138f(%rip),%ymm3        # 6758 <_sk_callback_avx+0x45d>
   .byte  197,252,89,195                      // vmulps        %ymm3,%ymm0,%ymm0
-  .byte  196,226,125,24,29,238,18,0,0        // vbroadcastss  0x12ee(%rip),%ymm3        # 66c4 <_sk_callback_avx+0x462>
+  .byte  196,226,125,24,29,134,19,0,0        // vbroadcastss  0x1386(%rip),%ymm3        # 675c <_sk_callback_avx+0x461>
   .byte  197,244,89,203                      // vmulps        %ymm3,%ymm1,%ymm1
   .byte  197,252,88,193                      // vaddps        %ymm1,%ymm0,%ymm0
-  .byte  196,226,125,24,13,225,18,0,0        // vbroadcastss  0x12e1(%rip),%ymm1        # 66c8 <_sk_callback_avx+0x466>
+  .byte  196,226,125,24,13,121,19,0,0        // vbroadcastss  0x1379(%rip),%ymm1        # 6760 <_sk_callback_avx+0x465>
   .byte  197,236,89,201                      // vmulps        %ymm1,%ymm2,%ymm1
   .byte  197,252,88,217                      // vaddps        %ymm1,%ymm0,%ymm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19089,6 +19195,44 @@ _sk_matrix_4x5_avx:
   .byte  197,124,41,210                      // vmovaps       %ymm10,%ymm2
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_matrix_4x3_avx
+.globl _sk_matrix_4x3_avx
+FUNCTION(_sk_matrix_4x3_avx)
+_sk_matrix_4x3_avx:
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  196,226,125,24,16                   // vbroadcastss  (%rax),%ymm2
+  .byte  196,226,125,24,88,16                // vbroadcastss  0x10(%rax),%ymm3
+  .byte  196,98,125,24,64,32                 // vbroadcastss  0x20(%rax),%ymm8
+  .byte  197,228,89,217                      // vmulps        %ymm1,%ymm3,%ymm3
+  .byte  196,193,100,88,216                  // vaddps        %ymm8,%ymm3,%ymm3
+  .byte  197,236,89,208                      // vmulps        %ymm0,%ymm2,%ymm2
+  .byte  197,108,88,195                      // vaddps        %ymm3,%ymm2,%ymm8
+  .byte  196,226,125,24,80,4                 // vbroadcastss  0x4(%rax),%ymm2
+  .byte  196,226,125,24,88,20                // vbroadcastss  0x14(%rax),%ymm3
+  .byte  196,98,125,24,72,36                 // vbroadcastss  0x24(%rax),%ymm9
+  .byte  197,228,89,217                      // vmulps        %ymm1,%ymm3,%ymm3
+  .byte  196,193,100,88,217                  // vaddps        %ymm9,%ymm3,%ymm3
+  .byte  197,236,89,208                      // vmulps        %ymm0,%ymm2,%ymm2
+  .byte  197,108,88,203                      // vaddps        %ymm3,%ymm2,%ymm9
+  .byte  196,226,125,24,80,8                 // vbroadcastss  0x8(%rax),%ymm2
+  .byte  196,226,125,24,88,24                // vbroadcastss  0x18(%rax),%ymm3
+  .byte  196,98,125,24,80,40                 // vbroadcastss  0x28(%rax),%ymm10
+  .byte  197,228,89,217                      // vmulps        %ymm1,%ymm3,%ymm3
+  .byte  196,193,100,88,218                  // vaddps        %ymm10,%ymm3,%ymm3
+  .byte  197,236,89,208                      // vmulps        %ymm0,%ymm2,%ymm2
+  .byte  197,236,88,211                      // vaddps        %ymm3,%ymm2,%ymm2
+  .byte  196,226,125,24,88,12                // vbroadcastss  0xc(%rax),%ymm3
+  .byte  196,98,125,24,80,28                 // vbroadcastss  0x1c(%rax),%ymm10
+  .byte  196,98,125,24,88,44                 // vbroadcastss  0x2c(%rax),%ymm11
+  .byte  197,172,89,201                      // vmulps        %ymm1,%ymm10,%ymm1
+  .byte  196,193,116,88,203                  // vaddps        %ymm11,%ymm1,%ymm1
+  .byte  197,228,89,192                      // vmulps        %ymm0,%ymm3,%ymm0
+  .byte  197,252,88,217                      // vaddps        %ymm1,%ymm0,%ymm3
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  197,124,41,192                      // vmovaps       %ymm8,%ymm0
+  .byte  197,124,41,201                      // vmovaps       %ymm9,%ymm1
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_matrix_perspective_avx
 .globl _sk_matrix_perspective_avx
 FUNCTION(_sk_matrix_perspective_avx)
@@ -19135,9 +19279,9 @@ _sk_evenly_spaced_gradient_avx:
   .byte  72,139,24                           // mov           (%rax),%rbx
   .byte  72,139,104,8                        // mov           0x8(%rax),%rbp
   .byte  72,255,203                          // dec           %rbx
-  .byte  120,7                               // js            56a4 <_sk_evenly_spaced_gradient_avx+0x1f>
+  .byte  120,7                               // js            573d <_sk_evenly_spaced_gradient_avx+0x1f>
   .byte  196,225,242,42,203                  // vcvtsi2ss     %rbx,%xmm1,%xmm1
-  .byte  235,21                              // jmp           56b9 <_sk_evenly_spaced_gradient_avx+0x34>
+  .byte  235,21                              // jmp           5752 <_sk_evenly_spaced_gradient_avx+0x34>
   .byte  73,137,216                          // mov           %rbx,%r8
   .byte  73,209,232                          // shr           %r8
   .byte  131,227,1                           // and           $0x1,%ebx
@@ -19304,12 +19448,12 @@ _sk_gradient_avx:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,80                              // jb            5a47 <_sk_gradient_avx+0x69>
+  .byte  114,80                              // jb            5ae0 <_sk_gradient_avx+0x69>
   .byte  72,139,88,72                        // mov           0x48(%rax),%rbx
   .byte  73,255,200                          // dec           %r8
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  196,65,52,87,201                    // vxorps        %ymm9,%ymm9,%ymm9
-  .byte  196,98,125,24,21,188,12,0,0         // vbroadcastss  0xcbc(%rip),%ymm10        # 66cc <_sk_callback_avx+0x46a>
+  .byte  196,98,125,24,21,187,12,0,0         // vbroadcastss  0xcbb(%rip),%ymm10        # 6764 <_sk_callback_avx+0x469>
   .byte  197,244,87,201                      // vxorps        %ymm1,%ymm1,%ymm1
   .byte  196,98,125,24,3                     // vbroadcastss  (%rbx),%ymm8
   .byte  197,60,194,192,2                    // vcmpleps      %ymm0,%ymm8,%ymm8
@@ -19321,7 +19465,7 @@ _sk_gradient_avx:
   .byte  196,227,117,24,202,1                // vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   .byte  72,131,195,4                        // add           $0x4,%rbx
   .byte  73,255,200                          // dec           %r8
-  .byte  117,205                             // jne           5a14 <_sk_gradient_avx+0x36>
+  .byte  117,205                             // jne           5aad <_sk_gradient_avx+0x36>
   .byte  196,195,249,22,200,1                // vpextrq       $0x1,%xmm1,%r8
   .byte  69,137,193                          // mov           %r8d,%r9d
   .byte  73,193,232,32                       // shr           $0x20,%r8
@@ -19503,27 +19647,27 @@ _sk_xy_to_unit_angle_avx:
   .byte  196,65,52,95,226                    // vmaxps        %ymm10,%ymm9,%ymm12
   .byte  196,65,36,94,220                    // vdivps        %ymm12,%ymm11,%ymm11
   .byte  196,65,36,89,227                    // vmulps        %ymm11,%ymm11,%ymm12
-  .byte  196,98,125,24,45,224,8,0,0          // vbroadcastss  0x8e0(%rip),%ymm13        # 66d0 <_sk_callback_avx+0x46e>
+  .byte  196,98,125,24,45,223,8,0,0          // vbroadcastss  0x8df(%rip),%ymm13        # 6768 <_sk_callback_avx+0x46d>
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,214,8,0,0          // vbroadcastss  0x8d6(%rip),%ymm14        # 66d4 <_sk_callback_avx+0x472>
+  .byte  196,98,125,24,53,213,8,0,0          // vbroadcastss  0x8d5(%rip),%ymm14        # 676c <_sk_callback_avx+0x471>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,237                    // vmulps        %ymm13,%ymm12,%ymm13
-  .byte  196,98,125,24,53,199,8,0,0          // vbroadcastss  0x8c7(%rip),%ymm14        # 66d8 <_sk_callback_avx+0x476>
+  .byte  196,98,125,24,53,198,8,0,0          // vbroadcastss  0x8c6(%rip),%ymm14        # 6770 <_sk_callback_avx+0x475>
   .byte  196,65,20,88,238                    // vaddps        %ymm14,%ymm13,%ymm13
   .byte  196,65,28,89,229                    // vmulps        %ymm13,%ymm12,%ymm12
-  .byte  196,98,125,24,45,184,8,0,0          // vbroadcastss  0x8b8(%rip),%ymm13        # 66dc <_sk_callback_avx+0x47a>
+  .byte  196,98,125,24,45,183,8,0,0          // vbroadcastss  0x8b7(%rip),%ymm13        # 6774 <_sk_callback_avx+0x479>
   .byte  196,65,28,88,229                    // vaddps        %ymm13,%ymm12,%ymm12
   .byte  196,65,36,89,220                    // vmulps        %ymm12,%ymm11,%ymm11
   .byte  196,65,52,194,202,1                 // vcmpltps      %ymm10,%ymm9,%ymm9
-  .byte  196,98,125,24,21,163,8,0,0          // vbroadcastss  0x8a3(%rip),%ymm10        # 66e0 <_sk_callback_avx+0x47e>
+  .byte  196,98,125,24,21,162,8,0,0          // vbroadcastss  0x8a2(%rip),%ymm10        # 6778 <_sk_callback_avx+0x47d>
   .byte  196,65,44,92,211                    // vsubps        %ymm11,%ymm10,%ymm10
   .byte  196,67,37,74,202,144                // vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   .byte  196,193,124,194,192,1               // vcmpltps      %ymm8,%ymm0,%ymm0
-  .byte  196,98,125,24,21,141,8,0,0          // vbroadcastss  0x88d(%rip),%ymm10        # 66e4 <_sk_callback_avx+0x482>
+  .byte  196,98,125,24,21,140,8,0,0          // vbroadcastss  0x88c(%rip),%ymm10        # 677c <_sk_callback_avx+0x481>
   .byte  196,65,44,92,209                    // vsubps        %ymm9,%ymm10,%ymm10
   .byte  196,195,53,74,194,0                 // vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   .byte  196,65,116,194,200,1                // vcmpltps      %ymm8,%ymm1,%ymm9
-  .byte  196,98,125,24,21,119,8,0,0          // vbroadcastss  0x877(%rip),%ymm10        # 66e8 <_sk_callback_avx+0x486>
+  .byte  196,98,125,24,21,118,8,0,0          // vbroadcastss  0x876(%rip),%ymm10        # 6780 <_sk_callback_avx+0x485>
   .byte  197,44,92,208                       // vsubps        %ymm0,%ymm10,%ymm10
   .byte  196,195,125,74,194,144              // vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   .byte  196,65,124,194,200,3                // vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -19547,7 +19691,7 @@ HIDDEN _sk_save_xy_avx
 FUNCTION(_sk_save_xy_avx)
 _sk_save_xy_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,65,8,0,0            // vbroadcastss  0x841(%rip),%ymm8        # 66ec <_sk_callback_avx+0x48a>
+  .byte  196,98,125,24,5,64,8,0,0            // vbroadcastss  0x840(%rip),%ymm8        # 6784 <_sk_callback_avx+0x489>
   .byte  196,65,124,88,200                   // vaddps        %ymm8,%ymm0,%ymm9
   .byte  196,67,125,8,209,1                  // vroundps      $0x1,%ymm9,%ymm10
   .byte  196,65,52,92,202                    // vsubps        %ymm10,%ymm9,%ymm9
@@ -19584,9 +19728,9 @@ HIDDEN _sk_bilinear_nx_avx
 FUNCTION(_sk_bilinear_nx_avx)
 _sk_bilinear_nx_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,205,7,0,0          // vbroadcastss  0x7cd(%rip),%ymm0        # 66f0 <_sk_callback_avx+0x48e>
+  .byte  196,226,125,24,5,204,7,0,0          // vbroadcastss  0x7cc(%rip),%ymm0        # 6788 <_sk_callback_avx+0x48d>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,196,7,0,0           // vbroadcastss  0x7c4(%rip),%ymm8        # 66f4 <_sk_callback_avx+0x492>
+  .byte  196,98,125,24,5,195,7,0,0           // vbroadcastss  0x7c3(%rip),%ymm8        # 678c <_sk_callback_avx+0x491>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19597,7 +19741,7 @@ HIDDEN _sk_bilinear_px_avx
 FUNCTION(_sk_bilinear_px_avx)
 _sk_bilinear_px_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,172,7,0,0          // vbroadcastss  0x7ac(%rip),%ymm0        # 66f8 <_sk_callback_avx+0x496>
+  .byte  196,226,125,24,5,171,7,0,0          // vbroadcastss  0x7ab(%rip),%ymm0        # 6790 <_sk_callback_avx+0x495>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -19609,9 +19753,9 @@ HIDDEN _sk_bilinear_ny_avx
 FUNCTION(_sk_bilinear_ny_avx)
 _sk_bilinear_ny_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,144,7,0,0         // vbroadcastss  0x790(%rip),%ymm1        # 66fc <_sk_callback_avx+0x49a>
+  .byte  196,226,125,24,13,143,7,0,0         // vbroadcastss  0x78f(%rip),%ymm1        # 6794 <_sk_callback_avx+0x499>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,134,7,0,0           // vbroadcastss  0x786(%rip),%ymm8        # 6700 <_sk_callback_avx+0x49e>
+  .byte  196,98,125,24,5,133,7,0,0           // vbroadcastss  0x785(%rip),%ymm8        # 6798 <_sk_callback_avx+0x49d>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19622,7 +19766,7 @@ HIDDEN _sk_bilinear_py_avx
 FUNCTION(_sk_bilinear_py_avx)
 _sk_bilinear_py_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,110,7,0,0         // vbroadcastss  0x76e(%rip),%ymm1        # 6704 <_sk_callback_avx+0x4a2>
+  .byte  196,226,125,24,13,109,7,0,0         // vbroadcastss  0x76d(%rip),%ymm1        # 679c <_sk_callback_avx+0x4a1>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -19634,14 +19778,14 @@ HIDDEN _sk_bicubic_n3x_avx
 FUNCTION(_sk_bicubic_n3x_avx)
 _sk_bicubic_n3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,81,7,0,0           // vbroadcastss  0x751(%rip),%ymm0        # 6708 <_sk_callback_avx+0x4a6>
+  .byte  196,226,125,24,5,80,7,0,0           // vbroadcastss  0x750(%rip),%ymm0        # 67a0 <_sk_callback_avx+0x4a5>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,72,7,0,0            // vbroadcastss  0x748(%rip),%ymm8        # 670c <_sk_callback_avx+0x4aa>
+  .byte  196,98,125,24,5,71,7,0,0            // vbroadcastss  0x747(%rip),%ymm8        # 67a4 <_sk_callback_avx+0x4a9>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,57,7,0,0           // vbroadcastss  0x739(%rip),%ymm10        # 6710 <_sk_callback_avx+0x4ae>
+  .byte  196,98,125,24,21,56,7,0,0           // vbroadcastss  0x738(%rip),%ymm10        # 67a8 <_sk_callback_avx+0x4ad>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,47,7,0,0           // vbroadcastss  0x72f(%rip),%ymm10        # 6714 <_sk_callback_avx+0x4b2>
+  .byte  196,98,125,24,21,46,7,0,0           // vbroadcastss  0x72e(%rip),%ymm10        # 67ac <_sk_callback_avx+0x4b1>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -19653,19 +19797,19 @@ HIDDEN _sk_bicubic_n1x_avx
 FUNCTION(_sk_bicubic_n1x_avx)
 _sk_bicubic_n1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,18,7,0,0           // vbroadcastss  0x712(%rip),%ymm0        # 6718 <_sk_callback_avx+0x4b6>
+  .byte  196,226,125,24,5,17,7,0,0           // vbroadcastss  0x711(%rip),%ymm0        # 67b0 <_sk_callback_avx+0x4b5>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
-  .byte  196,98,125,24,5,9,7,0,0             // vbroadcastss  0x709(%rip),%ymm8        # 671c <_sk_callback_avx+0x4ba>
+  .byte  196,98,125,24,5,8,7,0,0             // vbroadcastss  0x708(%rip),%ymm8        # 67b4 <_sk_callback_avx+0x4b9>
   .byte  197,60,92,64,64                     // vsubps        0x40(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,255,6,0,0          // vbroadcastss  0x6ff(%rip),%ymm9        # 6720 <_sk_callback_avx+0x4be>
+  .byte  196,98,125,24,13,254,6,0,0          // vbroadcastss  0x6fe(%rip),%ymm9        # 67b8 <_sk_callback_avx+0x4bd>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,245,6,0,0          // vbroadcastss  0x6f5(%rip),%ymm10        # 6724 <_sk_callback_avx+0x4c2>
+  .byte  196,98,125,24,21,244,6,0,0          // vbroadcastss  0x6f4(%rip),%ymm10        # 67bc <_sk_callback_avx+0x4c1>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,230,6,0,0          // vbroadcastss  0x6e6(%rip),%ymm10        # 6728 <_sk_callback_avx+0x4c6>
+  .byte  196,98,125,24,21,229,6,0,0          // vbroadcastss  0x6e5(%rip),%ymm10        # 67c0 <_sk_callback_avx+0x4c5>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,215,6,0,0          // vbroadcastss  0x6d7(%rip),%ymm9        # 672c <_sk_callback_avx+0x4ca>
+  .byte  196,98,125,24,13,214,6,0,0          // vbroadcastss  0x6d6(%rip),%ymm9        # 67c4 <_sk_callback_avx+0x4c9>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19676,17 +19820,17 @@ HIDDEN _sk_bicubic_p1x_avx
 FUNCTION(_sk_bicubic_p1x_avx)
 _sk_bicubic_p1x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,191,6,0,0           // vbroadcastss  0x6bf(%rip),%ymm8        # 6730 <_sk_callback_avx+0x4ce>
+  .byte  196,98,125,24,5,190,6,0,0           // vbroadcastss  0x6be(%rip),%ymm8        # 67c8 <_sk_callback_avx+0x4cd>
   .byte  197,188,88,0                        // vaddps        (%rax),%ymm8,%ymm0
   .byte  197,124,16,72,64                    // vmovups       0x40(%rax),%ymm9
-  .byte  196,98,125,24,21,177,6,0,0          // vbroadcastss  0x6b1(%rip),%ymm10        # 6734 <_sk_callback_avx+0x4d2>
+  .byte  196,98,125,24,21,176,6,0,0          // vbroadcastss  0x6b0(%rip),%ymm10        # 67cc <_sk_callback_avx+0x4d1>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,167,6,0,0          // vbroadcastss  0x6a7(%rip),%ymm11        # 6738 <_sk_callback_avx+0x4d6>
+  .byte  196,98,125,24,29,166,6,0,0          // vbroadcastss  0x6a6(%rip),%ymm11        # 67d0 <_sk_callback_avx+0x4d5>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,142,6,0,0          // vbroadcastss  0x68e(%rip),%ymm9        # 673c <_sk_callback_avx+0x4da>
+  .byte  196,98,125,24,13,141,6,0,0          // vbroadcastss  0x68d(%rip),%ymm9        # 67d4 <_sk_callback_avx+0x4d9>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19697,13 +19841,13 @@ HIDDEN _sk_bicubic_p3x_avx
 FUNCTION(_sk_bicubic_p3x_avx)
 _sk_bicubic_p3x_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,5,118,6,0,0          // vbroadcastss  0x676(%rip),%ymm0        # 6740 <_sk_callback_avx+0x4de>
+  .byte  196,226,125,24,5,117,6,0,0          // vbroadcastss  0x675(%rip),%ymm0        # 67d8 <_sk_callback_avx+0x4dd>
   .byte  197,252,88,0                        // vaddps        (%rax),%ymm0,%ymm0
   .byte  197,124,16,64,64                    // vmovups       0x40(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,99,6,0,0           // vbroadcastss  0x663(%rip),%ymm10        # 6744 <_sk_callback_avx+0x4e2>
+  .byte  196,98,125,24,21,98,6,0,0           // vbroadcastss  0x662(%rip),%ymm10        # 67dc <_sk_callback_avx+0x4e1>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,89,6,0,0           // vbroadcastss  0x659(%rip),%ymm10        # 6748 <_sk_callback_avx+0x4e6>
+  .byte  196,98,125,24,21,88,6,0,0           // vbroadcastss  0x658(%rip),%ymm10        # 67e0 <_sk_callback_avx+0x4e5>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,128,0,0,0            // vmovups       %ymm8,0x80(%rax)
@@ -19715,14 +19859,14 @@ HIDDEN _sk_bicubic_n3y_avx
 FUNCTION(_sk_bicubic_n3y_avx)
 _sk_bicubic_n3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,60,6,0,0          // vbroadcastss  0x63c(%rip),%ymm1        # 674c <_sk_callback_avx+0x4ea>
+  .byte  196,226,125,24,13,59,6,0,0          // vbroadcastss  0x63b(%rip),%ymm1        # 67e4 <_sk_callback_avx+0x4e9>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,50,6,0,0            // vbroadcastss  0x632(%rip),%ymm8        # 6750 <_sk_callback_avx+0x4ee>
+  .byte  196,98,125,24,5,49,6,0,0            // vbroadcastss  0x631(%rip),%ymm8        # 67e8 <_sk_callback_avx+0x4ed>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,35,6,0,0           // vbroadcastss  0x623(%rip),%ymm10        # 6754 <_sk_callback_avx+0x4f2>
+  .byte  196,98,125,24,21,34,6,0,0           // vbroadcastss  0x622(%rip),%ymm10        # 67ec <_sk_callback_avx+0x4f1>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,25,6,0,0           // vbroadcastss  0x619(%rip),%ymm10        # 6758 <_sk_callback_avx+0x4f6>
+  .byte  196,98,125,24,21,24,6,0,0           // vbroadcastss  0x618(%rip),%ymm10        # 67f0 <_sk_callback_avx+0x4f5>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -19734,19 +19878,19 @@ HIDDEN _sk_bicubic_n1y_avx
 FUNCTION(_sk_bicubic_n1y_avx)
 _sk_bicubic_n1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,252,5,0,0         // vbroadcastss  0x5fc(%rip),%ymm1        # 675c <_sk_callback_avx+0x4fa>
+  .byte  196,226,125,24,13,251,5,0,0         // vbroadcastss  0x5fb(%rip),%ymm1        # 67f4 <_sk_callback_avx+0x4f9>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
-  .byte  196,98,125,24,5,242,5,0,0           // vbroadcastss  0x5f2(%rip),%ymm8        # 6760 <_sk_callback_avx+0x4fe>
+  .byte  196,98,125,24,5,241,5,0,0           // vbroadcastss  0x5f1(%rip),%ymm8        # 67f8 <_sk_callback_avx+0x4fd>
   .byte  197,60,92,64,96                     // vsubps        0x60(%rax),%ymm8,%ymm8
-  .byte  196,98,125,24,13,232,5,0,0          // vbroadcastss  0x5e8(%rip),%ymm9        # 6764 <_sk_callback_avx+0x502>
+  .byte  196,98,125,24,13,231,5,0,0          // vbroadcastss  0x5e7(%rip),%ymm9        # 67fc <_sk_callback_avx+0x501>
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,222,5,0,0          // vbroadcastss  0x5de(%rip),%ymm10        # 6768 <_sk_callback_avx+0x506>
+  .byte  196,98,125,24,21,221,5,0,0          // vbroadcastss  0x5dd(%rip),%ymm10        # 6800 <_sk_callback_avx+0x505>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,201                    // vmulps        %ymm9,%ymm8,%ymm9
-  .byte  196,98,125,24,21,207,5,0,0          // vbroadcastss  0x5cf(%rip),%ymm10        # 676c <_sk_callback_avx+0x50a>
+  .byte  196,98,125,24,21,206,5,0,0          // vbroadcastss  0x5ce(%rip),%ymm10        # 6804 <_sk_callback_avx+0x509>
   .byte  196,65,52,88,202                    // vaddps        %ymm10,%ymm9,%ymm9
   .byte  196,65,60,89,193                    // vmulps        %ymm9,%ymm8,%ymm8
-  .byte  196,98,125,24,13,192,5,0,0          // vbroadcastss  0x5c0(%rip),%ymm9        # 6770 <_sk_callback_avx+0x50e>
+  .byte  196,98,125,24,13,191,5,0,0          // vbroadcastss  0x5bf(%rip),%ymm9        # 6808 <_sk_callback_avx+0x50d>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19757,17 +19901,17 @@ HIDDEN _sk_bicubic_p1y_avx
 FUNCTION(_sk_bicubic_p1y_avx)
 _sk_bicubic_p1y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,98,125,24,5,168,5,0,0           // vbroadcastss  0x5a8(%rip),%ymm8        # 6774 <_sk_callback_avx+0x512>
+  .byte  196,98,125,24,5,167,5,0,0           // vbroadcastss  0x5a7(%rip),%ymm8        # 680c <_sk_callback_avx+0x511>
   .byte  197,188,88,72,32                    // vaddps        0x20(%rax),%ymm8,%ymm1
   .byte  197,124,16,72,96                    // vmovups       0x60(%rax),%ymm9
-  .byte  196,98,125,24,21,153,5,0,0          // vbroadcastss  0x599(%rip),%ymm10        # 6778 <_sk_callback_avx+0x516>
+  .byte  196,98,125,24,21,152,5,0,0          // vbroadcastss  0x598(%rip),%ymm10        # 6810 <_sk_callback_avx+0x515>
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
-  .byte  196,98,125,24,29,143,5,0,0          // vbroadcastss  0x58f(%rip),%ymm11        # 677c <_sk_callback_avx+0x51a>
+  .byte  196,98,125,24,29,142,5,0,0          // vbroadcastss  0x58e(%rip),%ymm11        # 6814 <_sk_callback_avx+0x519>
   .byte  196,65,44,88,211                    // vaddps        %ymm11,%ymm10,%ymm10
   .byte  196,65,52,89,210                    // vmulps        %ymm10,%ymm9,%ymm10
   .byte  196,65,44,88,192                    // vaddps        %ymm8,%ymm10,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
-  .byte  196,98,125,24,13,118,5,0,0          // vbroadcastss  0x576(%rip),%ymm9        # 6780 <_sk_callback_avx+0x51e>
+  .byte  196,98,125,24,13,117,5,0,0          // vbroadcastss  0x575(%rip),%ymm9        # 6818 <_sk_callback_avx+0x51d>
   .byte  196,65,60,88,193                    // vaddps        %ymm9,%ymm8,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -19778,13 +19922,13 @@ HIDDEN _sk_bicubic_p3y_avx
 FUNCTION(_sk_bicubic_p3y_avx)
 _sk_bicubic_p3y_avx:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  196,226,125,24,13,94,5,0,0          // vbroadcastss  0x55e(%rip),%ymm1        # 6784 <_sk_callback_avx+0x522>
+  .byte  196,226,125,24,13,93,5,0,0          // vbroadcastss  0x55d(%rip),%ymm1        # 681c <_sk_callback_avx+0x521>
   .byte  197,244,88,72,32                    // vaddps        0x20(%rax),%ymm1,%ymm1
   .byte  197,124,16,64,96                    // vmovups       0x60(%rax),%ymm8
   .byte  196,65,60,89,200                    // vmulps        %ymm8,%ymm8,%ymm9
-  .byte  196,98,125,24,21,74,5,0,0           // vbroadcastss  0x54a(%rip),%ymm10        # 6788 <_sk_callback_avx+0x526>
+  .byte  196,98,125,24,21,73,5,0,0           // vbroadcastss  0x549(%rip),%ymm10        # 6820 <_sk_callback_avx+0x525>
   .byte  196,65,60,89,194                    // vmulps        %ymm10,%ymm8,%ymm8
-  .byte  196,98,125,24,21,64,5,0,0           // vbroadcastss  0x540(%rip),%ymm10        # 678c <_sk_callback_avx+0x52a>
+  .byte  196,98,125,24,21,63,5,0,0           // vbroadcastss  0x53f(%rip),%ymm10        # 6824 <_sk_callback_avx+0x529>
   .byte  196,65,60,88,194                    // vaddps        %ymm10,%ymm8,%ymm8
   .byte  196,65,52,89,192                    // vmulps        %ymm8,%ymm9,%ymm8
   .byte  197,124,17,128,160,0,0,0            // vmovups       %ymm8,0xa0(%rax)
@@ -19908,25 +20052,25 @@ BALIGN4
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 6439 <.literal4+0xb1>
+  .byte  71,225,61                           // rex.RXB       loope 64d1 <.literal4+0xb1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 6449 <.literal4+0xc1>
+  .byte  71,225,61                           // rex.RXB       loope 64e1 <.literal4+0xc1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 6459 <.literal4+0xd1>
+  .byte  71,225,61                           // rex.RXB       loope 64f1 <.literal4+0xd1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,154                          // cmpb          $0x9a,(%rdi)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
   .byte  62,61,10,23,63,174                  // ds            cmp $0xae3f170a,%eax
-  .byte  71,225,61                           // rex.RXB       loope 6469 <.literal4+0xe1>
+  .byte  71,225,61                           // rex.RXB       loope 6501 <.literal4+0xe1>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -19974,7 +20118,7 @@ BALIGN4
   .byte  190,129,128,128,59                  // mov           $0x3b808081,%esi
   .byte  129,128,128,59,0,248,0,0,8,33       // addl          $0x21080000,-0x7ffc480(%rax)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        64b5 <.literal4+0x12d>
+  .byte  224,7                               // loopne        654d <.literal4+0x12d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -19990,10 +20134,10 @@ BALIGN4
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
   .byte  0,52,255                            // add           %dh,(%rdi,%rdi,8)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            64dc <.literal4+0x154>
+  .byte  127,0                               // jg            6574 <.literal4+0x154>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            6555 <.literal4+0x1cd>
+  .byte  119,115                             // ja            65ed <.literal4+0x1cd>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20007,10 +20151,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6510 <.literal4+0x188>
+  .byte  127,0                               // jg            65a8 <.literal4+0x188>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            6589 <.literal4+0x201>
+  .byte  119,115                             // ja            6621 <.literal4+0x201>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20024,10 +20168,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6544 <.literal4+0x1bc>
+  .byte  127,0                               // jg            65dc <.literal4+0x1bc>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            65bd <.literal4+0x235>
+  .byte  119,115                             // ja            6655 <.literal4+0x235>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20041,10 +20185,10 @@ BALIGN4
   .byte  0,128,63,0,0,0                      // add           %al,0x3f(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            6578 <.literal4+0x1f0>
+  .byte  127,0                               // jg            6610 <.literal4+0x1f0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            65f1 <.literal4+0x269>
+  .byte  119,115                             // ja            6689 <.literal4+0x269>
   .byte  248                                 // clc
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,249,68,180                   // mov           $0xb444f93f,%edi
@@ -20057,7 +20201,7 @@ BALIGN4
   .byte  0,75,0                              // add           %cl,0x0(%rbx)
   .byte  0,128,63,0,0,200                    // add           %al,-0x37ffffc1(%rax)
   .byte  66,0,0                              // rex.X         add %al,(%rax)
-  .byte  127,67                              // jg            65ef <.literal4+0x267>
+  .byte  127,67                              // jg            6687 <.literal4+0x267>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -20069,10 +20213,10 @@ BALIGN4
   .byte  190,80,128,3,62                     // mov           $0x3e038050,%esi
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           660f <.literal4+0x287>
+  .byte  118,63                              // jbe           66a7 <.literal4+0x287>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            6623 <.literal4+0x29b>
+  .byte  127,67                              // jg            66bb <.literal4+0x29b>
   .byte  129,128,128,59,0,0,128,63,129,128   // addl          $0x80813f80,0x3b80(%rax)
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,128,63,129,128,128                // add           %al,-0x7f7f7ec1(%rax)
@@ -20081,7 +20225,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        6605 <.literal4+0x27d>
+  .byte  224,7                               // loopne        669d <.literal4+0x27d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -20093,7 +20237,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        6621 <.literal4+0x299>
+  .byte  224,7                               // loopne        66b9 <.literal4+0x299>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -20104,7 +20248,7 @@ BALIGN4
   .byte  0,0                                 // add           %al,(%rax)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            6676 <.literal4+0x2ee>
+  .byte  124,66                              // jl            670e <.literal4+0x2ee>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,55,0,15                 // mov           %ecx,0xf003788(%rax)
@@ -20122,9 +20266,9 @@ BALIGN4
   .byte  137,136,136,59,15,0                 // mov           %ecx,0xf3b88(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  137,136,136,61,0,0                  // mov           %ecx,0x3d88(%rax)
-  .byte  112,65                              // jo            66b9 <.literal4+0x331>
+  .byte  112,65                              // jo            6751 <.literal4+0x331>
   .byte  129,128,128,59,129,128,128,59,0,0   // addl          $0x3b80,-0x7f7ec480(%rax)
-  .byte  127,67                              // jg            66c7 <.literal4+0x33f>
+  .byte  127,67                              // jg            675f <.literal4+0x33f>
   .byte  0,128,0,0,0,0                       // add           %al,0x0(%rax)
   .byte  0,128,0,4,0,128                     // add           %al,-0x7ffffc00(%rax)
   .byte  0,0                                 // add           %al,(%rax)
@@ -20140,7 +20284,7 @@ BALIGN4
   .byte  0,128,55,0,0,128                    // add           %al,-0x7fffffc9(%rax)
   .byte  63                                  // (bad)
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            6707 <.literal4+0x37f>
+  .byte  127,71                              // jg            679f <.literal4+0x37f>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,89                               // ds            pop %rcx
@@ -20370,7 +20514,7 @@ _sk_seed_shader_sse41:
   .byte  102,15,110,199                      // movd          %edi,%xmm0
   .byte  102,15,112,192,0                    // pshufd        $0x0,%xmm0,%xmm0
   .byte  15,91,200                           // cvtdq2ps      %xmm0,%xmm1
-  .byte  15,40,21,164,70,0,0                 // movaps        0x46a4(%rip),%xmm2        # 4720 <_sk_callback_sse41+0xe4>
+  .byte  15,40,21,84,71,0,0                  // movaps        0x4754(%rip),%xmm2        # 47d0 <_sk_callback_sse41+0xda>
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  15,16,2                             // movups        (%rdx),%xmm0
   .byte  15,88,193                           // addps         %xmm1,%xmm0
@@ -20379,7 +20523,7 @@ _sk_seed_shader_sse41:
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,21,147,70,0,0                 // movaps        0x4693(%rip),%xmm2        # 4730 <_sk_callback_sse41+0xf4>
+  .byte  15,40,21,67,71,0,0                  // movaps        0x4743(%rip),%xmm2        # 47e0 <_sk_callback_sse41+0xea>
   .byte  15,87,219                           // xorps         %xmm3,%xmm3
   .byte  15,87,228                           // xorps         %xmm4,%xmm4
   .byte  15,87,237                           // xorps         %xmm5,%xmm5
@@ -20402,14 +20546,14 @@ _sk_dither_sse41:
   .byte  102,68,15,110,1                     // movd          (%rcx),%xmm8
   .byte  102,69,15,112,192,0                 // pshufd        $0x0,%xmm8,%xmm8
   .byte  102,69,15,239,193                   // pxor          %xmm9,%xmm8
-  .byte  102,68,15,111,21,88,70,0,0          // movdqa        0x4658(%rip),%xmm10        # 4740 <_sk_callback_sse41+0x104>
+  .byte  102,68,15,111,21,8,71,0,0           // movdqa        0x4708(%rip),%xmm10        # 47f0 <_sk_callback_sse41+0xfa>
   .byte  102,69,15,111,216                   // movdqa        %xmm8,%xmm11
   .byte  102,69,15,219,218                   // pand          %xmm10,%xmm11
   .byte  102,65,15,114,243,5                 // pslld         $0x5,%xmm11
   .byte  102,69,15,219,209                   // pand          %xmm9,%xmm10
   .byte  102,65,15,114,242,4                 // pslld         $0x4,%xmm10
-  .byte  102,68,15,111,37,68,70,0,0          // movdqa        0x4644(%rip),%xmm12        # 4750 <_sk_callback_sse41+0x114>
-  .byte  102,68,15,111,45,75,70,0,0          // movdqa        0x464b(%rip),%xmm13        # 4760 <_sk_callback_sse41+0x124>
+  .byte  102,68,15,111,37,244,70,0,0         // movdqa        0x46f4(%rip),%xmm12        # 4800 <_sk_callback_sse41+0x10a>
+  .byte  102,68,15,111,45,251,70,0,0         // movdqa        0x46fb(%rip),%xmm13        # 4810 <_sk_callback_sse41+0x11a>
   .byte  102,69,15,111,240                   // movdqa        %xmm8,%xmm14
   .byte  102,69,15,219,245                   // pand          %xmm13,%xmm14
   .byte  102,65,15,114,246,2                 // pslld         $0x2,%xmm14
@@ -20425,8 +20569,8 @@ _sk_dither_sse41:
   .byte  102,69,15,235,245                   // por           %xmm13,%xmm14
   .byte  102,69,15,235,240                   // por           %xmm8,%xmm14
   .byte  69,15,91,198                        // cvtdq2ps      %xmm14,%xmm8
-  .byte  68,15,89,5,6,70,0,0                 // mulps         0x4606(%rip),%xmm8        # 4770 <_sk_callback_sse41+0x134>
-  .byte  68,15,88,5,14,70,0,0                // addps         0x460e(%rip),%xmm8        # 4780 <_sk_callback_sse41+0x144>
+  .byte  68,15,89,5,182,70,0,0               // mulps         0x46b6(%rip),%xmm8        # 4820 <_sk_callback_sse41+0x12a>
+  .byte  68,15,88,5,190,70,0,0               // addps         0x46be(%rip),%xmm8        # 4830 <_sk_callback_sse41+0x13a>
   .byte  243,68,15,16,80,8                   // movss         0x8(%rax),%xmm10
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
@@ -20503,7 +20647,7 @@ HIDDEN _sk_srcatop_sse41
 FUNCTION(_sk_srcatop_sse41)
 _sk_srcatop_sse41:
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  68,15,40,5,103,69,0,0               // movaps        0x4567(%rip),%xmm8        # 4790 <_sk_callback_sse41+0x154>
+  .byte  68,15,40,5,23,70,0,0                // movaps        0x4617(%rip),%xmm8        # 4840 <_sk_callback_sse41+0x14a>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -20528,7 +20672,7 @@ FUNCTION(_sk_dstatop_sse41)
 _sk_dstatop_sse41:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
   .byte  68,15,89,196                        // mulps         %xmm4,%xmm8
-  .byte  68,15,40,13,42,69,0,0               // movaps        0x452a(%rip),%xmm9        # 47a0 <_sk_callback_sse41+0x164>
+  .byte  68,15,40,13,218,69,0,0              // movaps        0x45da(%rip),%xmm9        # 4850 <_sk_callback_sse41+0x15a>
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
@@ -20575,7 +20719,7 @@ HIDDEN _sk_srcout_sse41
 .globl _sk_srcout_sse41
 FUNCTION(_sk_srcout_sse41)
 _sk_srcout_sse41:
-  .byte  68,15,40,5,206,68,0,0               // movaps        0x44ce(%rip),%xmm8        # 47b0 <_sk_callback_sse41+0x174>
+  .byte  68,15,40,5,126,69,0,0               // movaps        0x457e(%rip),%xmm8        # 4860 <_sk_callback_sse41+0x16a>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
@@ -20588,7 +20732,7 @@ HIDDEN _sk_dstout_sse41
 .globl _sk_dstout_sse41
 FUNCTION(_sk_dstout_sse41)
 _sk_dstout_sse41:
-  .byte  68,15,40,5,190,68,0,0               // movaps        0x44be(%rip),%xmm8        # 47c0 <_sk_callback_sse41+0x184>
+  .byte  68,15,40,5,110,69,0,0               // movaps        0x456e(%rip),%xmm8        # 4870 <_sk_callback_sse41+0x17a>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  15,89,196                           // mulps         %xmm4,%xmm0
@@ -20605,7 +20749,7 @@ HIDDEN _sk_srcover_sse41
 .globl _sk_srcover_sse41
 FUNCTION(_sk_srcover_sse41)
 _sk_srcover_sse41:
-  .byte  68,15,40,5,161,68,0,0               // movaps        0x44a1(%rip),%xmm8        # 47d0 <_sk_callback_sse41+0x194>
+  .byte  68,15,40,5,81,69,0,0                // movaps        0x4551(%rip),%xmm8        # 4880 <_sk_callback_sse41+0x18a>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -20625,7 +20769,7 @@ HIDDEN _sk_dstover_sse41
 .globl _sk_dstover_sse41
 FUNCTION(_sk_dstover_sse41)
 _sk_dstover_sse41:
-  .byte  68,15,40,5,117,68,0,0               // movaps        0x4475(%rip),%xmm8        # 47e0 <_sk_callback_sse41+0x1a4>
+  .byte  68,15,40,5,37,69,0,0                // movaps        0x4525(%rip),%xmm8        # 4890 <_sk_callback_sse41+0x19a>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -20653,7 +20797,7 @@ HIDDEN _sk_multiply_sse41
 .globl _sk_multiply_sse41
 FUNCTION(_sk_multiply_sse41)
 _sk_multiply_sse41:
-  .byte  68,15,40,5,73,68,0,0                // movaps        0x4449(%rip),%xmm8        # 47f0 <_sk_callback_sse41+0x1b4>
+  .byte  68,15,40,5,249,68,0,0               // movaps        0x44f9(%rip),%xmm8        # 48a0 <_sk_callback_sse41+0x1aa>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
@@ -20729,7 +20873,7 @@ HIDDEN _sk_xor__sse41
 FUNCTION(_sk_xor__sse41)
 _sk_xor__sse41:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
-  .byte  15,40,29,122,67,0,0                 // movaps        0x437a(%rip),%xmm3        # 4800 <_sk_callback_sse41+0x1c4>
+  .byte  15,40,29,42,68,0,0                  // movaps        0x442a(%rip),%xmm3        # 48b0 <_sk_callback_sse41+0x1ba>
   .byte  68,15,40,203                        // movaps        %xmm3,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
@@ -20777,7 +20921,7 @@ _sk_darken_sse41:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,95,209                        // maxps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,229,66,0,0                 // movaps        0x42e5(%rip),%xmm2        # 4810 <_sk_callback_sse41+0x1d4>
+  .byte  15,40,21,149,67,0,0                 // movaps        0x4395(%rip),%xmm2        # 48c0 <_sk_callback_sse41+0x1ca>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -20811,7 +20955,7 @@ _sk_lighten_sse41:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,138,66,0,0                 // movaps        0x428a(%rip),%xmm2        # 4820 <_sk_callback_sse41+0x1e4>
+  .byte  15,40,21,58,67,0,0                  // movaps        0x433a(%rip),%xmm2        # 48d0 <_sk_callback_sse41+0x1da>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -20848,7 +20992,7 @@ _sk_difference_sse41:
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,36,66,0,0                  // movaps        0x4224(%rip),%xmm2        # 4830 <_sk_callback_sse41+0x1f4>
+  .byte  15,40,21,212,66,0,0                 // movaps        0x42d4(%rip),%xmm2        # 48e0 <_sk_callback_sse41+0x1ea>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -20875,7 +21019,7 @@ _sk_exclusion_sse41:
   .byte  15,89,214                           // mulps         %xmm6,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,202                        // subps         %xmm2,%xmm9
-  .byte  15,40,13,229,65,0,0                 // movaps        0x41e5(%rip),%xmm1        # 4840 <_sk_callback_sse41+0x204>
+  .byte  15,40,13,149,66,0,0                 // movaps        0x4295(%rip),%xmm1        # 48f0 <_sk_callback_sse41+0x1fa>
   .byte  15,92,203                           // subps         %xmm3,%xmm1
   .byte  15,89,207                           // mulps         %xmm7,%xmm1
   .byte  15,88,217                           // addps         %xmm1,%xmm3
@@ -20889,7 +21033,7 @@ HIDDEN _sk_colorburn_sse41
 FUNCTION(_sk_colorburn_sse41)
 _sk_colorburn_sse41:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,40,21,212,65,0,0              // movaps        0x41d4(%rip),%xmm10        # 4850 <_sk_callback_sse41+0x214>
+  .byte  68,15,40,21,132,66,0,0              // movaps        0x4284(%rip),%xmm10        # 4900 <_sk_callback_sse41+0x20a>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  69,15,40,203                        // movaps        %xmm11,%xmm9
@@ -20971,7 +21115,7 @@ HIDDEN _sk_colordodge_sse41
 FUNCTION(_sk_colordodge_sse41)
 _sk_colordodge_sse41:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,40,21,178,64,0,0              // movaps        0x40b2(%rip),%xmm10        # 4860 <_sk_callback_sse41+0x224>
+  .byte  68,15,40,21,98,65,0,0               // movaps        0x4162(%rip),%xmm10        # 4910 <_sk_callback_sse41+0x21a>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
@@ -21053,7 +21197,7 @@ _sk_hardlight_sse41:
   .byte  15,40,244                           // movaps        %xmm4,%xmm6
   .byte  15,40,227                           // movaps        %xmm3,%xmm4
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
-  .byte  68,15,40,21,139,63,0,0              // movaps        0x3f8b(%rip),%xmm10        # 4870 <_sk_callback_sse41+0x234>
+  .byte  68,15,40,21,59,64,0,0               // movaps        0x403b(%rip),%xmm10        # 4920 <_sk_callback_sse41+0x22a>
   .byte  65,15,40,234                        // movaps        %xmm10,%xmm5
   .byte  15,92,239                           // subps         %xmm7,%xmm5
   .byte  15,40,197                           // movaps        %xmm5,%xmm0
@@ -21136,7 +21280,7 @@ FUNCTION(_sk_overlay_sse41)
 _sk_overlay_sse41:
   .byte  68,15,40,201                        // movaps        %xmm1,%xmm9
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
-  .byte  68,15,40,21,112,62,0,0              // movaps        0x3e70(%rip),%xmm10        # 4880 <_sk_callback_sse41+0x244>
+  .byte  68,15,40,21,32,63,0,0               // movaps        0x3f20(%rip),%xmm10        # 4930 <_sk_callback_sse41+0x23a>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
@@ -21221,7 +21365,7 @@ _sk_softlight_sse41:
   .byte  15,40,198                           // movaps        %xmm6,%xmm0
   .byte  15,94,199                           // divps         %xmm7,%xmm0
   .byte  65,15,84,193                        // andps         %xmm9,%xmm0
-  .byte  15,40,13,71,61,0,0                  // movaps        0x3d47(%rip),%xmm1        # 4890 <_sk_callback_sse41+0x254>
+  .byte  15,40,13,247,61,0,0                 // movaps        0x3df7(%rip),%xmm1        # 4940 <_sk_callback_sse41+0x24a>
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
   .byte  68,15,92,208                        // subps         %xmm0,%xmm10
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
@@ -21234,10 +21378,10 @@ _sk_softlight_sse41:
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  15,89,210                           // mulps         %xmm2,%xmm2
   .byte  15,88,208                           // addps         %xmm0,%xmm2
-  .byte  68,15,40,45,37,61,0,0               // movaps        0x3d25(%rip),%xmm13        # 48a0 <_sk_callback_sse41+0x264>
+  .byte  68,15,40,45,213,61,0,0              // movaps        0x3dd5(%rip),%xmm13        # 4950 <_sk_callback_sse41+0x25a>
   .byte  69,15,88,245                        // addps         %xmm13,%xmm14
   .byte  68,15,89,242                        // mulps         %xmm2,%xmm14
-  .byte  68,15,40,37,37,61,0,0               // movaps        0x3d25(%rip),%xmm12        # 48b0 <_sk_callback_sse41+0x274>
+  .byte  68,15,40,37,213,61,0,0              // movaps        0x3dd5(%rip),%xmm12        # 4960 <_sk_callback_sse41+0x26a>
   .byte  69,15,89,252                        // mulps         %xmm12,%xmm15
   .byte  69,15,88,254                        // addps         %xmm14,%xmm15
   .byte  15,40,198                           // movaps        %xmm6,%xmm0
@@ -21423,12 +21567,12 @@ _sk_hue_sse41:
   .byte  68,15,84,208                        // andps         %xmm0,%xmm10
   .byte  15,84,200                           // andps         %xmm0,%xmm1
   .byte  68,15,84,232                        // andps         %xmm0,%xmm13
-  .byte  15,40,5,144,58,0,0                  // movaps        0x3a90(%rip),%xmm0        # 48c0 <_sk_callback_sse41+0x284>
+  .byte  15,40,5,64,59,0,0                   // movaps        0x3b40(%rip),%xmm0        # 4970 <_sk_callback_sse41+0x27a>
   .byte  68,15,89,224                        // mulps         %xmm0,%xmm12
-  .byte  15,40,21,149,58,0,0                 // movaps        0x3a95(%rip),%xmm2        # 48d0 <_sk_callback_sse41+0x294>
+  .byte  15,40,21,69,59,0,0                  // movaps        0x3b45(%rip),%xmm2        # 4980 <_sk_callback_sse41+0x28a>
   .byte  15,89,250                           // mulps         %xmm2,%xmm7
   .byte  65,15,88,252                        // addps         %xmm12,%xmm7
-  .byte  68,15,40,53,150,58,0,0              // movaps        0x3a96(%rip),%xmm14        # 48e0 <_sk_callback_sse41+0x2a4>
+  .byte  68,15,40,53,70,59,0,0               // movaps        0x3b46(%rip),%xmm14        # 4990 <_sk_callback_sse41+0x29a>
   .byte  68,15,40,252                        // movaps        %xmm4,%xmm15
   .byte  69,15,89,254                        // mulps         %xmm14,%xmm15
   .byte  68,15,88,255                        // addps         %xmm7,%xmm15
@@ -21511,7 +21655,7 @@ _sk_hue_sse41:
   .byte  65,15,88,214                        // addps         %xmm14,%xmm2
   .byte  15,40,196                           // movaps        %xmm4,%xmm0
   .byte  102,15,56,20,202                    // blendvps      %xmm0,%xmm2,%xmm1
-  .byte  68,15,40,13,90,57,0,0               // movaps        0x395a(%rip),%xmm9        # 48f0 <_sk_callback_sse41+0x2b4>
+  .byte  68,15,40,13,10,58,0,0               // movaps        0x3a0a(%rip),%xmm9        # 49a0 <_sk_callback_sse41+0x2aa>
   .byte  65,15,40,225                        // movaps        %xmm9,%xmm4
   .byte  15,92,229                           // subps         %xmm5,%xmm4
   .byte  15,40,68,36,200                     // movaps        -0x38(%rsp),%xmm0
@@ -21605,14 +21749,14 @@ _sk_saturation_sse41:
   .byte  68,15,84,215                        // andps         %xmm7,%xmm10
   .byte  68,15,84,223                        // andps         %xmm7,%xmm11
   .byte  68,15,84,199                        // andps         %xmm7,%xmm8
-  .byte  15,40,21,20,56,0,0                  // movaps        0x3814(%rip),%xmm2        # 4900 <_sk_callback_sse41+0x2c4>
+  .byte  15,40,21,196,56,0,0                 // movaps        0x38c4(%rip),%xmm2        # 49b0 <_sk_callback_sse41+0x2ba>
   .byte  15,40,221                           // movaps        %xmm5,%xmm3
   .byte  15,89,218                           // mulps         %xmm2,%xmm3
-  .byte  15,40,13,23,56,0,0                  // movaps        0x3817(%rip),%xmm1        # 4910 <_sk_callback_sse41+0x2d4>
+  .byte  15,40,13,199,56,0,0                 // movaps        0x38c7(%rip),%xmm1        # 49c0 <_sk_callback_sse41+0x2ca>
   .byte  15,40,254                           // movaps        %xmm6,%xmm7
   .byte  15,89,249                           // mulps         %xmm1,%xmm7
   .byte  15,88,251                           // addps         %xmm3,%xmm7
-  .byte  68,15,40,45,22,56,0,0               // movaps        0x3816(%rip),%xmm13        # 4920 <_sk_callback_sse41+0x2e4>
+  .byte  68,15,40,45,198,56,0,0              // movaps        0x38c6(%rip),%xmm13        # 49d0 <_sk_callback_sse41+0x2da>
   .byte  69,15,89,245                        // mulps         %xmm13,%xmm14
   .byte  68,15,88,247                        // addps         %xmm7,%xmm14
   .byte  65,15,40,218                        // movaps        %xmm10,%xmm3
@@ -21693,7 +21837,7 @@ _sk_saturation_sse41:
   .byte  65,15,88,253                        // addps         %xmm13,%xmm7
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  102,68,15,56,20,223                 // blendvps      %xmm0,%xmm7,%xmm11
-  .byte  68,15,40,13,220,54,0,0              // movaps        0x36dc(%rip),%xmm9        # 4930 <_sk_callback_sse41+0x2f4>
+  .byte  68,15,40,13,140,55,0,0              // movaps        0x378c(%rip),%xmm9        # 49e0 <_sk_callback_sse41+0x2ea>
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  68,15,92,204                        // subps         %xmm4,%xmm9
   .byte  15,40,124,36,168                    // movaps        -0x58(%rsp),%xmm7
@@ -21748,14 +21892,14 @@ _sk_color_sse41:
   .byte  15,40,231                           // movaps        %xmm7,%xmm4
   .byte  68,15,89,244                        // mulps         %xmm4,%xmm14
   .byte  15,89,204                           // mulps         %xmm4,%xmm1
-  .byte  68,15,40,13,39,54,0,0               // movaps        0x3627(%rip),%xmm9        # 4940 <_sk_callback_sse41+0x304>
+  .byte  68,15,40,13,215,54,0,0              // movaps        0x36d7(%rip),%xmm9        # 49f0 <_sk_callback_sse41+0x2fa>
   .byte  65,15,40,250                        // movaps        %xmm10,%xmm7
   .byte  65,15,89,249                        // mulps         %xmm9,%xmm7
-  .byte  68,15,40,21,39,54,0,0               // movaps        0x3627(%rip),%xmm10        # 4950 <_sk_callback_sse41+0x314>
+  .byte  68,15,40,21,215,54,0,0              // movaps        0x36d7(%rip),%xmm10        # 4a00 <_sk_callback_sse41+0x30a>
   .byte  65,15,40,219                        // movaps        %xmm11,%xmm3
   .byte  65,15,89,218                        // mulps         %xmm10,%xmm3
   .byte  15,88,223                           // addps         %xmm7,%xmm3
-  .byte  68,15,40,29,36,54,0,0               // movaps        0x3624(%rip),%xmm11        # 4960 <_sk_callback_sse41+0x324>
+  .byte  68,15,40,29,212,54,0,0              // movaps        0x36d4(%rip),%xmm11        # 4a10 <_sk_callback_sse41+0x31a>
   .byte  69,15,40,236                        // movaps        %xmm12,%xmm13
   .byte  69,15,89,235                        // mulps         %xmm11,%xmm13
   .byte  68,15,88,235                        // addps         %xmm3,%xmm13
@@ -21840,7 +21984,7 @@ _sk_color_sse41:
   .byte  65,15,88,251                        // addps         %xmm11,%xmm7
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  102,15,56,20,207                    // blendvps      %xmm0,%xmm7,%xmm1
-  .byte  68,15,40,13,224,52,0,0              // movaps        0x34e0(%rip),%xmm9        # 4970 <_sk_callback_sse41+0x334>
+  .byte  68,15,40,13,144,53,0,0              // movaps        0x3590(%rip),%xmm9        # 4a20 <_sk_callback_sse41+0x32a>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  68,15,89,192                        // mulps         %xmm0,%xmm8
@@ -21892,13 +22036,13 @@ _sk_luminosity_sse41:
   .byte  69,15,89,216                        // mulps         %xmm8,%xmm11
   .byte  68,15,40,203                        // movaps        %xmm3,%xmm9
   .byte  68,15,89,205                        // mulps         %xmm5,%xmm9
-  .byte  68,15,40,5,56,52,0,0                // movaps        0x3438(%rip),%xmm8        # 4980 <_sk_callback_sse41+0x344>
+  .byte  68,15,40,5,232,52,0,0               // movaps        0x34e8(%rip),%xmm8        # 4a30 <_sk_callback_sse41+0x33a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
-  .byte  68,15,40,21,60,52,0,0               // movaps        0x343c(%rip),%xmm10        # 4990 <_sk_callback_sse41+0x354>
+  .byte  68,15,40,21,236,52,0,0              // movaps        0x34ec(%rip),%xmm10        # 4a40 <_sk_callback_sse41+0x34a>
   .byte  15,40,233                           // movaps        %xmm1,%xmm5
   .byte  65,15,89,234                        // mulps         %xmm10,%xmm5
   .byte  15,88,232                           // addps         %xmm0,%xmm5
-  .byte  68,15,40,37,58,52,0,0               // movaps        0x343a(%rip),%xmm12        # 49a0 <_sk_callback_sse41+0x364>
+  .byte  68,15,40,37,234,52,0,0              // movaps        0x34ea(%rip),%xmm12        # 4a50 <_sk_callback_sse41+0x35a>
   .byte  68,15,40,242                        // movaps        %xmm2,%xmm14
   .byte  69,15,89,244                        // mulps         %xmm12,%xmm14
   .byte  68,15,88,245                        // addps         %xmm5,%xmm14
@@ -21983,7 +22127,7 @@ _sk_luminosity_sse41:
   .byte  65,15,88,244                        // addps         %xmm12,%xmm6
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
   .byte  102,68,15,56,20,206                 // blendvps      %xmm0,%xmm6,%xmm9
-  .byte  15,40,5,240,50,0,0                  // movaps        0x32f0(%rip),%xmm0        # 49b0 <_sk_callback_sse41+0x374>
+  .byte  15,40,5,160,51,0,0                  // movaps        0x33a0(%rip),%xmm0        # 4a60 <_sk_callback_sse41+0x36a>
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  15,92,215                           // subps         %xmm7,%xmm2
   .byte  15,89,226                           // mulps         %xmm2,%xmm4
@@ -22032,7 +22176,7 @@ HIDDEN _sk_clamp_1_sse41
 .globl _sk_clamp_1_sse41
 FUNCTION(_sk_clamp_1_sse41)
 _sk_clamp_1_sse41:
-  .byte  68,15,40,5,115,50,0,0               // movaps        0x3273(%rip),%xmm8        # 49c0 <_sk_callback_sse41+0x384>
+  .byte  68,15,40,5,35,51,0,0                // movaps        0x3323(%rip),%xmm8        # 4a70 <_sk_callback_sse41+0x37a>
   .byte  65,15,93,192                        // minps         %xmm8,%xmm0
   .byte  65,15,93,200                        // minps         %xmm8,%xmm1
   .byte  65,15,93,208                        // minps         %xmm8,%xmm2
@@ -22044,7 +22188,7 @@ HIDDEN _sk_clamp_a_sse41
 .globl _sk_clamp_a_sse41
 FUNCTION(_sk_clamp_a_sse41)
 _sk_clamp_a_sse41:
-  .byte  15,93,29,104,50,0,0                 // minps         0x3268(%rip),%xmm3        # 49d0 <_sk_callback_sse41+0x394>
+  .byte  15,93,29,24,51,0,0                  // minps         0x3318(%rip),%xmm3        # 4a80 <_sk_callback_sse41+0x38a>
   .byte  15,93,195                           // minps         %xmm3,%xmm0
   .byte  15,93,203                           // minps         %xmm3,%xmm1
   .byte  15,93,211                           // minps         %xmm3,%xmm2
@@ -22131,7 +22275,7 @@ HIDDEN _sk_unpremul_sse41
 FUNCTION(_sk_unpremul_sse41)
 _sk_unpremul_sse41:
   .byte  69,15,87,192                        // xorps         %xmm8,%xmm8
-  .byte  68,15,40,13,211,49,0,0              // movaps        0x31d3(%rip),%xmm9        # 49e0 <_sk_callback_sse41+0x3a4>
+  .byte  68,15,40,13,131,50,0,0              // movaps        0x3283(%rip),%xmm9        # 4a90 <_sk_callback_sse41+0x39a>
   .byte  68,15,94,203                        // divps         %xmm3,%xmm9
   .byte  68,15,194,195,4                     // cmpneqps      %xmm3,%xmm8
   .byte  69,15,84,193                        // andps         %xmm9,%xmm8
@@ -22145,20 +22289,20 @@ HIDDEN _sk_from_srgb_sse41
 .globl _sk_from_srgb_sse41
 FUNCTION(_sk_from_srgb_sse41)
 _sk_from_srgb_sse41:
-  .byte  68,15,40,29,190,49,0,0              // movaps        0x31be(%rip),%xmm11        # 49f0 <_sk_callback_sse41+0x3b4>
+  .byte  68,15,40,29,110,50,0,0              // movaps        0x326e(%rip),%xmm11        # 4aa0 <_sk_callback_sse41+0x3aa>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
   .byte  68,15,40,208                        // movaps        %xmm0,%xmm10
   .byte  69,15,89,210                        // mulps         %xmm10,%xmm10
-  .byte  68,15,40,37,182,49,0,0              // movaps        0x31b6(%rip),%xmm12        # 4a00 <_sk_callback_sse41+0x3c4>
+  .byte  68,15,40,37,102,50,0,0              // movaps        0x3266(%rip),%xmm12        # 4ab0 <_sk_callback_sse41+0x3ba>
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
   .byte  69,15,89,196                        // mulps         %xmm12,%xmm8
-  .byte  68,15,40,45,182,49,0,0              // movaps        0x31b6(%rip),%xmm13        # 4a10 <_sk_callback_sse41+0x3d4>
+  .byte  68,15,40,45,102,50,0,0              // movaps        0x3266(%rip),%xmm13        # 4ac0 <_sk_callback_sse41+0x3ca>
   .byte  69,15,88,197                        // addps         %xmm13,%xmm8
   .byte  69,15,89,194                        // mulps         %xmm10,%xmm8
-  .byte  68,15,40,53,182,49,0,0              // movaps        0x31b6(%rip),%xmm14        # 4a20 <_sk_callback_sse41+0x3e4>
+  .byte  68,15,40,53,102,50,0,0              // movaps        0x3266(%rip),%xmm14        # 4ad0 <_sk_callback_sse41+0x3da>
   .byte  69,15,88,198                        // addps         %xmm14,%xmm8
-  .byte  68,15,40,61,186,49,0,0              // movaps        0x31ba(%rip),%xmm15        # 4a30 <_sk_callback_sse41+0x3f4>
+  .byte  68,15,40,61,106,50,0,0              // movaps        0x326a(%rip),%xmm15        # 4ae0 <_sk_callback_sse41+0x3ea>
   .byte  65,15,194,199,1                     // cmpltps       %xmm15,%xmm0
   .byte  102,69,15,56,20,193                 // blendvps      %xmm0,%xmm9,%xmm8
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
@@ -22201,22 +22345,22 @@ _sk_to_srgb_sse41:
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
   .byte  15,40,209                           // movaps        %xmm1,%xmm2
   .byte  68,15,82,192                        // rsqrtps       %xmm0,%xmm8
-  .byte  68,15,40,29,50,49,0,0               // movaps        0x3132(%rip),%xmm11        # 4a40 <_sk_callback_sse41+0x404>
+  .byte  68,15,40,29,226,49,0,0              // movaps        0x31e2(%rip),%xmm11        # 4af0 <_sk_callback_sse41+0x3fa>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
-  .byte  68,15,40,37,50,49,0,0               // movaps        0x3132(%rip),%xmm12        # 4a50 <_sk_callback_sse41+0x414>
+  .byte  68,15,40,37,226,49,0,0              // movaps        0x31e2(%rip),%xmm12        # 4b00 <_sk_callback_sse41+0x40a>
   .byte  69,15,40,248                        // movaps        %xmm8,%xmm15
   .byte  69,15,89,252                        // mulps         %xmm12,%xmm15
-  .byte  68,15,40,21,50,49,0,0               // movaps        0x3132(%rip),%xmm10        # 4a60 <_sk_callback_sse41+0x424>
+  .byte  68,15,40,21,226,49,0,0              // movaps        0x31e2(%rip),%xmm10        # 4b10 <_sk_callback_sse41+0x41a>
   .byte  69,15,88,250                        // addps         %xmm10,%xmm15
   .byte  69,15,89,248                        // mulps         %xmm8,%xmm15
-  .byte  68,15,40,45,50,49,0,0               // movaps        0x3132(%rip),%xmm13        # 4a70 <_sk_callback_sse41+0x434>
+  .byte  68,15,40,45,226,49,0,0              // movaps        0x31e2(%rip),%xmm13        # 4b20 <_sk_callback_sse41+0x42a>
   .byte  69,15,88,253                        // addps         %xmm13,%xmm15
-  .byte  68,15,40,53,54,49,0,0               // movaps        0x3136(%rip),%xmm14        # 4a80 <_sk_callback_sse41+0x444>
+  .byte  68,15,40,53,230,49,0,0              // movaps        0x31e6(%rip),%xmm14        # 4b30 <_sk_callback_sse41+0x43a>
   .byte  69,15,88,198                        // addps         %xmm14,%xmm8
   .byte  69,15,83,192                        // rcpps         %xmm8,%xmm8
   .byte  69,15,89,199                        // mulps         %xmm15,%xmm8
-  .byte  68,15,40,61,50,49,0,0               // movaps        0x3132(%rip),%xmm15        # 4a90 <_sk_callback_sse41+0x454>
+  .byte  68,15,40,61,226,49,0,0              // movaps        0x31e2(%rip),%xmm15        # 4b40 <_sk_callback_sse41+0x44a>
   .byte  65,15,194,199,1                     // cmpltps       %xmm15,%xmm0
   .byte  102,69,15,56,20,193                 // blendvps      %xmm0,%xmm9,%xmm8
   .byte  68,15,82,202                        // rsqrtps       %xmm2,%xmm9
@@ -22269,7 +22413,7 @@ _sk_rgb_to_hsl_sse41:
   .byte  68,15,93,226                        // minps         %xmm2,%xmm12
   .byte  65,15,40,203                        // movaps        %xmm11,%xmm1
   .byte  65,15,92,204                        // subps         %xmm12,%xmm1
-  .byte  68,15,40,53,127,48,0,0              // movaps        0x307f(%rip),%xmm14        # 4aa0 <_sk_callback_sse41+0x464>
+  .byte  68,15,40,53,47,49,0,0               // movaps        0x312f(%rip),%xmm14        # 4b50 <_sk_callback_sse41+0x45a>
   .byte  68,15,94,241                        // divps         %xmm1,%xmm14
   .byte  69,15,40,211                        // movaps        %xmm11,%xmm10
   .byte  69,15,194,208,0                     // cmpeqps       %xmm8,%xmm10
@@ -22278,27 +22422,27 @@ _sk_rgb_to_hsl_sse41:
   .byte  65,15,89,198                        // mulps         %xmm14,%xmm0
   .byte  69,15,40,249                        // movaps        %xmm9,%xmm15
   .byte  68,15,194,250,1                     // cmpltps       %xmm2,%xmm15
-  .byte  68,15,84,61,102,48,0,0              // andps         0x3066(%rip),%xmm15        # 4ab0 <_sk_callback_sse41+0x474>
+  .byte  68,15,84,61,22,49,0,0               // andps         0x3116(%rip),%xmm15        # 4b60 <_sk_callback_sse41+0x46a>
   .byte  68,15,88,248                        // addps         %xmm0,%xmm15
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
   .byte  65,15,194,193,0                     // cmpeqps       %xmm9,%xmm0
   .byte  65,15,92,208                        // subps         %xmm8,%xmm2
   .byte  65,15,89,214                        // mulps         %xmm14,%xmm2
-  .byte  68,15,40,45,89,48,0,0               // movaps        0x3059(%rip),%xmm13        # 4ac0 <_sk_callback_sse41+0x484>
+  .byte  68,15,40,45,9,49,0,0                // movaps        0x3109(%rip),%xmm13        # 4b70 <_sk_callback_sse41+0x47a>
   .byte  65,15,88,213                        // addps         %xmm13,%xmm2
   .byte  69,15,92,193                        // subps         %xmm9,%xmm8
   .byte  69,15,89,198                        // mulps         %xmm14,%xmm8
-  .byte  68,15,88,5,85,48,0,0                // addps         0x3055(%rip),%xmm8        # 4ad0 <_sk_callback_sse41+0x494>
+  .byte  68,15,88,5,5,49,0,0                 // addps         0x3105(%rip),%xmm8        # 4b80 <_sk_callback_sse41+0x48a>
   .byte  102,68,15,56,20,194                 // blendvps      %xmm0,%xmm2,%xmm8
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  102,69,15,56,20,199                 // blendvps      %xmm0,%xmm15,%xmm8
-  .byte  68,15,89,5,77,48,0,0                // mulps         0x304d(%rip),%xmm8        # 4ae0 <_sk_callback_sse41+0x4a4>
+  .byte  68,15,89,5,253,48,0,0               // mulps         0x30fd(%rip),%xmm8        # 4b90 <_sk_callback_sse41+0x49a>
   .byte  69,15,40,203                        // movaps        %xmm11,%xmm9
   .byte  69,15,194,204,4                     // cmpneqps      %xmm12,%xmm9
   .byte  69,15,84,193                        // andps         %xmm9,%xmm8
   .byte  69,15,92,235                        // subps         %xmm11,%xmm13
   .byte  69,15,88,220                        // addps         %xmm12,%xmm11
-  .byte  15,40,5,65,48,0,0                   // movaps        0x3041(%rip),%xmm0        # 4af0 <_sk_callback_sse41+0x4b4>
+  .byte  15,40,5,241,48,0,0                  // movaps        0x30f1(%rip),%xmm0        # 4ba0 <_sk_callback_sse41+0x4aa>
   .byte  65,15,40,211                        // movaps        %xmm11,%xmm2
   .byte  15,89,208                           // mulps         %xmm0,%xmm2
   .byte  15,194,194,1                        // cmpltps       %xmm2,%xmm0
@@ -22320,7 +22464,7 @@ _sk_hsl_to_rgb_sse41:
   .byte  15,41,100,36,184                    // movaps        %xmm4,-0x48(%rsp)
   .byte  15,41,92,36,168                     // movaps        %xmm3,-0x58(%rsp)
   .byte  68,15,40,208                        // movaps        %xmm0,%xmm10
-  .byte  68,15,40,13,7,48,0,0                // movaps        0x3007(%rip),%xmm9        # 4b00 <_sk_callback_sse41+0x4c4>
+  .byte  68,15,40,13,183,48,0,0              // movaps        0x30b7(%rip),%xmm9        # 4bb0 <_sk_callback_sse41+0x4ba>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,194,194,2                        // cmpleps       %xmm2,%xmm0
   .byte  15,40,217                           // movaps        %xmm1,%xmm3
@@ -22333,19 +22477,19 @@ _sk_hsl_to_rgb_sse41:
   .byte  15,41,84,36,152                     // movaps        %xmm2,-0x68(%rsp)
   .byte  69,15,88,192                        // addps         %xmm8,%xmm8
   .byte  68,15,92,197                        // subps         %xmm5,%xmm8
-  .byte  68,15,40,53,226,47,0,0              // movaps        0x2fe2(%rip),%xmm14        # 4b10 <_sk_callback_sse41+0x4d4>
+  .byte  68,15,40,53,146,48,0,0              // movaps        0x3092(%rip),%xmm14        # 4bc0 <_sk_callback_sse41+0x4ca>
   .byte  69,15,88,242                        // addps         %xmm10,%xmm14
   .byte  102,65,15,58,8,198,1                // roundps       $0x1,%xmm14,%xmm0
   .byte  68,15,92,240                        // subps         %xmm0,%xmm14
-  .byte  68,15,40,29,219,47,0,0              // movaps        0x2fdb(%rip),%xmm11        # 4b20 <_sk_callback_sse41+0x4e4>
+  .byte  68,15,40,29,139,48,0,0              // movaps        0x308b(%rip),%xmm11        # 4bd0 <_sk_callback_sse41+0x4da>
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
   .byte  65,15,194,198,2                     // cmpleps       %xmm14,%xmm0
   .byte  15,40,245                           // movaps        %xmm5,%xmm6
   .byte  65,15,92,240                        // subps         %xmm8,%xmm6
-  .byte  15,40,61,212,47,0,0                 // movaps        0x2fd4(%rip),%xmm7        # 4b30 <_sk_callback_sse41+0x4f4>
+  .byte  15,40,61,132,48,0,0                 // movaps        0x3084(%rip),%xmm7        # 4be0 <_sk_callback_sse41+0x4ea>
   .byte  69,15,40,238                        // movaps        %xmm14,%xmm13
   .byte  68,15,89,239                        // mulps         %xmm7,%xmm13
-  .byte  15,40,29,213,47,0,0                 // movaps        0x2fd5(%rip),%xmm3        # 4b40 <_sk_callback_sse41+0x504>
+  .byte  15,40,29,133,48,0,0                 // movaps        0x3085(%rip),%xmm3        # 4bf0 <_sk_callback_sse41+0x4fa>
   .byte  68,15,40,227                        // movaps        %xmm3,%xmm12
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  68,15,89,230                        // mulps         %xmm6,%xmm12
@@ -22355,7 +22499,7 @@ _sk_hsl_to_rgb_sse41:
   .byte  65,15,194,198,2                     // cmpleps       %xmm14,%xmm0
   .byte  68,15,40,253                        // movaps        %xmm5,%xmm15
   .byte  102,69,15,56,20,252                 // blendvps      %xmm0,%xmm12,%xmm15
-  .byte  68,15,40,37,180,47,0,0              // movaps        0x2fb4(%rip),%xmm12        # 4b50 <_sk_callback_sse41+0x514>
+  .byte  68,15,40,37,100,48,0,0              // movaps        0x3064(%rip),%xmm12        # 4c00 <_sk_callback_sse41+0x50a>
   .byte  65,15,40,196                        // movaps        %xmm12,%xmm0
   .byte  65,15,194,198,2                     // cmpleps       %xmm14,%xmm0
   .byte  68,15,89,238                        // mulps         %xmm6,%xmm13
@@ -22389,7 +22533,7 @@ _sk_hsl_to_rgb_sse41:
   .byte  65,15,40,198                        // movaps        %xmm14,%xmm0
   .byte  15,40,84,36,152                     // movaps        -0x68(%rsp),%xmm2
   .byte  102,15,56,20,202                    // blendvps      %xmm0,%xmm2,%xmm1
-  .byte  68,15,88,21,44,47,0,0               // addps         0x2f2c(%rip),%xmm10        # 4b60 <_sk_callback_sse41+0x524>
+  .byte  68,15,88,21,220,47,0,0              // addps         0x2fdc(%rip),%xmm10        # 4c10 <_sk_callback_sse41+0x51a>
   .byte  102,65,15,58,8,194,1                // roundps       $0x1,%xmm10,%xmm0
   .byte  68,15,92,208                        // subps         %xmm0,%xmm10
   .byte  69,15,194,218,2                     // cmpleps       %xmm10,%xmm11
@@ -22441,7 +22585,7 @@ _sk_scale_u8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,68,15,56,49,4,56                // pmovzxbd      (%rax,%rdi,1),%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,137,46,0,0               // mulps         0x2e89(%rip),%xmm8        # 4b70 <_sk_callback_sse41+0x534>
+  .byte  68,15,89,5,57,47,0,0                // mulps         0x2f39(%rip),%xmm8        # 4c20 <_sk_callback_sse41+0x52a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
@@ -22479,7 +22623,7 @@ _sk_lerp_u8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,68,15,56,49,4,56                // pmovzxbd      (%rax,%rdi,1),%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,53,46,0,0                // mulps         0x2e35(%rip),%xmm8        # 4b80 <_sk_callback_sse41+0x544>
+  .byte  68,15,89,5,229,46,0,0               // mulps         0x2ee5(%rip),%xmm8        # 4c30 <_sk_callback_sse41+0x53a>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -22502,17 +22646,17 @@ _sk_lerp_565_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,68,15,56,51,20,120              // pmovzxwd      (%rax,%rdi,2),%xmm10
-  .byte  102,68,15,111,5,4,46,0,0            // movdqa        0x2e04(%rip),%xmm8        # 4b90 <_sk_callback_sse41+0x554>
+  .byte  102,68,15,111,5,180,46,0,0          // movdqa        0x2eb4(%rip),%xmm8        # 4c40 <_sk_callback_sse41+0x54a>
   .byte  102,69,15,219,194                   // pand          %xmm10,%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,3,46,0,0                 // mulps         0x2e03(%rip),%xmm8        # 4ba0 <_sk_callback_sse41+0x564>
-  .byte  102,68,15,111,13,10,46,0,0          // movdqa        0x2e0a(%rip),%xmm9        # 4bb0 <_sk_callback_sse41+0x574>
+  .byte  68,15,89,5,179,46,0,0               // mulps         0x2eb3(%rip),%xmm8        # 4c50 <_sk_callback_sse41+0x55a>
+  .byte  102,68,15,111,13,186,46,0,0         // movdqa        0x2eba(%rip),%xmm9        # 4c60 <_sk_callback_sse41+0x56a>
   .byte  102,69,15,219,202                   // pand          %xmm10,%xmm9
   .byte  69,15,91,201                        // cvtdq2ps      %xmm9,%xmm9
-  .byte  68,15,89,13,9,46,0,0                // mulps         0x2e09(%rip),%xmm9        # 4bc0 <_sk_callback_sse41+0x584>
-  .byte  102,68,15,219,21,16,46,0,0          // pand          0x2e10(%rip),%xmm10        # 4bd0 <_sk_callback_sse41+0x594>
+  .byte  68,15,89,13,185,46,0,0              // mulps         0x2eb9(%rip),%xmm9        # 4c70 <_sk_callback_sse41+0x57a>
+  .byte  102,68,15,219,21,192,46,0,0         // pand          0x2ec0(%rip),%xmm10        # 4c80 <_sk_callback_sse41+0x58a>
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
-  .byte  68,15,89,21,20,46,0,0               // mulps         0x2e14(%rip),%xmm10        # 4be0 <_sk_callback_sse41+0x5a4>
+  .byte  68,15,89,21,196,46,0,0              // mulps         0x2ec4(%rip),%xmm10        # 4c90 <_sk_callback_sse41+0x59a>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -22543,7 +22687,7 @@ _sk_load_tables_sse41:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  243,69,15,111,4,184                 // movdqu        (%r8,%rdi,4),%xmm8
-  .byte  102,15,111,5,197,45,0,0             // movdqa        0x2dc5(%rip),%xmm0        # 4bf0 <_sk_callback_sse41+0x5b4>
+  .byte  102,15,111,5,117,46,0,0             // movdqa        0x2e75(%rip),%xmm0        # 4ca0 <_sk_callback_sse41+0x5aa>
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,73,15,58,22,192,1               // pextrq        $0x1,%xmm0,%r8
   .byte  102,72,15,126,193                   // movq          %xmm0,%rcx
@@ -22558,7 +22702,7 @@ _sk_load_tables_sse41:
   .byte  102,15,58,33,193,48                 // insertps      $0x30,%xmm1,%xmm0
   .byte  76,139,64,16                        // mov           0x10(%rax),%r8
   .byte  102,65,15,111,200                   // movdqa        %xmm8,%xmm1
-  .byte  102,15,56,0,13,128,45,0,0           // pshufb        0x2d80(%rip),%xmm1        # 4c00 <_sk_callback_sse41+0x5c4>
+  .byte  102,15,56,0,13,48,46,0,0            // pshufb        0x2e30(%rip),%xmm1        # 4cb0 <_sk_callback_sse41+0x5ba>
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
   .byte  102,72,15,126,201                   // movq          %xmm1,%rcx
   .byte  68,15,182,209                       // movzbl        %cl,%r10d
@@ -22573,7 +22717,7 @@ _sk_load_tables_sse41:
   .byte  102,15,58,33,202,48                 // insertps      $0x30,%xmm2,%xmm1
   .byte  76,139,64,24                        // mov           0x18(%rax),%r8
   .byte  102,65,15,111,208                   // movdqa        %xmm8,%xmm2
-  .byte  102,15,56,0,21,60,45,0,0            // pshufb        0x2d3c(%rip),%xmm2        # 4c10 <_sk_callback_sse41+0x5d4>
+  .byte  102,15,56,0,21,236,45,0,0           // pshufb        0x2dec(%rip),%xmm2        # 4cc0 <_sk_callback_sse41+0x5ca>
   .byte  102,72,15,58,22,209,1               // pextrq        $0x1,%xmm2,%rcx
   .byte  102,72,15,126,208                   // movq          %xmm2,%rax
   .byte  68,15,182,200                       // movzbl        %al,%r9d
@@ -22588,7 +22732,7 @@ _sk_load_tables_sse41:
   .byte  102,15,58,33,211,48                 // insertps      $0x30,%xmm3,%xmm2
   .byte  102,65,15,114,208,24                // psrld         $0x18,%xmm8
   .byte  65,15,91,216                        // cvtdq2ps      %xmm8,%xmm3
-  .byte  15,89,29,249,44,0,0                 // mulps         0x2cf9(%rip),%xmm3        # 4c20 <_sk_callback_sse41+0x5e4>
+  .byte  15,89,29,169,45,0,0                 // mulps         0x2da9(%rip),%xmm3        # 4cd0 <_sk_callback_sse41+0x5da>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -22607,7 +22751,7 @@ _sk_load_tables_u16_be_sse41:
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,97,200                       // punpcklwd     %xmm0,%xmm1
   .byte  102,68,15,105,200                   // punpckhwd     %xmm0,%xmm9
-  .byte  102,68,15,111,5,204,44,0,0          // movdqa        0x2ccc(%rip),%xmm8        # 4c30 <_sk_callback_sse41+0x5f4>
+  .byte  102,68,15,111,5,124,45,0,0          // movdqa        0x2d7c(%rip),%xmm8        # 4ce0 <_sk_callback_sse41+0x5ea>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,15,56,51,192                    // pmovzxwd      %xmm0,%xmm0
@@ -22624,7 +22768,7 @@ _sk_load_tables_u16_be_sse41:
   .byte  243,67,15,16,20,8                   // movss         (%r8,%r9,1),%xmm2
   .byte  102,15,58,33,194,48                 // insertps      $0x30,%xmm2,%xmm0
   .byte  76,139,64,16                        // mov           0x10(%rax),%r8
-  .byte  102,15,56,0,13,127,44,0,0           // pshufb        0x2c7f(%rip),%xmm1        # 4c40 <_sk_callback_sse41+0x604>
+  .byte  102,15,56,0,13,47,45,0,0            // pshufb        0x2d2f(%rip),%xmm1        # 4cf0 <_sk_callback_sse41+0x5fa>
   .byte  102,15,56,51,201                    // pmovzxwd      %xmm1,%xmm1
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
   .byte  102,72,15,126,201                   // movq          %xmm1,%rcx
@@ -22660,7 +22804,7 @@ _sk_load_tables_u16_be_sse41:
   .byte  102,65,15,235,216                   // por           %xmm8,%xmm3
   .byte  102,15,56,51,219                    // pmovzxwd      %xmm3,%xmm3
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,205,43,0,0                 // mulps         0x2bcd(%rip),%xmm3        # 4c50 <_sk_callback_sse41+0x614>
+  .byte  15,89,29,125,44,0,0                 // mulps         0x2c7d(%rip),%xmm3        # 4d00 <_sk_callback_sse41+0x60a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -22682,7 +22826,7 @@ _sk_load_tables_rgb_u16_be_sse41:
   .byte  102,68,15,97,200                    // punpcklwd     %xmm0,%xmm9
   .byte  102,15,111,202                      // movdqa        %xmm2,%xmm1
   .byte  102,65,15,97,201                    // punpcklwd     %xmm9,%xmm1
-  .byte  102,68,15,111,5,143,43,0,0          // movdqa        0x2b8f(%rip),%xmm8        # 4c60 <_sk_callback_sse41+0x624>
+  .byte  102,68,15,111,5,63,44,0,0           // movdqa        0x2c3f(%rip),%xmm8        # 4d10 <_sk_callback_sse41+0x61a>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,15,56,51,192                    // pmovzxwd      %xmm0,%xmm0
@@ -22699,7 +22843,7 @@ _sk_load_tables_rgb_u16_be_sse41:
   .byte  243,67,15,16,28,8                   // movss         (%r8,%r9,1),%xmm3
   .byte  102,15,58,33,195,48                 // insertps      $0x30,%xmm3,%xmm0
   .byte  76,139,64,16                        // mov           0x10(%rax),%r8
-  .byte  102,15,56,0,13,66,43,0,0            // pshufb        0x2b42(%rip),%xmm1        # 4c70 <_sk_callback_sse41+0x634>
+  .byte  102,15,56,0,13,242,43,0,0           // pshufb        0x2bf2(%rip),%xmm1        # 4d20 <_sk_callback_sse41+0x62a>
   .byte  102,15,56,51,201                    // pmovzxwd      %xmm1,%xmm1
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
   .byte  102,72,15,126,201                   // movq          %xmm1,%rcx
@@ -22730,7 +22874,7 @@ _sk_load_tables_rgb_u16_be_sse41:
   .byte  243,65,15,16,28,8                   // movss         (%r8,%rcx,1),%xmm3
   .byte  102,15,58,33,211,48                 // insertps      $0x30,%xmm3,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,173,42,0,0                 // movaps        0x2aad(%rip),%xmm3        # 4c80 <_sk_callback_sse41+0x644>
+  .byte  15,40,29,93,43,0,0                  // movaps        0x2b5d(%rip),%xmm3        # 4d30 <_sk_callback_sse41+0x63a>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_byte_tables_sse41
@@ -22740,7 +22884,7 @@ _sk_byte_tables_sse41:
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,174,42,0,0               // movaps        0x2aae(%rip),%xmm8        # 4c90 <_sk_callback_sse41+0x654>
+  .byte  68,15,40,5,94,43,0,0                // movaps        0x2b5e(%rip),%xmm8        # 4d40 <_sk_callback_sse41+0x64a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,91,192                       // cvtps2dq      %xmm0,%xmm0
   .byte  102,72,15,58,22,193,1               // pextrq        $0x1,%xmm0,%rcx
@@ -22759,7 +22903,7 @@ _sk_byte_tables_sse41:
   .byte  102,15,58,32,193,3                  // pinsrb        $0x3,%ecx,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,13,95,42,0,0               // movaps        0x2a5f(%rip),%xmm9        # 4ca0 <_sk_callback_sse41+0x664>
+  .byte  68,15,40,13,15,43,0,0               // movaps        0x2b0f(%rip),%xmm9        # 4d50 <_sk_callback_sse41+0x65a>
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -22850,7 +22994,7 @@ _sk_byte_tables_rgb_sse41:
   .byte  102,15,58,32,193,3                  // pinsrb        $0x3,%ecx,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,13,231,40,0,0              // movaps        0x28e7(%rip),%xmm9        # 4cb0 <_sk_callback_sse41+0x674>
+  .byte  68,15,40,13,151,41,0,0              // movaps        0x2997(%rip),%xmm9        # 4d60 <_sk_callback_sse41+0x66a>
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -23027,31 +23171,31 @@ _sk_parametric_r_sse41:
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,194                        // cvtdq2ps      %xmm10,%xmm8
-  .byte  68,15,89,5,62,38,0,0                // mulps         0x263e(%rip),%xmm8        # 4cc0 <_sk_callback_sse41+0x684>
-  .byte  68,15,84,21,70,38,0,0               // andps         0x2646(%rip),%xmm10        # 4cd0 <_sk_callback_sse41+0x694>
-  .byte  68,15,86,21,78,38,0,0               // orps          0x264e(%rip),%xmm10        # 4ce0 <_sk_callback_sse41+0x6a4>
-  .byte  68,15,88,5,86,38,0,0                // addps         0x2656(%rip),%xmm8        # 4cf0 <_sk_callback_sse41+0x6b4>
-  .byte  68,15,40,37,94,38,0,0               // movaps        0x265e(%rip),%xmm12        # 4d00 <_sk_callback_sse41+0x6c4>
+  .byte  68,15,89,5,238,38,0,0               // mulps         0x26ee(%rip),%xmm8        # 4d70 <_sk_callback_sse41+0x67a>
+  .byte  68,15,84,21,246,38,0,0              // andps         0x26f6(%rip),%xmm10        # 4d80 <_sk_callback_sse41+0x68a>
+  .byte  68,15,86,21,254,38,0,0              // orps          0x26fe(%rip),%xmm10        # 4d90 <_sk_callback_sse41+0x69a>
+  .byte  68,15,88,5,6,39,0,0                 // addps         0x2706(%rip),%xmm8        # 4da0 <_sk_callback_sse41+0x6aa>
+  .byte  68,15,40,37,14,39,0,0               // movaps        0x270e(%rip),%xmm12        # 4db0 <_sk_callback_sse41+0x6ba>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,196                        // subps         %xmm12,%xmm8
-  .byte  68,15,88,21,94,38,0,0               // addps         0x265e(%rip),%xmm10        # 4d10 <_sk_callback_sse41+0x6d4>
-  .byte  68,15,40,37,102,38,0,0              // movaps        0x2666(%rip),%xmm12        # 4d20 <_sk_callback_sse41+0x6e4>
+  .byte  68,15,88,21,14,39,0,0               // addps         0x270e(%rip),%xmm10        # 4dc0 <_sk_callback_sse41+0x6ca>
+  .byte  68,15,40,37,22,39,0,0               // movaps        0x2716(%rip),%xmm12        # 4dd0 <_sk_callback_sse41+0x6da>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,196                        // subps         %xmm12,%xmm8
   .byte  69,15,89,195                        // mulps         %xmm11,%xmm8
   .byte  102,69,15,58,8,208,1                // roundps       $0x1,%xmm8,%xmm10
   .byte  69,15,40,216                        // movaps        %xmm8,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,5,83,38,0,0                // addps         0x2653(%rip),%xmm8        # 4d30 <_sk_callback_sse41+0x6f4>
-  .byte  68,15,40,21,91,38,0,0               // movaps        0x265b(%rip),%xmm10        # 4d40 <_sk_callback_sse41+0x704>
+  .byte  68,15,88,5,3,39,0,0                 // addps         0x2703(%rip),%xmm8        # 4de0 <_sk_callback_sse41+0x6ea>
+  .byte  68,15,40,21,11,39,0,0               // movaps        0x270b(%rip),%xmm10        # 4df0 <_sk_callback_sse41+0x6fa>
   .byte  69,15,89,211                        // mulps         %xmm11,%xmm10
   .byte  69,15,92,194                        // subps         %xmm10,%xmm8
-  .byte  68,15,40,21,91,38,0,0               // movaps        0x265b(%rip),%xmm10        # 4d50 <_sk_callback_sse41+0x714>
+  .byte  68,15,40,21,11,39,0,0               // movaps        0x270b(%rip),%xmm10        # 4e00 <_sk_callback_sse41+0x70a>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  68,15,40,29,95,38,0,0               // movaps        0x265f(%rip),%xmm11        # 4d60 <_sk_callback_sse41+0x724>
+  .byte  68,15,40,29,15,39,0,0               // movaps        0x270f(%rip),%xmm11        # 4e10 <_sk_callback_sse41+0x71a>
   .byte  69,15,94,218                        // divps         %xmm10,%xmm11
   .byte  69,15,88,216                        // addps         %xmm8,%xmm11
-  .byte  68,15,89,29,95,38,0,0               // mulps         0x265f(%rip),%xmm11        # 4d70 <_sk_callback_sse41+0x734>
+  .byte  68,15,89,29,15,39,0,0               // mulps         0x270f(%rip),%xmm11        # 4e20 <_sk_callback_sse41+0x72a>
   .byte  102,69,15,91,211                    // cvtps2dq      %xmm11,%xmm10
   .byte  243,68,15,16,64,20                  // movss         0x14(%rax),%xmm8
   .byte  69,15,198,192,0                     // shufps        $0x0,%xmm8,%xmm8
@@ -23059,7 +23203,7 @@ _sk_parametric_r_sse41:
   .byte  102,69,15,56,20,193                 // blendvps      %xmm0,%xmm9,%xmm8
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  68,15,95,192                        // maxps         %xmm0,%xmm8
-  .byte  68,15,93,5,70,38,0,0                // minps         0x2646(%rip),%xmm8        # 4d80 <_sk_callback_sse41+0x744>
+  .byte  68,15,93,5,246,38,0,0               // minps         0x26f6(%rip),%xmm8        # 4e30 <_sk_callback_sse41+0x73a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -23089,31 +23233,31 @@ _sk_parametric_g_sse41:
   .byte  68,15,88,217                        // addps         %xmm1,%xmm11
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,91,227                        // cvtdq2ps      %xmm11,%xmm12
-  .byte  68,15,89,37,231,37,0,0              // mulps         0x25e7(%rip),%xmm12        # 4d90 <_sk_callback_sse41+0x754>
-  .byte  68,15,84,29,239,37,0,0              // andps         0x25ef(%rip),%xmm11        # 4da0 <_sk_callback_sse41+0x764>
-  .byte  68,15,86,29,247,37,0,0              // orps          0x25f7(%rip),%xmm11        # 4db0 <_sk_callback_sse41+0x774>
-  .byte  68,15,88,37,255,37,0,0              // addps         0x25ff(%rip),%xmm12        # 4dc0 <_sk_callback_sse41+0x784>
-  .byte  15,40,13,8,38,0,0                   // movaps        0x2608(%rip),%xmm1        # 4dd0 <_sk_callback_sse41+0x794>
+  .byte  68,15,89,37,151,38,0,0              // mulps         0x2697(%rip),%xmm12        # 4e40 <_sk_callback_sse41+0x74a>
+  .byte  68,15,84,29,159,38,0,0              // andps         0x269f(%rip),%xmm11        # 4e50 <_sk_callback_sse41+0x75a>
+  .byte  68,15,86,29,167,38,0,0              // orps          0x26a7(%rip),%xmm11        # 4e60 <_sk_callback_sse41+0x76a>
+  .byte  68,15,88,37,175,38,0,0              // addps         0x26af(%rip),%xmm12        # 4e70 <_sk_callback_sse41+0x77a>
+  .byte  15,40,13,184,38,0,0                 // movaps        0x26b8(%rip),%xmm1        # 4e80 <_sk_callback_sse41+0x78a>
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
   .byte  68,15,92,225                        // subps         %xmm1,%xmm12
-  .byte  68,15,88,29,8,38,0,0                // addps         0x2608(%rip),%xmm11        # 4de0 <_sk_callback_sse41+0x7a4>
-  .byte  15,40,13,17,38,0,0                  // movaps        0x2611(%rip),%xmm1        # 4df0 <_sk_callback_sse41+0x7b4>
+  .byte  68,15,88,29,184,38,0,0              // addps         0x26b8(%rip),%xmm11        # 4e90 <_sk_callback_sse41+0x79a>
+  .byte  15,40,13,193,38,0,0                 // movaps        0x26c1(%rip),%xmm1        # 4ea0 <_sk_callback_sse41+0x7aa>
   .byte  65,15,94,203                        // divps         %xmm11,%xmm1
   .byte  68,15,92,225                        // subps         %xmm1,%xmm12
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  102,69,15,58,8,212,1                // roundps       $0x1,%xmm12,%xmm10
   .byte  69,15,40,220                        // movaps        %xmm12,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,37,254,37,0,0              // addps         0x25fe(%rip),%xmm12        # 4e00 <_sk_callback_sse41+0x7c4>
-  .byte  15,40,13,7,38,0,0                   // movaps        0x2607(%rip),%xmm1        # 4e10 <_sk_callback_sse41+0x7d4>
+  .byte  68,15,88,37,174,38,0,0              // addps         0x26ae(%rip),%xmm12        # 4eb0 <_sk_callback_sse41+0x7ba>
+  .byte  15,40,13,183,38,0,0                 // movaps        0x26b7(%rip),%xmm1        # 4ec0 <_sk_callback_sse41+0x7ca>
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
   .byte  68,15,92,225                        // subps         %xmm1,%xmm12
-  .byte  68,15,40,21,7,38,0,0                // movaps        0x2607(%rip),%xmm10        # 4e20 <_sk_callback_sse41+0x7e4>
+  .byte  68,15,40,21,183,38,0,0              // movaps        0x26b7(%rip),%xmm10        # 4ed0 <_sk_callback_sse41+0x7da>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  15,40,13,12,38,0,0                  // movaps        0x260c(%rip),%xmm1        # 4e30 <_sk_callback_sse41+0x7f4>
+  .byte  15,40,13,188,38,0,0                 // movaps        0x26bc(%rip),%xmm1        # 4ee0 <_sk_callback_sse41+0x7ea>
   .byte  65,15,94,202                        // divps         %xmm10,%xmm1
   .byte  65,15,88,204                        // addps         %xmm12,%xmm1
-  .byte  15,89,13,13,38,0,0                  // mulps         0x260d(%rip),%xmm1        # 4e40 <_sk_callback_sse41+0x804>
+  .byte  15,89,13,189,38,0,0                 // mulps         0x26bd(%rip),%xmm1        # 4ef0 <_sk_callback_sse41+0x7fa>
   .byte  102,68,15,91,209                    // cvtps2dq      %xmm1,%xmm10
   .byte  243,15,16,72,20                     // movss         0x14(%rax),%xmm1
   .byte  15,198,201,0                        // shufps        $0x0,%xmm1,%xmm1
@@ -23121,7 +23265,7 @@ _sk_parametric_g_sse41:
   .byte  102,65,15,56,20,201                 // blendvps      %xmm0,%xmm9,%xmm1
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,95,200                           // maxps         %xmm0,%xmm1
-  .byte  15,93,13,248,37,0,0                 // minps         0x25f8(%rip),%xmm1        # 4e50 <_sk_callback_sse41+0x814>
+  .byte  15,93,13,168,38,0,0                 // minps         0x26a8(%rip),%xmm1        # 4f00 <_sk_callback_sse41+0x80a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -23151,31 +23295,31 @@ _sk_parametric_b_sse41:
   .byte  68,15,88,218                        // addps         %xmm2,%xmm11
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,91,227                        // cvtdq2ps      %xmm11,%xmm12
-  .byte  68,15,89,37,153,37,0,0              // mulps         0x2599(%rip),%xmm12        # 4e60 <_sk_callback_sse41+0x824>
-  .byte  68,15,84,29,161,37,0,0              // andps         0x25a1(%rip),%xmm11        # 4e70 <_sk_callback_sse41+0x834>
-  .byte  68,15,86,29,169,37,0,0              // orps          0x25a9(%rip),%xmm11        # 4e80 <_sk_callback_sse41+0x844>
-  .byte  68,15,88,37,177,37,0,0              // addps         0x25b1(%rip),%xmm12        # 4e90 <_sk_callback_sse41+0x854>
-  .byte  15,40,21,186,37,0,0                 // movaps        0x25ba(%rip),%xmm2        # 4ea0 <_sk_callback_sse41+0x864>
+  .byte  68,15,89,37,73,38,0,0               // mulps         0x2649(%rip),%xmm12        # 4f10 <_sk_callback_sse41+0x81a>
+  .byte  68,15,84,29,81,38,0,0               // andps         0x2651(%rip),%xmm11        # 4f20 <_sk_callback_sse41+0x82a>
+  .byte  68,15,86,29,89,38,0,0               // orps          0x2659(%rip),%xmm11        # 4f30 <_sk_callback_sse41+0x83a>
+  .byte  68,15,88,37,97,38,0,0               // addps         0x2661(%rip),%xmm12        # 4f40 <_sk_callback_sse41+0x84a>
+  .byte  15,40,21,106,38,0,0                 // movaps        0x266a(%rip),%xmm2        # 4f50 <_sk_callback_sse41+0x85a>
   .byte  65,15,89,211                        // mulps         %xmm11,%xmm2
   .byte  68,15,92,226                        // subps         %xmm2,%xmm12
-  .byte  68,15,88,29,186,37,0,0              // addps         0x25ba(%rip),%xmm11        # 4eb0 <_sk_callback_sse41+0x874>
-  .byte  15,40,21,195,37,0,0                 // movaps        0x25c3(%rip),%xmm2        # 4ec0 <_sk_callback_sse41+0x884>
+  .byte  68,15,88,29,106,38,0,0              // addps         0x266a(%rip),%xmm11        # 4f60 <_sk_callback_sse41+0x86a>
+  .byte  15,40,21,115,38,0,0                 // movaps        0x2673(%rip),%xmm2        # 4f70 <_sk_callback_sse41+0x87a>
   .byte  65,15,94,211                        // divps         %xmm11,%xmm2
   .byte  68,15,92,226                        // subps         %xmm2,%xmm12
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  102,69,15,58,8,212,1                // roundps       $0x1,%xmm12,%xmm10
   .byte  69,15,40,220                        // movaps        %xmm12,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,37,176,37,0,0              // addps         0x25b0(%rip),%xmm12        # 4ed0 <_sk_callback_sse41+0x894>
-  .byte  15,40,21,185,37,0,0                 // movaps        0x25b9(%rip),%xmm2        # 4ee0 <_sk_callback_sse41+0x8a4>
+  .byte  68,15,88,37,96,38,0,0               // addps         0x2660(%rip),%xmm12        # 4f80 <_sk_callback_sse41+0x88a>
+  .byte  15,40,21,105,38,0,0                 // movaps        0x2669(%rip),%xmm2        # 4f90 <_sk_callback_sse41+0x89a>
   .byte  65,15,89,211                        // mulps         %xmm11,%xmm2
   .byte  68,15,92,226                        // subps         %xmm2,%xmm12
-  .byte  68,15,40,21,185,37,0,0              // movaps        0x25b9(%rip),%xmm10        # 4ef0 <_sk_callback_sse41+0x8b4>
+  .byte  68,15,40,21,105,38,0,0              // movaps        0x2669(%rip),%xmm10        # 4fa0 <_sk_callback_sse41+0x8aa>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  15,40,21,190,37,0,0                 // movaps        0x25be(%rip),%xmm2        # 4f00 <_sk_callback_sse41+0x8c4>
+  .byte  15,40,21,110,38,0,0                 // movaps        0x266e(%rip),%xmm2        # 4fb0 <_sk_callback_sse41+0x8ba>
   .byte  65,15,94,210                        // divps         %xmm10,%xmm2
   .byte  65,15,88,212                        // addps         %xmm12,%xmm2
-  .byte  15,89,21,191,37,0,0                 // mulps         0x25bf(%rip),%xmm2        # 4f10 <_sk_callback_sse41+0x8d4>
+  .byte  15,89,21,111,38,0,0                 // mulps         0x266f(%rip),%xmm2        # 4fc0 <_sk_callback_sse41+0x8ca>
   .byte  102,68,15,91,210                    // cvtps2dq      %xmm2,%xmm10
   .byte  243,15,16,80,20                     // movss         0x14(%rax),%xmm2
   .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2
@@ -23183,7 +23327,7 @@ _sk_parametric_b_sse41:
   .byte  102,65,15,56,20,209                 // blendvps      %xmm0,%xmm9,%xmm2
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,95,208                           // maxps         %xmm0,%xmm2
-  .byte  15,93,21,170,37,0,0                 // minps         0x25aa(%rip),%xmm2        # 4f20 <_sk_callback_sse41+0x8e4>
+  .byte  15,93,21,90,38,0,0                  // minps         0x265a(%rip),%xmm2        # 4fd0 <_sk_callback_sse41+0x8da>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -23213,31 +23357,31 @@ _sk_parametric_a_sse41:
   .byte  68,15,88,219                        // addps         %xmm3,%xmm11
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,91,227                        // cvtdq2ps      %xmm11,%xmm12
-  .byte  68,15,89,37,75,37,0,0               // mulps         0x254b(%rip),%xmm12        # 4f30 <_sk_callback_sse41+0x8f4>
-  .byte  68,15,84,29,83,37,0,0               // andps         0x2553(%rip),%xmm11        # 4f40 <_sk_callback_sse41+0x904>
-  .byte  68,15,86,29,91,37,0,0               // orps          0x255b(%rip),%xmm11        # 4f50 <_sk_callback_sse41+0x914>
-  .byte  68,15,88,37,99,37,0,0               // addps         0x2563(%rip),%xmm12        # 4f60 <_sk_callback_sse41+0x924>
-  .byte  15,40,29,108,37,0,0                 // movaps        0x256c(%rip),%xmm3        # 4f70 <_sk_callback_sse41+0x934>
+  .byte  68,15,89,37,251,37,0,0              // mulps         0x25fb(%rip),%xmm12        # 4fe0 <_sk_callback_sse41+0x8ea>
+  .byte  68,15,84,29,3,38,0,0                // andps         0x2603(%rip),%xmm11        # 4ff0 <_sk_callback_sse41+0x8fa>
+  .byte  68,15,86,29,11,38,0,0               // orps          0x260b(%rip),%xmm11        # 5000 <_sk_callback_sse41+0x90a>
+  .byte  68,15,88,37,19,38,0,0               // addps         0x2613(%rip),%xmm12        # 5010 <_sk_callback_sse41+0x91a>
+  .byte  15,40,29,28,38,0,0                  // movaps        0x261c(%rip),%xmm3        # 5020 <_sk_callback_sse41+0x92a>
   .byte  65,15,89,219                        // mulps         %xmm11,%xmm3
   .byte  68,15,92,227                        // subps         %xmm3,%xmm12
-  .byte  68,15,88,29,108,37,0,0              // addps         0x256c(%rip),%xmm11        # 4f80 <_sk_callback_sse41+0x944>
-  .byte  15,40,29,117,37,0,0                 // movaps        0x2575(%rip),%xmm3        # 4f90 <_sk_callback_sse41+0x954>
+  .byte  68,15,88,29,28,38,0,0               // addps         0x261c(%rip),%xmm11        # 5030 <_sk_callback_sse41+0x93a>
+  .byte  15,40,29,37,38,0,0                  // movaps        0x2625(%rip),%xmm3        # 5040 <_sk_callback_sse41+0x94a>
   .byte  65,15,94,219                        // divps         %xmm11,%xmm3
   .byte  68,15,92,227                        // subps         %xmm3,%xmm12
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  102,69,15,58,8,212,1                // roundps       $0x1,%xmm12,%xmm10
   .byte  69,15,40,220                        // movaps        %xmm12,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  68,15,88,37,98,37,0,0               // addps         0x2562(%rip),%xmm12        # 4fa0 <_sk_callback_sse41+0x964>
-  .byte  15,40,29,107,37,0,0                 // movaps        0x256b(%rip),%xmm3        # 4fb0 <_sk_callback_sse41+0x974>
+  .byte  68,15,88,37,18,38,0,0               // addps         0x2612(%rip),%xmm12        # 5050 <_sk_callback_sse41+0x95a>
+  .byte  15,40,29,27,38,0,0                  // movaps        0x261b(%rip),%xmm3        # 5060 <_sk_callback_sse41+0x96a>
   .byte  65,15,89,219                        // mulps         %xmm11,%xmm3
   .byte  68,15,92,227                        // subps         %xmm3,%xmm12
-  .byte  68,15,40,21,107,37,0,0              // movaps        0x256b(%rip),%xmm10        # 4fc0 <_sk_callback_sse41+0x984>
+  .byte  68,15,40,21,27,38,0,0               // movaps        0x261b(%rip),%xmm10        # 5070 <_sk_callback_sse41+0x97a>
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
-  .byte  15,40,29,112,37,0,0                 // movaps        0x2570(%rip),%xmm3        # 4fd0 <_sk_callback_sse41+0x994>
+  .byte  15,40,29,32,38,0,0                  // movaps        0x2620(%rip),%xmm3        # 5080 <_sk_callback_sse41+0x98a>
   .byte  65,15,94,218                        // divps         %xmm10,%xmm3
   .byte  65,15,88,220                        // addps         %xmm12,%xmm3
-  .byte  15,89,29,113,37,0,0                 // mulps         0x2571(%rip),%xmm3        # 4fe0 <_sk_callback_sse41+0x9a4>
+  .byte  15,89,29,33,38,0,0                  // mulps         0x2621(%rip),%xmm3        # 5090 <_sk_callback_sse41+0x99a>
   .byte  102,68,15,91,211                    // cvtps2dq      %xmm3,%xmm10
   .byte  243,15,16,88,20                     // movss         0x14(%rax),%xmm3
   .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
@@ -23245,7 +23389,7 @@ _sk_parametric_a_sse41:
   .byte  102,65,15,56,20,217                 // blendvps      %xmm0,%xmm9,%xmm3
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,95,216                           // maxps         %xmm0,%xmm3
-  .byte  15,93,29,92,37,0,0                  // minps         0x255c(%rip),%xmm3        # 4ff0 <_sk_callback_sse41+0x9b4>
+  .byte  15,93,29,12,38,0,0                  // minps         0x260c(%rip),%xmm3        # 50a0 <_sk_callback_sse41+0x9aa>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -23255,29 +23399,29 @@ HIDDEN _sk_lab_to_xyz_sse41
 FUNCTION(_sk_lab_to_xyz_sse41)
 _sk_lab_to_xyz_sse41:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,89,5,88,37,0,0                // mulps         0x2558(%rip),%xmm8        # 5000 <_sk_callback_sse41+0x9c4>
-  .byte  68,15,40,13,96,37,0,0               // movaps        0x2560(%rip),%xmm9        # 5010 <_sk_callback_sse41+0x9d4>
+  .byte  68,15,89,5,8,38,0,0                 // mulps         0x2608(%rip),%xmm8        # 50b0 <_sk_callback_sse41+0x9ba>
+  .byte  68,15,40,13,16,38,0,0               // movaps        0x2610(%rip),%xmm9        # 50c0 <_sk_callback_sse41+0x9ca>
   .byte  65,15,89,201                        // mulps         %xmm9,%xmm1
-  .byte  15,40,5,101,37,0,0                  // movaps        0x2565(%rip),%xmm0        # 5020 <_sk_callback_sse41+0x9e4>
+  .byte  15,40,5,21,38,0,0                   // movaps        0x2615(%rip),%xmm0        # 50d0 <_sk_callback_sse41+0x9da>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
   .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
   .byte  15,88,208                           // addps         %xmm0,%xmm2
-  .byte  68,15,88,5,99,37,0,0                // addps         0x2563(%rip),%xmm8        # 5030 <_sk_callback_sse41+0x9f4>
-  .byte  68,15,89,5,107,37,0,0               // mulps         0x256b(%rip),%xmm8        # 5040 <_sk_callback_sse41+0xa04>
-  .byte  15,89,13,116,37,0,0                 // mulps         0x2574(%rip),%xmm1        # 5050 <_sk_callback_sse41+0xa14>
+  .byte  68,15,88,5,19,38,0,0                // addps         0x2613(%rip),%xmm8        # 50e0 <_sk_callback_sse41+0x9ea>
+  .byte  68,15,89,5,27,38,0,0                // mulps         0x261b(%rip),%xmm8        # 50f0 <_sk_callback_sse41+0x9fa>
+  .byte  15,89,13,36,38,0,0                  // mulps         0x2624(%rip),%xmm1        # 5100 <_sk_callback_sse41+0xa0a>
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  15,89,21,121,37,0,0                 // mulps         0x2579(%rip),%xmm2        # 5060 <_sk_callback_sse41+0xa24>
+  .byte  15,89,21,41,38,0,0                  // mulps         0x2629(%rip),%xmm2        # 5110 <_sk_callback_sse41+0xa1a>
   .byte  69,15,40,208                        // movaps        %xmm8,%xmm10
   .byte  68,15,92,210                        // subps         %xmm2,%xmm10
   .byte  68,15,40,217                        // movaps        %xmm1,%xmm11
   .byte  69,15,89,219                        // mulps         %xmm11,%xmm11
   .byte  68,15,89,217                        // mulps         %xmm1,%xmm11
-  .byte  68,15,40,13,109,37,0,0              // movaps        0x256d(%rip),%xmm9        # 5070 <_sk_callback_sse41+0xa34>
+  .byte  68,15,40,13,29,38,0,0               // movaps        0x261d(%rip),%xmm9        # 5120 <_sk_callback_sse41+0xa2a>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  65,15,194,195,1                     // cmpltps       %xmm11,%xmm0
-  .byte  15,40,21,109,37,0,0                 // movaps        0x256d(%rip),%xmm2        # 5080 <_sk_callback_sse41+0xa44>
+  .byte  15,40,21,29,38,0,0                  // movaps        0x261d(%rip),%xmm2        # 5130 <_sk_callback_sse41+0xa3a>
   .byte  15,88,202                           // addps         %xmm2,%xmm1
-  .byte  68,15,40,37,114,37,0,0              // movaps        0x2572(%rip),%xmm12        # 5090 <_sk_callback_sse41+0xa54>
+  .byte  68,15,40,37,34,38,0,0               // movaps        0x2622(%rip),%xmm12        # 5140 <_sk_callback_sse41+0xa4a>
   .byte  65,15,89,204                        // mulps         %xmm12,%xmm1
   .byte  102,65,15,56,20,203                 // blendvps      %xmm0,%xmm11,%xmm1
   .byte  69,15,40,216                        // movaps        %xmm8,%xmm11
@@ -23296,8 +23440,8 @@ _sk_lab_to_xyz_sse41:
   .byte  65,15,89,212                        // mulps         %xmm12,%xmm2
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  102,65,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm2
-  .byte  15,89,13,43,37,0,0                  // mulps         0x252b(%rip),%xmm1        # 50a0 <_sk_callback_sse41+0xa64>
-  .byte  15,89,21,52,37,0,0                  // mulps         0x2534(%rip),%xmm2        # 50b0 <_sk_callback_sse41+0xa74>
+  .byte  15,89,13,219,37,0,0                 // mulps         0x25db(%rip),%xmm1        # 5150 <_sk_callback_sse41+0xa5a>
+  .byte  15,89,21,228,37,0,0                 // mulps         0x25e4(%rip),%xmm2        # 5160 <_sk_callback_sse41+0xa6a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
   .byte  65,15,40,200                        // movaps        %xmm8,%xmm1
@@ -23311,7 +23455,7 @@ _sk_load_a8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,49,4,56                   // pmovzxbd      (%rax,%rdi,1),%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,36,37,0,0                  // mulps         0x2524(%rip),%xmm3        # 50c0 <_sk_callback_sse41+0xa84>
+  .byte  15,89,29,212,37,0,0                 // mulps         0x25d4(%rip),%xmm3        # 5170 <_sk_callback_sse41+0xa7a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  15,87,201                           // xorps         %xmm1,%xmm1
@@ -23344,7 +23488,7 @@ _sk_gather_a8_sse41:
   .byte  102,15,58,32,192,3                  // pinsrb        $0x3,%eax,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,184,36,0,0                 // mulps         0x24b8(%rip),%xmm3        # 50d0 <_sk_callback_sse41+0xa94>
+  .byte  15,89,29,104,37,0,0                 // mulps         0x2568(%rip),%xmm3        # 5180 <_sk_callback_sse41+0xa8a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
@@ -23357,7 +23501,7 @@ FUNCTION(_sk_store_a8_sse41)
 _sk_store_a8_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,172,36,0,0               // movaps        0x24ac(%rip),%xmm8        # 50e0 <_sk_callback_sse41+0xaa4>
+  .byte  68,15,40,5,92,37,0,0                // movaps        0x255c(%rip),%xmm8        # 5190 <_sk_callback_sse41+0xa9a>
   .byte  68,15,89,195                        // mulps         %xmm3,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
   .byte  102,69,15,56,43,192                 // packusdw      %xmm8,%xmm8
@@ -23374,9 +23518,9 @@ _sk_load_g8_sse41:
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,49,4,56                   // pmovzxbd      (%rax,%rdi,1),%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,137,36,0,0                  // mulps         0x2489(%rip),%xmm0        # 50f0 <_sk_callback_sse41+0xab4>
+  .byte  15,89,5,57,37,0,0                   // mulps         0x2539(%rip),%xmm0        # 51a0 <_sk_callback_sse41+0xaaa>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,144,36,0,0                 // movaps        0x2490(%rip),%xmm3        # 5100 <_sk_callback_sse41+0xac4>
+  .byte  15,40,29,64,37,0,0                  // movaps        0x2540(%rip),%xmm3        # 51b0 <_sk_callback_sse41+0xaba>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -23407,9 +23551,9 @@ _sk_gather_g8_sse41:
   .byte  102,15,58,32,192,3                  // pinsrb        $0x3,%eax,%xmm0
   .byte  102,15,56,49,192                    // pmovzxbd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,41,36,0,0                   // mulps         0x2429(%rip),%xmm0        # 5110 <_sk_callback_sse41+0xad4>
+  .byte  15,89,5,217,36,0,0                  // mulps         0x24d9(%rip),%xmm0        # 51c0 <_sk_callback_sse41+0xaca>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,48,36,0,0                  // movaps        0x2430(%rip),%xmm3        # 5120 <_sk_callback_sse41+0xae4>
+  .byte  15,40,29,224,36,0,0                 // movaps        0x24e0(%rip),%xmm3        # 51d0 <_sk_callback_sse41+0xada>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -23454,17 +23598,17 @@ _sk_gather_i8_sse41:
   .byte  102,15,58,34,28,8,1                 // pinsrd        $0x1,(%rax,%rcx,1),%xmm3
   .byte  102,66,15,58,34,28,144,2            // pinsrd        $0x2,(%rax,%r10,4),%xmm3
   .byte  102,66,15,58,34,28,8,3              // pinsrd        $0x3,(%rax,%r9,1),%xmm3
-  .byte  102,15,111,5,135,35,0,0             // movdqa        0x2387(%rip),%xmm0        # 5130 <_sk_callback_sse41+0xaf4>
+  .byte  102,15,111,5,55,36,0,0              // movdqa        0x2437(%rip),%xmm0        # 51e0 <_sk_callback_sse41+0xaea>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,136,35,0,0               // movaps        0x2388(%rip),%xmm8        # 5140 <_sk_callback_sse41+0xb04>
+  .byte  68,15,40,5,56,36,0,0                // movaps        0x2438(%rip),%xmm8        # 51f0 <_sk_callback_sse41+0xafa>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
-  .byte  102,15,56,0,13,135,35,0,0           // pshufb        0x2387(%rip),%xmm1        # 5150 <_sk_callback_sse41+0xb14>
+  .byte  102,15,56,0,13,55,36,0,0            // pshufb        0x2437(%rip),%xmm1        # 5200 <_sk_callback_sse41+0xb0a>
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,111,211                      // movdqa        %xmm3,%xmm2
-  .byte  102,15,56,0,21,131,35,0,0           // pshufb        0x2383(%rip),%xmm2        # 5160 <_sk_callback_sse41+0xb24>
+  .byte  102,15,56,0,21,51,36,0,0            // pshufb        0x2433(%rip),%xmm2        # 5210 <_sk_callback_sse41+0xb1a>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  102,15,114,211,24                   // psrld         $0x18,%xmm3
@@ -23480,19 +23624,19 @@ _sk_load_565_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,51,20,120                 // pmovzxwd      (%rax,%rdi,2),%xmm2
-  .byte  102,15,111,5,105,35,0,0             // movdqa        0x2369(%rip),%xmm0        # 5170 <_sk_callback_sse41+0xb34>
+  .byte  102,15,111,5,25,36,0,0              // movdqa        0x2419(%rip),%xmm0        # 5220 <_sk_callback_sse41+0xb2a>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,107,35,0,0                  // mulps         0x236b(%rip),%xmm0        # 5180 <_sk_callback_sse41+0xb44>
-  .byte  102,15,111,13,115,35,0,0            // movdqa        0x2373(%rip),%xmm1        # 5190 <_sk_callback_sse41+0xb54>
+  .byte  15,89,5,27,36,0,0                   // mulps         0x241b(%rip),%xmm0        # 5230 <_sk_callback_sse41+0xb3a>
+  .byte  102,15,111,13,35,36,0,0             // movdqa        0x2423(%rip),%xmm1        # 5240 <_sk_callback_sse41+0xb4a>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,117,35,0,0                 // mulps         0x2375(%rip),%xmm1        # 51a0 <_sk_callback_sse41+0xb64>
-  .byte  102,15,219,21,125,35,0,0            // pand          0x237d(%rip),%xmm2        # 51b0 <_sk_callback_sse41+0xb74>
+  .byte  15,89,13,37,36,0,0                  // mulps         0x2425(%rip),%xmm1        # 5250 <_sk_callback_sse41+0xb5a>
+  .byte  102,15,219,21,45,36,0,0             // pand          0x242d(%rip),%xmm2        # 5260 <_sk_callback_sse41+0xb6a>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,131,35,0,0                 // mulps         0x2383(%rip),%xmm2        # 51c0 <_sk_callback_sse41+0xb84>
+  .byte  15,89,21,51,36,0,0                  // mulps         0x2433(%rip),%xmm2        # 5270 <_sk_callback_sse41+0xb7a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,138,35,0,0                 // movaps        0x238a(%rip),%xmm3        # 51d0 <_sk_callback_sse41+0xb94>
+  .byte  15,40,29,58,36,0,0                  // movaps        0x243a(%rip),%xmm3        # 5280 <_sk_callback_sse41+0xb8a>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_gather_565_sse41
@@ -23520,19 +23664,19 @@ _sk_gather_565_sse41:
   .byte  65,15,183,4,65                      // movzwl        (%r9,%rax,2),%eax
   .byte  102,15,196,192,3                    // pinsrw        $0x3,%eax,%xmm0
   .byte  102,15,56,51,208                    // pmovzxwd      %xmm0,%xmm2
-  .byte  102,15,111,5,47,35,0,0              // movdqa        0x232f(%rip),%xmm0        # 51e0 <_sk_callback_sse41+0xba4>
+  .byte  102,15,111,5,223,35,0,0             // movdqa        0x23df(%rip),%xmm0        # 5290 <_sk_callback_sse41+0xb9a>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,49,35,0,0                   // mulps         0x2331(%rip),%xmm0        # 51f0 <_sk_callback_sse41+0xbb4>
-  .byte  102,15,111,13,57,35,0,0             // movdqa        0x2339(%rip),%xmm1        # 5200 <_sk_callback_sse41+0xbc4>
+  .byte  15,89,5,225,35,0,0                  // mulps         0x23e1(%rip),%xmm0        # 52a0 <_sk_callback_sse41+0xbaa>
+  .byte  102,15,111,13,233,35,0,0            // movdqa        0x23e9(%rip),%xmm1        # 52b0 <_sk_callback_sse41+0xbba>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,59,35,0,0                  // mulps         0x233b(%rip),%xmm1        # 5210 <_sk_callback_sse41+0xbd4>
-  .byte  102,15,219,21,67,35,0,0             // pand          0x2343(%rip),%xmm2        # 5220 <_sk_callback_sse41+0xbe4>
+  .byte  15,89,13,235,35,0,0                 // mulps         0x23eb(%rip),%xmm1        # 52c0 <_sk_callback_sse41+0xbca>
+  .byte  102,15,219,21,243,35,0,0            // pand          0x23f3(%rip),%xmm2        # 52d0 <_sk_callback_sse41+0xbda>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,73,35,0,0                  // mulps         0x2349(%rip),%xmm2        # 5230 <_sk_callback_sse41+0xbf4>
+  .byte  15,89,21,249,35,0,0                 // mulps         0x23f9(%rip),%xmm2        # 52e0 <_sk_callback_sse41+0xbea>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,80,35,0,0                  // movaps        0x2350(%rip),%xmm3        # 5240 <_sk_callback_sse41+0xc04>
+  .byte  15,40,29,0,36,0,0                   // movaps        0x2400(%rip),%xmm3        # 52f0 <_sk_callback_sse41+0xbfa>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_565_sse41
@@ -23541,12 +23685,12 @@ FUNCTION(_sk_store_565_sse41)
 _sk_store_565_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,81,35,0,0                // movaps        0x2351(%rip),%xmm8        # 5250 <_sk_callback_sse41+0xc14>
+  .byte  68,15,40,5,1,36,0,0                 // movaps        0x2401(%rip),%xmm8        # 5300 <_sk_callback_sse41+0xc0a>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
   .byte  102,65,15,114,241,11                // pslld         $0xb,%xmm9
-  .byte  68,15,40,21,70,35,0,0               // movaps        0x2346(%rip),%xmm10        # 5260 <_sk_callback_sse41+0xc24>
+  .byte  68,15,40,21,246,35,0,0              // movaps        0x23f6(%rip),%xmm10        # 5310 <_sk_callback_sse41+0xc1a>
   .byte  68,15,89,209                        // mulps         %xmm1,%xmm10
   .byte  102,69,15,91,210                    // cvtps2dq      %xmm10,%xmm10
   .byte  102,65,15,114,242,5                 // pslld         $0x5,%xmm10
@@ -23566,21 +23710,21 @@ _sk_load_4444_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  102,15,56,51,28,120                 // pmovzxwd      (%rax,%rdi,2),%xmm3
-  .byte  102,15,111,5,17,35,0,0              // movdqa        0x2311(%rip),%xmm0        # 5270 <_sk_callback_sse41+0xc34>
+  .byte  102,15,111,5,193,35,0,0             // movdqa        0x23c1(%rip),%xmm0        # 5320 <_sk_callback_sse41+0xc2a>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,19,35,0,0                   // mulps         0x2313(%rip),%xmm0        # 5280 <_sk_callback_sse41+0xc44>
-  .byte  102,15,111,13,27,35,0,0             // movdqa        0x231b(%rip),%xmm1        # 5290 <_sk_callback_sse41+0xc54>
+  .byte  15,89,5,195,35,0,0                  // mulps         0x23c3(%rip),%xmm0        # 5330 <_sk_callback_sse41+0xc3a>
+  .byte  102,15,111,13,203,35,0,0            // movdqa        0x23cb(%rip),%xmm1        # 5340 <_sk_callback_sse41+0xc4a>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,29,35,0,0                  // mulps         0x231d(%rip),%xmm1        # 52a0 <_sk_callback_sse41+0xc64>
-  .byte  102,15,111,21,37,35,0,0             // movdqa        0x2325(%rip),%xmm2        # 52b0 <_sk_callback_sse41+0xc74>
+  .byte  15,89,13,205,35,0,0                 // mulps         0x23cd(%rip),%xmm1        # 5350 <_sk_callback_sse41+0xc5a>
+  .byte  102,15,111,21,213,35,0,0            // movdqa        0x23d5(%rip),%xmm2        # 5360 <_sk_callback_sse41+0xc6a>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,39,35,0,0                  // mulps         0x2327(%rip),%xmm2        # 52c0 <_sk_callback_sse41+0xc84>
-  .byte  102,15,219,29,47,35,0,0             // pand          0x232f(%rip),%xmm3        # 52d0 <_sk_callback_sse41+0xc94>
+  .byte  15,89,21,215,35,0,0                 // mulps         0x23d7(%rip),%xmm2        # 5370 <_sk_callback_sse41+0xc7a>
+  .byte  102,15,219,29,223,35,0,0            // pand          0x23df(%rip),%xmm3        # 5380 <_sk_callback_sse41+0xc8a>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,53,35,0,0                  // mulps         0x2335(%rip),%xmm3        # 52e0 <_sk_callback_sse41+0xca4>
+  .byte  15,89,29,229,35,0,0                 // mulps         0x23e5(%rip),%xmm3        # 5390 <_sk_callback_sse41+0xc9a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -23609,21 +23753,21 @@ _sk_gather_4444_sse41:
   .byte  65,15,183,4,65                      // movzwl        (%r9,%rax,2),%eax
   .byte  102,15,196,192,3                    // pinsrw        $0x3,%eax,%xmm0
   .byte  102,15,56,51,216                    // pmovzxwd      %xmm0,%xmm3
-  .byte  102,15,111,5,216,34,0,0             // movdqa        0x22d8(%rip),%xmm0        # 52f0 <_sk_callback_sse41+0xcb4>
+  .byte  102,15,111,5,136,35,0,0             // movdqa        0x2388(%rip),%xmm0        # 53a0 <_sk_callback_sse41+0xcaa>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,218,34,0,0                  // mulps         0x22da(%rip),%xmm0        # 5300 <_sk_callback_sse41+0xcc4>
-  .byte  102,15,111,13,226,34,0,0            // movdqa        0x22e2(%rip),%xmm1        # 5310 <_sk_callback_sse41+0xcd4>
+  .byte  15,89,5,138,35,0,0                  // mulps         0x238a(%rip),%xmm0        # 53b0 <_sk_callback_sse41+0xcba>
+  .byte  102,15,111,13,146,35,0,0            // movdqa        0x2392(%rip),%xmm1        # 53c0 <_sk_callback_sse41+0xcca>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,228,34,0,0                 // mulps         0x22e4(%rip),%xmm1        # 5320 <_sk_callback_sse41+0xce4>
-  .byte  102,15,111,21,236,34,0,0            // movdqa        0x22ec(%rip),%xmm2        # 5330 <_sk_callback_sse41+0xcf4>
+  .byte  15,89,13,148,35,0,0                 // mulps         0x2394(%rip),%xmm1        # 53d0 <_sk_callback_sse41+0xcda>
+  .byte  102,15,111,21,156,35,0,0            // movdqa        0x239c(%rip),%xmm2        # 53e0 <_sk_callback_sse41+0xcea>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,238,34,0,0                 // mulps         0x22ee(%rip),%xmm2        # 5340 <_sk_callback_sse41+0xd04>
-  .byte  102,15,219,29,246,34,0,0            // pand          0x22f6(%rip),%xmm3        # 5350 <_sk_callback_sse41+0xd14>
+  .byte  15,89,21,158,35,0,0                 // mulps         0x239e(%rip),%xmm2        # 53f0 <_sk_callback_sse41+0xcfa>
+  .byte  102,15,219,29,166,35,0,0            // pand          0x23a6(%rip),%xmm3        # 5400 <_sk_callback_sse41+0xd0a>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,252,34,0,0                 // mulps         0x22fc(%rip),%xmm3        # 5360 <_sk_callback_sse41+0xd24>
+  .byte  15,89,29,172,35,0,0                 // mulps         0x23ac(%rip),%xmm3        # 5410 <_sk_callback_sse41+0xd1a>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -23633,7 +23777,7 @@ FUNCTION(_sk_store_4444_sse41)
 _sk_store_4444_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,251,34,0,0               // movaps        0x22fb(%rip),%xmm8        # 5370 <_sk_callback_sse41+0xd34>
+  .byte  68,15,40,5,171,35,0,0               // movaps        0x23ab(%rip),%xmm8        # 5420 <_sk_callback_sse41+0xd2a>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -23663,17 +23807,17 @@ _sk_load_8888_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  15,16,28,184                        // movups        (%rax,%rdi,4),%xmm3
-  .byte  15,40,5,154,34,0,0                  // movaps        0x229a(%rip),%xmm0        # 5380 <_sk_callback_sse41+0xd44>
+  .byte  15,40,5,74,35,0,0                   // movaps        0x234a(%rip),%xmm0        # 5430 <_sk_callback_sse41+0xd3a>
   .byte  15,84,195                           // andps         %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,156,34,0,0               // movaps        0x229c(%rip),%xmm8        # 5390 <_sk_callback_sse41+0xd54>
+  .byte  68,15,40,5,76,35,0,0                // movaps        0x234c(%rip),%xmm8        # 5440 <_sk_callback_sse41+0xd4a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,40,203                           // movaps        %xmm3,%xmm1
-  .byte  102,15,56,0,13,156,34,0,0           // pshufb        0x229c(%rip),%xmm1        # 53a0 <_sk_callback_sse41+0xd64>
+  .byte  102,15,56,0,13,76,35,0,0            // pshufb        0x234c(%rip),%xmm1        # 5450 <_sk_callback_sse41+0xd5a>
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  15,40,211                           // movaps        %xmm3,%xmm2
-  .byte  102,15,56,0,21,153,34,0,0           // pshufb        0x2299(%rip),%xmm2        # 53b0 <_sk_callback_sse41+0xd74>
+  .byte  102,15,56,0,21,73,35,0,0            // pshufb        0x2349(%rip),%xmm2        # 5460 <_sk_callback_sse41+0xd6a>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  102,15,114,211,24                   // psrld         $0x18,%xmm3
@@ -23704,17 +23848,17 @@ _sk_gather_8888_sse41:
   .byte  102,65,15,58,34,28,129,1            // pinsrd        $0x1,(%r9,%rax,4),%xmm3
   .byte  102,67,15,58,34,28,145,2            // pinsrd        $0x2,(%r9,%r10,4),%xmm3
   .byte  102,65,15,58,34,28,137,3            // pinsrd        $0x3,(%r9,%rcx,4),%xmm3
-  .byte  102,15,111,5,50,34,0,0              // movdqa        0x2232(%rip),%xmm0        # 53c0 <_sk_callback_sse41+0xd84>
+  .byte  102,15,111,5,226,34,0,0             // movdqa        0x22e2(%rip),%xmm0        # 5470 <_sk_callback_sse41+0xd7a>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,51,34,0,0                // movaps        0x2233(%rip),%xmm8        # 53d0 <_sk_callback_sse41+0xd94>
+  .byte  68,15,40,5,227,34,0,0               // movaps        0x22e3(%rip),%xmm8        # 5480 <_sk_callback_sse41+0xd8a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
-  .byte  102,15,56,0,13,50,34,0,0            // pshufb        0x2232(%rip),%xmm1        # 53e0 <_sk_callback_sse41+0xda4>
+  .byte  102,15,56,0,13,226,34,0,0           // pshufb        0x22e2(%rip),%xmm1        # 5490 <_sk_callback_sse41+0xd9a>
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,111,211                      // movdqa        %xmm3,%xmm2
-  .byte  102,15,56,0,21,46,34,0,0            // pshufb        0x222e(%rip),%xmm2        # 53f0 <_sk_callback_sse41+0xdb4>
+  .byte  102,15,56,0,21,222,34,0,0           // pshufb        0x22de(%rip),%xmm2        # 54a0 <_sk_callback_sse41+0xdaa>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  102,15,114,211,24                   // psrld         $0x18,%xmm3
@@ -23729,7 +23873,7 @@ FUNCTION(_sk_store_8888_sse41)
 _sk_store_8888_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,26,34,0,0                // movaps        0x221a(%rip),%xmm8        # 5400 <_sk_callback_sse41+0xdc4>
+  .byte  68,15,40,5,202,34,0,0               // movaps        0x22ca(%rip),%xmm8        # 54b0 <_sk_callback_sse41+0xdba>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -23766,18 +23910,18 @@ _sk_load_f16_sse41:
   .byte  102,68,15,97,216                    // punpcklwd     %xmm0,%xmm11
   .byte  102,68,15,105,200                   // punpckhwd     %xmm0,%xmm9
   .byte  102,65,15,56,51,203                 // pmovzxwd      %xmm11,%xmm1
-  .byte  102,68,15,111,5,147,33,0,0          // movdqa        0x2193(%rip),%xmm8        # 5410 <_sk_callback_sse41+0xdd4>
+  .byte  102,68,15,111,5,67,34,0,0           // movdqa        0x2243(%rip),%xmm8        # 54c0 <_sk_callback_sse41+0xdca>
   .byte  102,15,111,209                      // movdqa        %xmm1,%xmm2
   .byte  102,65,15,219,208                   // pand          %xmm8,%xmm2
   .byte  102,15,239,202                      // pxor          %xmm2,%xmm1
-  .byte  102,15,111,29,142,33,0,0            // movdqa        0x218e(%rip),%xmm3        # 5420 <_sk_callback_sse41+0xde4>
+  .byte  102,15,111,29,62,34,0,0             // movdqa        0x223e(%rip),%xmm3        # 54d0 <_sk_callback_sse41+0xdda>
   .byte  102,15,114,242,16                   // pslld         $0x10,%xmm2
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,15,56,63,195                    // pmaxud        %xmm3,%xmm0
   .byte  102,15,118,193                      // pcmpeqd       %xmm1,%xmm0
   .byte  102,15,114,241,13                   // pslld         $0xd,%xmm1
   .byte  102,15,235,202                      // por           %xmm2,%xmm1
-  .byte  102,68,15,111,21,122,33,0,0         // movdqa        0x217a(%rip),%xmm10        # 5430 <_sk_callback_sse41+0xdf4>
+  .byte  102,68,15,111,21,42,34,0,0          // movdqa        0x222a(%rip),%xmm10        # 54e0 <_sk_callback_sse41+0xdea>
   .byte  102,65,15,254,202                   // paddd         %xmm10,%xmm1
   .byte  102,15,219,193                      // pand          %xmm1,%xmm0
   .byte  102,65,15,115,219,8                 // psrldq        $0x8,%xmm11
@@ -23850,18 +23994,18 @@ _sk_gather_f16_sse41:
   .byte  102,68,15,97,218                    // punpcklwd     %xmm2,%xmm11
   .byte  102,68,15,105,202                   // punpckhwd     %xmm2,%xmm9
   .byte  102,65,15,56,51,203                 // pmovzxwd      %xmm11,%xmm1
-  .byte  102,68,15,111,5,56,32,0,0           // movdqa        0x2038(%rip),%xmm8        # 5440 <_sk_callback_sse41+0xe04>
+  .byte  102,68,15,111,5,232,32,0,0          // movdqa        0x20e8(%rip),%xmm8        # 54f0 <_sk_callback_sse41+0xdfa>
   .byte  102,15,111,209                      // movdqa        %xmm1,%xmm2
   .byte  102,65,15,219,208                   // pand          %xmm8,%xmm2
   .byte  102,15,239,202                      // pxor          %xmm2,%xmm1
-  .byte  102,15,111,29,51,32,0,0             // movdqa        0x2033(%rip),%xmm3        # 5450 <_sk_callback_sse41+0xe14>
+  .byte  102,15,111,29,227,32,0,0            // movdqa        0x20e3(%rip),%xmm3        # 5500 <_sk_callback_sse41+0xe0a>
   .byte  102,15,114,242,16                   // pslld         $0x10,%xmm2
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,15,56,63,195                    // pmaxud        %xmm3,%xmm0
   .byte  102,15,118,193                      // pcmpeqd       %xmm1,%xmm0
   .byte  102,15,114,241,13                   // pslld         $0xd,%xmm1
   .byte  102,15,235,202                      // por           %xmm2,%xmm1
-  .byte  102,68,15,111,21,31,32,0,0          // movdqa        0x201f(%rip),%xmm10        # 5460 <_sk_callback_sse41+0xe24>
+  .byte  102,68,15,111,21,207,32,0,0         // movdqa        0x20cf(%rip),%xmm10        # 5510 <_sk_callback_sse41+0xe1a>
   .byte  102,65,15,254,202                   // paddd         %xmm10,%xmm1
   .byte  102,15,219,193                      // pand          %xmm1,%xmm0
   .byte  102,65,15,115,219,8                 // psrldq        $0x8,%xmm11
@@ -23909,17 +24053,17 @@ FUNCTION(_sk_store_f16_sse41)
 _sk_store_f16_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  102,68,15,111,21,85,31,0,0          // movdqa        0x1f55(%rip),%xmm10        # 5470 <_sk_callback_sse41+0xe34>
+  .byte  102,68,15,111,21,5,32,0,0           // movdqa        0x2005(%rip),%xmm10        # 5520 <_sk_callback_sse41+0xe2a>
   .byte  102,68,15,111,224                   // movdqa        %xmm0,%xmm12
   .byte  102,68,15,111,232                   // movdqa        %xmm0,%xmm13
   .byte  102,69,15,219,234                   // pand          %xmm10,%xmm13
   .byte  102,69,15,239,229                   // pxor          %xmm13,%xmm12
-  .byte  102,68,15,111,13,72,31,0,0          // movdqa        0x1f48(%rip),%xmm9        # 5480 <_sk_callback_sse41+0xe44>
+  .byte  102,68,15,111,13,248,31,0,0         // movdqa        0x1ff8(%rip),%xmm9        # 5530 <_sk_callback_sse41+0xe3a>
   .byte  102,65,15,114,213,16                // psrld         $0x10,%xmm13
   .byte  102,69,15,111,193                   // movdqa        %xmm9,%xmm8
   .byte  102,69,15,102,196                   // pcmpgtd       %xmm12,%xmm8
   .byte  102,65,15,114,212,13                // psrld         $0xd,%xmm12
-  .byte  102,68,15,111,29,57,31,0,0          // movdqa        0x1f39(%rip),%xmm11        # 5490 <_sk_callback_sse41+0xe54>
+  .byte  102,68,15,111,29,233,31,0,0         // movdqa        0x1fe9(%rip),%xmm11        # 5540 <_sk_callback_sse41+0xe4a>
   .byte  102,69,15,235,235                   // por           %xmm11,%xmm13
   .byte  102,69,15,254,236                   // paddd         %xmm12,%xmm13
   .byte  102,69,15,223,197                   // pandn         %xmm13,%xmm8
@@ -23989,7 +24133,7 @@ _sk_load_u16_be_sse41:
   .byte  102,15,235,200                      // por           %xmm0,%xmm1
   .byte  102,15,56,51,193                    // pmovzxwd      %xmm1,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,8,30,0,0                 // movaps        0x1e08(%rip),%xmm8        # 54a0 <_sk_callback_sse41+0xe64>
+  .byte  68,15,40,5,184,30,0,0               // movaps        0x1eb8(%rip),%xmm8        # 5550 <_sk_callback_sse41+0xe5a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -24041,7 +24185,7 @@ _sk_load_rgb_u16_be_sse41:
   .byte  102,15,235,193                      // por           %xmm1,%xmm0
   .byte  102,15,56,51,192                    // pmovzxwd      %xmm0,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,73,29,0,0                // movaps        0x1d49(%rip),%xmm8        # 54b0 <_sk_callback_sse41+0xe74>
+  .byte  68,15,40,5,249,29,0,0               // movaps        0x1df9(%rip),%xmm8        # 5560 <_sk_callback_sse41+0xe6a>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -24058,7 +24202,7 @@ _sk_load_rgb_u16_be_sse41:
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,16,29,0,0                  // movaps        0x1d10(%rip),%xmm3        # 54c0 <_sk_callback_sse41+0xe84>
+  .byte  15,40,29,192,29,0,0                 // movaps        0x1dc0(%rip),%xmm3        # 5570 <_sk_callback_sse41+0xe7a>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_u16_be_sse41
@@ -24067,7 +24211,7 @@ FUNCTION(_sk_store_u16_be_sse41)
 _sk_store_u16_be_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,13,17,29,0,0               // movaps        0x1d11(%rip),%xmm9        # 54d0 <_sk_callback_sse41+0xe94>
+  .byte  68,15,40,13,193,29,0,0              // movaps        0x1dc1(%rip),%xmm9        # 5580 <_sk_callback_sse41+0xe8a>
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
@@ -24278,10 +24422,10 @@ HIDDEN _sk_luminance_to_alpha_sse41
 FUNCTION(_sk_luminance_to_alpha_sse41)
 _sk_luminance_to_alpha_sse41:
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
-  .byte  15,89,5,109,26,0,0                  // mulps         0x1a6d(%rip),%xmm0        # 54e0 <_sk_callback_sse41+0xea4>
-  .byte  15,89,13,118,26,0,0                 // mulps         0x1a76(%rip),%xmm1        # 54f0 <_sk_callback_sse41+0xeb4>
+  .byte  15,89,5,29,27,0,0                   // mulps         0x1b1d(%rip),%xmm0        # 5590 <_sk_callback_sse41+0xe9a>
+  .byte  15,89,13,38,27,0,0                  // mulps         0x1b26(%rip),%xmm1        # 55a0 <_sk_callback_sse41+0xeaa>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
-  .byte  15,89,29,124,26,0,0                 // mulps         0x1a7c(%rip),%xmm3        # 5500 <_sk_callback_sse41+0xec4>
+  .byte  15,89,29,44,27,0,0                  // mulps         0x1b2c(%rip),%xmm3        # 55b0 <_sk_callback_sse41+0xeba>
   .byte  15,88,217                           // addps         %xmm1,%xmm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
@@ -24456,6 +24600,56 @@ _sk_matrix_4x5_sse41:
   .byte  65,15,40,219                        // movaps        %xmm11,%xmm3
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_matrix_4x3_sse41
+.globl _sk_matrix_4x3_sse41
+FUNCTION(_sk_matrix_4x3_sse41)
+_sk_matrix_4x3_sse41:
+  .byte  68,15,40,201                        // movaps        %xmm1,%xmm9
+  .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  243,15,16,0                         // movss         (%rax),%xmm0
+  .byte  243,15,16,72,4                      // movss         0x4(%rax),%xmm1
+  .byte  15,198,192,0                        // shufps        $0x0,%xmm0,%xmm0
+  .byte  243,15,16,80,16                     // movss         0x10(%rax),%xmm2
+  .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2
+  .byte  243,15,16,88,32                     // movss         0x20(%rax),%xmm3
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
+  .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
+  .byte  15,88,211                           // addps         %xmm3,%xmm2
+  .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
+  .byte  15,88,194                           // addps         %xmm2,%xmm0
+  .byte  15,198,201,0                        // shufps        $0x0,%xmm1,%xmm1
+  .byte  243,15,16,80,20                     // movss         0x14(%rax),%xmm2
+  .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2
+  .byte  243,15,16,88,36                     // movss         0x24(%rax),%xmm3
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
+  .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
+  .byte  15,88,211                           // addps         %xmm3,%xmm2
+  .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
+  .byte  15,88,202                           // addps         %xmm2,%xmm1
+  .byte  243,15,16,80,8                      // movss         0x8(%rax),%xmm2
+  .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2
+  .byte  243,15,16,88,24                     // movss         0x18(%rax),%xmm3
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
+  .byte  243,68,15,16,80,40                  // movss         0x28(%rax),%xmm10
+  .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
+  .byte  65,15,89,217                        // mulps         %xmm9,%xmm3
+  .byte  65,15,88,218                        // addps         %xmm10,%xmm3
+  .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
+  .byte  15,88,211                           // addps         %xmm3,%xmm2
+  .byte  243,15,16,88,12                     // movss         0xc(%rax),%xmm3
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
+  .byte  243,68,15,16,80,28                  // movss         0x1c(%rax),%xmm10
+  .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
+  .byte  243,68,15,16,88,44                  // movss         0x2c(%rax),%xmm11
+  .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
+  .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
+  .byte  69,15,88,211                        // addps         %xmm11,%xmm10
+  .byte  65,15,89,216                        // mulps         %xmm8,%xmm3
+  .byte  65,15,88,218                        // addps         %xmm10,%xmm3
+  .byte  72,173                              // lods          %ds:(%rsi),%rax
+  .byte  255,224                             // jmpq          *%rax
+
 HIDDEN _sk_matrix_perspective_sse41
 .globl _sk_matrix_perspective_sse41
 FUNCTION(_sk_matrix_perspective_sse41)
@@ -24507,9 +24701,9 @@ _sk_evenly_spaced_gradient_sse41:
   .byte  72,139,8                            // mov           (%rax),%rcx
   .byte  76,139,88,8                         // mov           0x8(%rax),%r11
   .byte  72,255,201                          // dec           %rcx
-  .byte  120,7                               // js            3e03 <_sk_evenly_spaced_gradient_sse41+0x15>
+  .byte  120,7                               // js            3ebd <_sk_evenly_spaced_gradient_sse41+0x15>
   .byte  243,72,15,42,201                    // cvtsi2ss      %rcx,%xmm1
-  .byte  235,21                              // jmp           3e18 <_sk_evenly_spaced_gradient_sse41+0x2a>
+  .byte  235,21                              // jmp           3ed2 <_sk_evenly_spaced_gradient_sse41+0x2a>
   .byte  73,137,200                          // mov           %rcx,%r8
   .byte  73,209,232                          // shr           %r8
   .byte  131,225,1                           // and           $0x1,%ecx
@@ -24600,12 +24794,12 @@ _sk_gradient_sse41:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,50                              // jb            3ffb <_sk_gradient_sse41+0x41>
+  .byte  114,50                              // jb            40b5 <_sk_gradient_sse41+0x41>
   .byte  72,139,72,72                        // mov           0x48(%rax),%rcx
   .byte  73,255,200                          // dec           %r8
   .byte  72,131,193,4                        // add           $0x4,%rcx
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
-  .byte  15,40,21,49,21,0,0                  // movaps        0x1531(%rip),%xmm2        # 5510 <_sk_callback_sse41+0xed4>
+  .byte  15,40,21,39,21,0,0                  // movaps        0x1527(%rip),%xmm2        # 55c0 <_sk_callback_sse41+0xeca>
   .byte  243,15,16,25                        // movss         (%rcx),%xmm3
   .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
   .byte  15,194,216,2                        // cmpleps       %xmm0,%xmm3
@@ -24613,7 +24807,7 @@ _sk_gradient_sse41:
   .byte  102,15,254,203                      // paddd         %xmm3,%xmm1
   .byte  72,131,193,4                        // add           $0x4,%rcx
   .byte  73,255,200                          // dec           %r8
-  .byte  117,228                             // jne           3fdf <_sk_gradient_sse41+0x25>
+  .byte  117,228                             // jne           4099 <_sk_gradient_sse41+0x25>
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  102,73,15,58,22,201,1               // pextrq        $0x1,%xmm1,%r9
@@ -24744,26 +24938,26 @@ _sk_xy_to_unit_angle_sse41:
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,40,236                        // movaps        %xmm12,%xmm13
   .byte  69,15,89,237                        // mulps         %xmm13,%xmm13
-  .byte  68,15,40,21,211,18,0,0              // movaps        0x12d3(%rip),%xmm10        # 5520 <_sk_callback_sse41+0xee4>
+  .byte  68,15,40,21,201,18,0,0              // movaps        0x12c9(%rip),%xmm10        # 55d0 <_sk_callback_sse41+0xeda>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,215,18,0,0              // addps         0x12d7(%rip),%xmm10        # 5530 <_sk_callback_sse41+0xef4>
+  .byte  68,15,88,21,205,18,0,0              // addps         0x12cd(%rip),%xmm10        # 55e0 <_sk_callback_sse41+0xeea>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,219,18,0,0              // addps         0x12db(%rip),%xmm10        # 5540 <_sk_callback_sse41+0xf04>
+  .byte  68,15,88,21,209,18,0,0              // addps         0x12d1(%rip),%xmm10        # 55f0 <_sk_callback_sse41+0xefa>
   .byte  69,15,89,213                        // mulps         %xmm13,%xmm10
-  .byte  68,15,88,21,223,18,0,0              // addps         0x12df(%rip),%xmm10        # 5550 <_sk_callback_sse41+0xf14>
+  .byte  68,15,88,21,213,18,0,0              // addps         0x12d5(%rip),%xmm10        # 5600 <_sk_callback_sse41+0xf0a>
   .byte  69,15,89,212                        // mulps         %xmm12,%xmm10
   .byte  65,15,194,195,1                     // cmpltps       %xmm11,%xmm0
-  .byte  68,15,40,29,222,18,0,0              // movaps        0x12de(%rip),%xmm11        # 5560 <_sk_callback_sse41+0xf24>
+  .byte  68,15,40,29,212,18,0,0              // movaps        0x12d4(%rip),%xmm11        # 5610 <_sk_callback_sse41+0xf1a>
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  102,69,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm10
   .byte  69,15,194,200,1                     // cmpltps       %xmm8,%xmm9
-  .byte  68,15,40,29,215,18,0,0              // movaps        0x12d7(%rip),%xmm11        # 5570 <_sk_callback_sse41+0xf34>
+  .byte  68,15,40,29,205,18,0,0              // movaps        0x12cd(%rip),%xmm11        # 5620 <_sk_callback_sse41+0xf2a>
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  102,69,15,56,20,211                 // blendvps      %xmm0,%xmm11,%xmm10
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
   .byte  65,15,194,192,1                     // cmpltps       %xmm8,%xmm0
-  .byte  68,15,40,13,201,18,0,0              // movaps        0x12c9(%rip),%xmm9        # 5580 <_sk_callback_sse41+0xf44>
+  .byte  68,15,40,13,191,18,0,0              // movaps        0x12bf(%rip),%xmm9        # 5630 <_sk_callback_sse41+0xf3a>
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
   .byte  102,69,15,56,20,209                 // blendvps      %xmm0,%xmm9,%xmm10
   .byte  69,15,194,194,7                     // cmpordps      %xmm10,%xmm8
@@ -24789,7 +24983,7 @@ HIDDEN _sk_save_xy_sse41
 FUNCTION(_sk_save_xy_sse41)
 _sk_save_xy_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,157,18,0,0               // movaps        0x129d(%rip),%xmm8        # 5590 <_sk_callback_sse41+0xf54>
+  .byte  68,15,40,5,147,18,0,0               // movaps        0x1293(%rip),%xmm8        # 5640 <_sk_callback_sse41+0xf4a>
   .byte  15,17,0                             // movups        %xmm0,(%rax)
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,88,200                        // addps         %xmm8,%xmm9
@@ -24833,8 +25027,8 @@ _sk_bilinear_nx_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,31,18,0,0                   // addps         0x121f(%rip),%xmm0        # 55a0 <_sk_callback_sse41+0xf64>
-  .byte  68,15,40,13,39,18,0,0               // movaps        0x1227(%rip),%xmm9        # 55b0 <_sk_callback_sse41+0xf74>
+  .byte  15,88,5,21,18,0,0                   // addps         0x1215(%rip),%xmm0        # 5650 <_sk_callback_sse41+0xf5a>
+  .byte  68,15,40,13,29,18,0,0               // movaps        0x121d(%rip),%xmm9        # 5660 <_sk_callback_sse41+0xf6a>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -24847,7 +25041,7 @@ _sk_bilinear_px_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,22,18,0,0                   // addps         0x1216(%rip),%xmm0        # 55c0 <_sk_callback_sse41+0xf84>
+  .byte  15,88,5,12,18,0,0                   // addps         0x120c(%rip),%xmm0        # 5670 <_sk_callback_sse41+0xf7a>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -24859,8 +25053,8 @@ _sk_bilinear_ny_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,8,18,0,0                   // addps         0x1208(%rip),%xmm1        # 55d0 <_sk_callback_sse41+0xf94>
-  .byte  68,15,40,13,16,18,0,0               // movaps        0x1210(%rip),%xmm9        # 55e0 <_sk_callback_sse41+0xfa4>
+  .byte  15,88,13,254,17,0,0                 // addps         0x11fe(%rip),%xmm1        # 5680 <_sk_callback_sse41+0xf8a>
+  .byte  68,15,40,13,6,18,0,0                // movaps        0x1206(%rip),%xmm9        # 5690 <_sk_callback_sse41+0xf9a>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -24873,7 +25067,7 @@ _sk_bilinear_py_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,254,17,0,0                 // addps         0x11fe(%rip),%xmm1        # 55f0 <_sk_callback_sse41+0xfb4>
+  .byte  15,88,13,244,17,0,0                 // addps         0x11f4(%rip),%xmm1        # 56a0 <_sk_callback_sse41+0xfaa>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -24885,13 +25079,13 @@ _sk_bicubic_n3x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,241,17,0,0                  // addps         0x11f1(%rip),%xmm0        # 5600 <_sk_callback_sse41+0xfc4>
-  .byte  68,15,40,13,249,17,0,0              // movaps        0x11f9(%rip),%xmm9        # 5610 <_sk_callback_sse41+0xfd4>
+  .byte  15,88,5,231,17,0,0                  // addps         0x11e7(%rip),%xmm0        # 56b0 <_sk_callback_sse41+0xfba>
+  .byte  68,15,40,13,239,17,0,0              // movaps        0x11ef(%rip),%xmm9        # 56c0 <_sk_callback_sse41+0xfca>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,245,17,0,0              // mulps         0x11f5(%rip),%xmm9        # 5620 <_sk_callback_sse41+0xfe4>
-  .byte  68,15,88,13,253,17,0,0              // addps         0x11fd(%rip),%xmm9        # 5630 <_sk_callback_sse41+0xff4>
+  .byte  68,15,89,13,235,17,0,0              // mulps         0x11eb(%rip),%xmm9        # 56d0 <_sk_callback_sse41+0xfda>
+  .byte  68,15,88,13,243,17,0,0              // addps         0x11f3(%rip),%xmm9        # 56e0 <_sk_callback_sse41+0xfea>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -24904,16 +25098,16 @@ _sk_bicubic_n1x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,236,17,0,0                  // addps         0x11ec(%rip),%xmm0        # 5640 <_sk_callback_sse41+0x1004>
-  .byte  68,15,40,13,244,17,0,0              // movaps        0x11f4(%rip),%xmm9        # 5650 <_sk_callback_sse41+0x1014>
+  .byte  15,88,5,226,17,0,0                  // addps         0x11e2(%rip),%xmm0        # 56f0 <_sk_callback_sse41+0xffa>
+  .byte  68,15,40,13,234,17,0,0              // movaps        0x11ea(%rip),%xmm9        # 5700 <_sk_callback_sse41+0x100a>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,248,17,0,0               // movaps        0x11f8(%rip),%xmm8        # 5660 <_sk_callback_sse41+0x1024>
+  .byte  68,15,40,5,238,17,0,0               // movaps        0x11ee(%rip),%xmm8        # 5710 <_sk_callback_sse41+0x101a>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,252,17,0,0               // addps         0x11fc(%rip),%xmm8        # 5670 <_sk_callback_sse41+0x1034>
+  .byte  68,15,88,5,242,17,0,0               // addps         0x11f2(%rip),%xmm8        # 5720 <_sk_callback_sse41+0x102a>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,0,18,0,0                 // addps         0x1200(%rip),%xmm8        # 5680 <_sk_callback_sse41+0x1044>
+  .byte  68,15,88,5,246,17,0,0               // addps         0x11f6(%rip),%xmm8        # 5730 <_sk_callback_sse41+0x103a>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,4,18,0,0                 // addps         0x1204(%rip),%xmm8        # 5690 <_sk_callback_sse41+0x1054>
+  .byte  68,15,88,5,250,17,0,0               // addps         0x11fa(%rip),%xmm8        # 5740 <_sk_callback_sse41+0x104a>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -24923,17 +25117,17 @@ HIDDEN _sk_bicubic_p1x_sse41
 FUNCTION(_sk_bicubic_p1x_sse41)
 _sk_bicubic_p1x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,254,17,0,0               // movaps        0x11fe(%rip),%xmm8        # 56a0 <_sk_callback_sse41+0x1064>
+  .byte  68,15,40,5,244,17,0,0               // movaps        0x11f4(%rip),%xmm8        # 5750 <_sk_callback_sse41+0x105a>
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,72,64                      // movups        0x40(%rax),%xmm9
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,21,250,17,0,0              // movaps        0x11fa(%rip),%xmm10        # 56b0 <_sk_callback_sse41+0x1074>
+  .byte  68,15,40,21,240,17,0,0              // movaps        0x11f0(%rip),%xmm10        # 5760 <_sk_callback_sse41+0x106a>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,254,17,0,0              // addps         0x11fe(%rip),%xmm10        # 56c0 <_sk_callback_sse41+0x1084>
+  .byte  68,15,88,21,244,17,0,0              // addps         0x11f4(%rip),%xmm10        # 5770 <_sk_callback_sse41+0x107a>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,250,17,0,0              // addps         0x11fa(%rip),%xmm10        # 56d0 <_sk_callback_sse41+0x1094>
+  .byte  68,15,88,21,240,17,0,0              // addps         0x11f0(%rip),%xmm10        # 5780 <_sk_callback_sse41+0x108a>
   .byte  68,15,17,144,128,0,0,0              // movups        %xmm10,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -24945,11 +25139,11 @@ _sk_bicubic_p3x_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,237,17,0,0                  // addps         0x11ed(%rip),%xmm0        # 56e0 <_sk_callback_sse41+0x10a4>
+  .byte  15,88,5,227,17,0,0                  // addps         0x11e3(%rip),%xmm0        # 5790 <_sk_callback_sse41+0x109a>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,237,17,0,0               // mulps         0x11ed(%rip),%xmm8        # 56f0 <_sk_callback_sse41+0x10b4>
-  .byte  68,15,88,5,245,17,0,0               // addps         0x11f5(%rip),%xmm8        # 5700 <_sk_callback_sse41+0x10c4>
+  .byte  68,15,89,5,227,17,0,0               // mulps         0x11e3(%rip),%xmm8        # 57a0 <_sk_callback_sse41+0x10aa>
+  .byte  68,15,88,5,235,17,0,0               // addps         0x11eb(%rip),%xmm8        # 57b0 <_sk_callback_sse41+0x10ba>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -24962,13 +25156,13 @@ _sk_bicubic_n3y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,227,17,0,0                 // addps         0x11e3(%rip),%xmm1        # 5710 <_sk_callback_sse41+0x10d4>
-  .byte  68,15,40,13,235,17,0,0              // movaps        0x11eb(%rip),%xmm9        # 5720 <_sk_callback_sse41+0x10e4>
+  .byte  15,88,13,217,17,0,0                 // addps         0x11d9(%rip),%xmm1        # 57c0 <_sk_callback_sse41+0x10ca>
+  .byte  68,15,40,13,225,17,0,0              // movaps        0x11e1(%rip),%xmm9        # 57d0 <_sk_callback_sse41+0x10da>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,231,17,0,0              // mulps         0x11e7(%rip),%xmm9        # 5730 <_sk_callback_sse41+0x10f4>
-  .byte  68,15,88,13,239,17,0,0              // addps         0x11ef(%rip),%xmm9        # 5740 <_sk_callback_sse41+0x1104>
+  .byte  68,15,89,13,221,17,0,0              // mulps         0x11dd(%rip),%xmm9        # 57e0 <_sk_callback_sse41+0x10ea>
+  .byte  68,15,88,13,229,17,0,0              // addps         0x11e5(%rip),%xmm9        # 57f0 <_sk_callback_sse41+0x10fa>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -24981,16 +25175,16 @@ _sk_bicubic_n1y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,221,17,0,0                 // addps         0x11dd(%rip),%xmm1        # 5750 <_sk_callback_sse41+0x1114>
-  .byte  68,15,40,13,229,17,0,0              // movaps        0x11e5(%rip),%xmm9        # 5760 <_sk_callback_sse41+0x1124>
+  .byte  15,88,13,211,17,0,0                 // addps         0x11d3(%rip),%xmm1        # 5800 <_sk_callback_sse41+0x110a>
+  .byte  68,15,40,13,219,17,0,0              // movaps        0x11db(%rip),%xmm9        # 5810 <_sk_callback_sse41+0x111a>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,233,17,0,0               // movaps        0x11e9(%rip),%xmm8        # 5770 <_sk_callback_sse41+0x1134>
+  .byte  68,15,40,5,223,17,0,0               // movaps        0x11df(%rip),%xmm8        # 5820 <_sk_callback_sse41+0x112a>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,237,17,0,0               // addps         0x11ed(%rip),%xmm8        # 5780 <_sk_callback_sse41+0x1144>
+  .byte  68,15,88,5,227,17,0,0               // addps         0x11e3(%rip),%xmm8        # 5830 <_sk_callback_sse41+0x113a>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,241,17,0,0               // addps         0x11f1(%rip),%xmm8        # 5790 <_sk_callback_sse41+0x1154>
+  .byte  68,15,88,5,231,17,0,0               // addps         0x11e7(%rip),%xmm8        # 5840 <_sk_callback_sse41+0x114a>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,245,17,0,0               // addps         0x11f5(%rip),%xmm8        # 57a0 <_sk_callback_sse41+0x1164>
+  .byte  68,15,88,5,235,17,0,0               // addps         0x11eb(%rip),%xmm8        # 5850 <_sk_callback_sse41+0x115a>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -25000,17 +25194,17 @@ HIDDEN _sk_bicubic_p1y_sse41
 FUNCTION(_sk_bicubic_p1y_sse41)
 _sk_bicubic_p1y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,239,17,0,0               // movaps        0x11ef(%rip),%xmm8        # 57b0 <_sk_callback_sse41+0x1174>
+  .byte  68,15,40,5,229,17,0,0               // movaps        0x11e5(%rip),%xmm8        # 5860 <_sk_callback_sse41+0x116a>
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,72,96                      // movups        0x60(%rax),%xmm9
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  68,15,40,21,234,17,0,0              // movaps        0x11ea(%rip),%xmm10        # 57c0 <_sk_callback_sse41+0x1184>
+  .byte  68,15,40,21,224,17,0,0              // movaps        0x11e0(%rip),%xmm10        # 5870 <_sk_callback_sse41+0x117a>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,238,17,0,0              // addps         0x11ee(%rip),%xmm10        # 57d0 <_sk_callback_sse41+0x1194>
+  .byte  68,15,88,21,228,17,0,0              // addps         0x11e4(%rip),%xmm10        # 5880 <_sk_callback_sse41+0x118a>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,234,17,0,0              // addps         0x11ea(%rip),%xmm10        # 57e0 <_sk_callback_sse41+0x11a4>
+  .byte  68,15,88,21,224,17,0,0              // addps         0x11e0(%rip),%xmm10        # 5890 <_sk_callback_sse41+0x119a>
   .byte  68,15,17,144,160,0,0,0              // movups        %xmm10,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -25022,11 +25216,11 @@ _sk_bicubic_p3y_sse41:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,220,17,0,0                 // addps         0x11dc(%rip),%xmm1        # 57f0 <_sk_callback_sse41+0x11b4>
+  .byte  15,88,13,210,17,0,0                 // addps         0x11d2(%rip),%xmm1        # 58a0 <_sk_callback_sse41+0x11aa>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,220,17,0,0               // mulps         0x11dc(%rip),%xmm8        # 5800 <_sk_callback_sse41+0x11c4>
-  .byte  68,15,88,5,228,17,0,0               // addps         0x11e4(%rip),%xmm8        # 5810 <_sk_callback_sse41+0x11d4>
+  .byte  68,15,89,5,210,17,0,0               // mulps         0x11d2(%rip),%xmm8        # 58b0 <_sk_callback_sse41+0x11ba>
+  .byte  68,15,88,5,218,17,0,0               // addps         0x11da(%rip),%xmm8        # 58c0 <_sk_callback_sse41+0x11ca>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -25245,11 +25439,11 @@ BALIGN16
   .byte  128,191,0,0,128,191,0               // cmpb          $0x0,-0x40800000(%rdi)
   .byte  0,224                               // add           %ah,%al
   .byte  64,0,0                              // add           %al,(%rax)
-  .byte  224,64                              // loopne        48f8 <.literal16+0x1d8>
+  .byte  224,64                              // loopne        49a8 <.literal16+0x1d8>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        48fc <.literal16+0x1dc>
+  .byte  224,64                              // loopne        49ac <.literal16+0x1dc>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        4900 <.literal16+0x1e0>
+  .byte  224,64                              // loopne        49b0 <.literal16+0x1e0>
   .byte  154                                 // (bad)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
@@ -25269,13 +25463,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4921 <.literal16+0x201>
+  .byte  71,225,61                           // rex.RXB       loope 49d1 <.literal16+0x201>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4925 <.literal16+0x205>
+  .byte  71,225,61                           // rex.RXB       loope 49d5 <.literal16+0x205>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4929 <.literal16+0x209>
+  .byte  71,225,61                           // rex.RXB       loope 49d9 <.literal16+0x209>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 492d <.literal16+0x20d>
+  .byte  71,225,61                           // rex.RXB       loope 49dd <.literal16+0x20d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -25300,13 +25494,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4961 <.literal16+0x241>
+  .byte  71,225,61                           // rex.RXB       loope 4a11 <.literal16+0x241>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4965 <.literal16+0x245>
+  .byte  71,225,61                           // rex.RXB       loope 4a15 <.literal16+0x245>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4969 <.literal16+0x249>
+  .byte  71,225,61                           // rex.RXB       loope 4a19 <.literal16+0x249>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 496d <.literal16+0x24d>
+  .byte  71,225,61                           // rex.RXB       loope 4a1d <.literal16+0x24d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -25331,13 +25525,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49a1 <.literal16+0x281>
+  .byte  71,225,61                           // rex.RXB       loope 4a51 <.literal16+0x281>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49a5 <.literal16+0x285>
+  .byte  71,225,61                           // rex.RXB       loope 4a55 <.literal16+0x285>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49a9 <.literal16+0x289>
+  .byte  71,225,61                           // rex.RXB       loope 4a59 <.literal16+0x289>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49ad <.literal16+0x28d>
+  .byte  71,225,61                           // rex.RXB       loope 4a5d <.literal16+0x28d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -25362,13 +25556,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49e1 <.literal16+0x2c1>
+  .byte  71,225,61                           // rex.RXB       loope 4a91 <.literal16+0x2c1>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49e5 <.literal16+0x2c5>
+  .byte  71,225,61                           // rex.RXB       loope 4a95 <.literal16+0x2c5>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49e9 <.literal16+0x2c9>
+  .byte  71,225,61                           // rex.RXB       loope 4a99 <.literal16+0x2c9>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 49ed <.literal16+0x2cd>
+  .byte  71,225,61                           // rex.RXB       loope 4a9d <.literal16+0x2cd>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -25584,13 +25778,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        4bb9 <.literal16+0x499>
+  .byte  224,7                               // loopne        4c69 <.literal16+0x499>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4bbd <.literal16+0x49d>
+  .byte  224,7                               // loopne        4c6d <.literal16+0x49d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4bc1 <.literal16+0x4a1>
+  .byte  224,7                               // loopne        4c71 <.literal16+0x4a1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        4bc5 <.literal16+0x4a5>
+  .byte  224,7                               // loopne        4c75 <.literal16+0x4a5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -25624,10 +25818,10 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  1,255                               // add           %edi,%edi
   .byte  255                                 // (bad)
-  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004c08 <_sk_callback_sse41+0xa0005cc>
+  .byte  255,5,255,255,255,9                 // incl          0x9ffffff(%rip)        # a004cb8 <_sk_callback_sse41+0xa0005c2>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3004c10 <_sk_callback_sse41+0x30005d4>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3004cc0 <_sk_callback_sse41+0x30005ca>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -25682,11 +25876,11 @@ BALIGN16
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            4cdb <.literal16+0x5bb>
+  .byte  127,67                              // jg            4d8b <.literal16+0x5bb>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            4cdf <.literal16+0x5bf>
+  .byte  127,67                              // jg            4d8f <.literal16+0x5bf>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            4ce3 <.literal16+0x5c3>
+  .byte  127,67                              // jg            4d93 <.literal16+0x5c3>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,129,128,128,59           // addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -25701,16 +25895,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4cd4 <.literal16+0x5b4>
+  .byte  127,0                               // jg            4d84 <.literal16+0x5b4>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4cd8 <.literal16+0x5b8>
+  .byte  127,0                               // jg            4d88 <.literal16+0x5b8>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4cdc <.literal16+0x5bc>
+  .byte  127,0                               // jg            4d8c <.literal16+0x5bc>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4ce0 <.literal16+0x5c0>
+  .byte  127,0                               // jg            4d90 <.literal16+0x5c0>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -25719,7 +25913,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4d65 <.literal16+0x645>
+  .byte  119,115                             // ja            4e15 <.literal16+0x645>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -25730,7 +25924,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4cc9 <.literal16+0x5a9>
+  .byte  117,191                             // jne           4d79 <.literal16+0x5a9>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -25742,7 +25936,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38d0a <_sk_callback_sse41+0xffffffffe9a346ce>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38dba <_sk_callback_sse41+0xffffffffe9a346c4>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -25797,16 +25991,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4da4 <.literal16+0x684>
+  .byte  127,0                               // jg            4e54 <.literal16+0x684>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4da8 <.literal16+0x688>
+  .byte  127,0                               // jg            4e58 <.literal16+0x688>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4dac <.literal16+0x68c>
+  .byte  127,0                               // jg            4e5c <.literal16+0x68c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4db0 <.literal16+0x690>
+  .byte  127,0                               // jg            4e60 <.literal16+0x690>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -25815,7 +26009,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4e35 <.literal16+0x715>
+  .byte  119,115                             // ja            4ee5 <.literal16+0x715>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -25826,7 +26020,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4d99 <.literal16+0x679>
+  .byte  117,191                             // jne           4e49 <.literal16+0x679>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -25838,7 +26032,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38dda <_sk_callback_sse41+0xffffffffe9a3479e>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38e8a <_sk_callback_sse41+0xffffffffe9a34794>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -25893,16 +26087,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4e74 <.literal16+0x754>
+  .byte  127,0                               // jg            4f24 <.literal16+0x754>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4e78 <.literal16+0x758>
+  .byte  127,0                               // jg            4f28 <.literal16+0x758>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4e7c <.literal16+0x75c>
+  .byte  127,0                               // jg            4f2c <.literal16+0x75c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4e80 <.literal16+0x760>
+  .byte  127,0                               // jg            4f30 <.literal16+0x760>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -25911,7 +26105,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4f05 <.literal16+0x7e5>
+  .byte  119,115                             // ja            4fb5 <.literal16+0x7e5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -25922,7 +26116,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4e69 <.literal16+0x749>
+  .byte  117,191                             // jne           4f19 <.literal16+0x749>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -25934,7 +26128,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38eaa <_sk_callback_sse41+0xffffffffe9a3486e>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38f5a <_sk_callback_sse41+0xffffffffe9a34864>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -25989,16 +26183,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4f44 <.literal16+0x824>
+  .byte  127,0                               // jg            4ff4 <.literal16+0x824>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4f48 <.literal16+0x828>
+  .byte  127,0                               // jg            4ff8 <.literal16+0x828>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4f4c <.literal16+0x82c>
+  .byte  127,0                               // jg            4ffc <.literal16+0x82c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            4f50 <.literal16+0x830>
+  .byte  127,0                               // jg            5000 <.literal16+0x830>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -26007,7 +26201,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            4fd5 <.literal16+0x8b5>
+  .byte  119,115                             // ja            5085 <.literal16+0x8b5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -26018,7 +26212,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           4f39 <.literal16+0x819>
+  .byte  117,191                             // jne           4fe9 <.literal16+0x819>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -26030,7 +26224,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a38f7a <_sk_callback_sse41+0xffffffffe9a3493e>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3902a <_sk_callback_sse41+0xffffffffe9a34934>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  81                                  // push          %rcx
   .byte  140,242                             // mov           %?,%edx
@@ -26081,13 +26275,13 @@ BALIGN16
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
-  .byte  127,67                              // jg            5057 <.literal16+0x937>
+  .byte  127,67                              // jg            5107 <.literal16+0x937>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            505b <.literal16+0x93b>
+  .byte  127,67                              // jg            510b <.literal16+0x93b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            505f <.literal16+0x93f>
+  .byte  127,67                              // jg            510f <.literal16+0x93f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5063 <.literal16+0x943>
+  .byte  127,67                              // jg            5113 <.literal16+0x943>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -26134,16 +26328,16 @@ BALIGN16
   .byte  128,3,62                            // addb          $0x3e,(%rbx)
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           50e3 <.literal16+0x9c3>
+  .byte  118,63                              // jbe           5193 <.literal16+0x9c3>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           50e7 <.literal16+0x9c7>
+  .byte  118,63                              // jbe           5197 <.literal16+0x9c7>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           50eb <.literal16+0x9cb>
+  .byte  118,63                              // jbe           519b <.literal16+0x9cb>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           50ef <.literal16+0x9cf>
+  .byte  118,63                              // jbe           519f <.literal16+0x9cf>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
@@ -26155,11 +26349,11 @@ BALIGN16
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            512b <.literal16+0xa0b>
+  .byte  127,67                              // jg            51db <.literal16+0xa0b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            512f <.literal16+0xa0f>
+  .byte  127,67                              // jg            51df <.literal16+0xa0f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5133 <.literal16+0xa13>
+  .byte  127,67                              // jg            51e3 <.literal16+0xa13>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,0,0,128,63               // addb          $0x3f,-0x7fffffc5(%rax)
@@ -26188,7 +26382,7 @@ BALIGN16
   .byte  5,255,255,255,9                     // add           $0x9ffffff,%eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3005160 <_sk_callback_sse41+0x3000b24>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3005210 <_sk_callback_sse41+0x3000b1a>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -26217,13 +26411,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5199 <.literal16+0xa79>
+  .byte  224,7                               // loopne        5249 <.literal16+0xa79>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        519d <.literal16+0xa7d>
+  .byte  224,7                               // loopne        524d <.literal16+0xa7d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        51a1 <.literal16+0xa81>
+  .byte  224,7                               // loopne        5251 <.literal16+0xa81>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        51a5 <.literal16+0xa85>
+  .byte  224,7                               // loopne        5255 <.literal16+0xa85>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -26269,13 +26463,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5209 <.literal16+0xae9>
+  .byte  224,7                               // loopne        52b9 <.literal16+0xae9>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        520d <.literal16+0xaed>
+  .byte  224,7                               // loopne        52bd <.literal16+0xaed>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        5211 <.literal16+0xaf1>
+  .byte  224,7                               // loopne        52c1 <.literal16+0xaf1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        5215 <.literal16+0xaf5>
+  .byte  224,7                               // loopne        52c5 <.literal16+0xaf5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -26313,13 +26507,13 @@ BALIGN16
   .byte  65,0,0                              // add           %al,(%r8)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            52a6 <.literal16+0xb86>
+  .byte  124,66                              // jl            5356 <.literal16+0xb86>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            52aa <.literal16+0xb8a>
+  .byte  124,66                              // jl            535a <.literal16+0xb8a>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            52ae <.literal16+0xb8e>
+  .byte  124,66                              // jl            535e <.literal16+0xb8e>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            52b2 <.literal16+0xb92>
+  .byte  124,66                              // jl            5362 <.literal16+0xb92>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,240                               // add           %dh,%al
@@ -26409,13 +26603,13 @@ BALIGN16
   .byte  136,136,61,137,136,136              // mov           %cl,-0x777776c3(%rax)
   .byte  61,137,136,136,61                   // cmp           $0x3d888889,%eax
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            53b5 <.literal16+0xc95>
+  .byte  112,65                              // jo            5465 <.literal16+0xc95>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            53b9 <.literal16+0xc99>
+  .byte  112,65                              // jo            5469 <.literal16+0xc99>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            53bd <.literal16+0xc9d>
+  .byte  112,65                              // jo            546d <.literal16+0xc9d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            53c1 <.literal16+0xca1>
+  .byte  112,65                              // jo            5471 <.literal16+0xca1>
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  255,0                               // incl          (%rax)
@@ -26430,7 +26624,7 @@ BALIGN16
   .byte  5,255,255,255,9                     // add           $0x9ffffff,%eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 30053b0 <_sk_callback_sse41+0x3000d74>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 3005460 <_sk_callback_sse41+0x3000d6a>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -26457,7 +26651,7 @@ BALIGN16
   .byte  5,255,255,255,9                     // add           $0x9ffffff,%eax
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 30053f0 <_sk_callback_sse41+0x3000db4>
+  .byte  255,13,255,255,255,2                // decl          0x2ffffff(%rip)        # 30054a0 <_sk_callback_sse41+0x3000daa>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
   .byte  255,6                               // incl          (%rsi)
@@ -26472,11 +26666,11 @@ BALIGN16
   .byte  255,0                               // incl          (%rax)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            544b <.literal16+0xd2b>
+  .byte  127,67                              // jg            54fb <.literal16+0xd2b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            544f <.literal16+0xd2f>
+  .byte  127,67                              // jg            54ff <.literal16+0xd2f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5453 <.literal16+0xd33>
+  .byte  127,67                              // jg            5503 <.literal16+0xd33>
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
@@ -26552,13 +26746,13 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  255                                 // (bad)
-  .byte  127,71                              // jg            551b <.literal16+0xdfb>
+  .byte  127,71                              // jg            55cb <.literal16+0xdfb>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            551f <.literal16+0xdff>
+  .byte  127,71                              // jg            55cf <.literal16+0xdff>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            5523 <.literal16+0xe03>
+  .byte  127,71                              // jg            55d3 <.literal16+0xe03>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            5527 <.literal16+0xe07>
+  .byte  127,71                              // jg            55d7 <.literal16+0xe07>
   .byte  208                                 // (bad)
   .byte  179,89                              // mov           $0x59,%bl
   .byte  62,208                              // ds            (bad)
@@ -26692,11 +26886,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         5642 <.literal16+0xf22>
+  .byte  62,114,28                           // jb,pt         56f2 <.literal16+0xf22>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5646 <.literal16+0xf26>
+  .byte  62,114,28                           // jb,pt         56f6 <.literal16+0xf26>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         564a <.literal16+0xf2a>
+  .byte  62,114,28                           // jb,pt         56fa <.literal16+0xf2a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -26740,7 +26934,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e4d5 <_sk_callback_sse41+0x3d639e99>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e585 <_sk_callback_sse41+0x3d639e8f>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -26766,7 +26960,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e515 <_sk_callback_sse41+0x3d639ed9>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e5c5 <_sk_callback_sse41+0x3d639ecf>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -26775,13 +26969,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            570e <.literal16+0xfee>
+  .byte  114,28                              // jb            57be <.literal16+0xfee>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5712 <.literal16+0xff2>
+  .byte  62,114,28                           // jb,pt         57c2 <.literal16+0xff2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5716 <.literal16+0xff6>
+  .byte  62,114,28                           // jb,pt         57c6 <.literal16+0xff6>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         571a <.literal16+0xffa>
+  .byte  62,114,28                           // jb,pt         57ca <.literal16+0xffa>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -26802,11 +26996,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         5752 <.literal16+0x1032>
+  .byte  62,114,28                           // jb,pt         5802 <.literal16+0x1032>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5756 <.literal16+0x1036>
+  .byte  62,114,28                           // jb,pt         5806 <.literal16+0x1036>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         575a <.literal16+0x103a>
+  .byte  62,114,28                           // jb,pt         580a <.literal16+0x103a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -26850,7 +27044,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e5e5 <_sk_callback_sse41+0x3d639fa9>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e695 <_sk_callback_sse41+0x3d639f9f>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -26876,7 +27070,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e625 <_sk_callback_sse41+0x3d639fe9>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e6d5 <_sk_callback_sse41+0x3d639fdf>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -26885,13 +27079,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            581e <.literal16+0x10fe>
+  .byte  114,28                              // jb            58ce <.literal16+0x10fe>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5822 <_sk_callback_sse41+0x11e6>
+  .byte  62,114,28                           // jb,pt         58d2 <_sk_callback_sse41+0x11dc>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5826 <_sk_callback_sse41+0x11ea>
+  .byte  62,114,28                           // jb,pt         58d6 <_sk_callback_sse41+0x11e0>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         582a <_sk_callback_sse41+0x11ee>
+  .byte  62,114,28                           // jb,pt         58da <_sk_callback_sse41+0x11e4>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -26961,7 +27155,7 @@ _sk_seed_shader_sse2:
   .byte  102,15,110,199                      // movd          %edi,%xmm0
   .byte  102,15,112,192,0                    // pshufd        $0x0,%xmm0,%xmm0
   .byte  15,91,200                           // cvtdq2ps      %xmm0,%xmm1
-  .byte  15,40,21,4,75,0,0                   // movaps        0x4b04(%rip),%xmm2        # 4b80 <_sk_callback_sse2+0xd5>
+  .byte  15,40,21,196,75,0,0                 // movaps        0x4bc4(%rip),%xmm2        # 4c40 <_sk_callback_sse2+0xdb>
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  15,16,2                             // movups        (%rdx),%xmm0
   .byte  15,88,193                           // addps         %xmm1,%xmm0
@@ -26970,7 +27164,7 @@ _sk_seed_shader_sse2:
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  15,88,202                           // addps         %xmm2,%xmm1
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,21,243,74,0,0                 // movaps        0x4af3(%rip),%xmm2        # 4b90 <_sk_callback_sse2+0xe5>
+  .byte  15,40,21,179,75,0,0                 // movaps        0x4bb3(%rip),%xmm2        # 4c50 <_sk_callback_sse2+0xeb>
   .byte  15,87,219                           // xorps         %xmm3,%xmm3
   .byte  15,87,228                           // xorps         %xmm4,%xmm4
   .byte  15,87,237                           // xorps         %xmm5,%xmm5
@@ -26993,14 +27187,14 @@ _sk_dither_sse2:
   .byte  102,68,15,110,1                     // movd          (%rcx),%xmm8
   .byte  102,69,15,112,192,0                 // pshufd        $0x0,%xmm8,%xmm8
   .byte  102,69,15,239,193                   // pxor          %xmm9,%xmm8
-  .byte  102,68,15,111,21,184,74,0,0         // movdqa        0x4ab8(%rip),%xmm10        # 4ba0 <_sk_callback_sse2+0xf5>
+  .byte  102,68,15,111,21,120,75,0,0         // movdqa        0x4b78(%rip),%xmm10        # 4c60 <_sk_callback_sse2+0xfb>
   .byte  102,69,15,111,216                   // movdqa        %xmm8,%xmm11
   .byte  102,69,15,219,218                   // pand          %xmm10,%xmm11
   .byte  102,65,15,114,243,5                 // pslld         $0x5,%xmm11
   .byte  102,69,15,219,209                   // pand          %xmm9,%xmm10
   .byte  102,65,15,114,242,4                 // pslld         $0x4,%xmm10
-  .byte  102,68,15,111,37,164,74,0,0         // movdqa        0x4aa4(%rip),%xmm12        # 4bb0 <_sk_callback_sse2+0x105>
-  .byte  102,68,15,111,45,171,74,0,0         // movdqa        0x4aab(%rip),%xmm13        # 4bc0 <_sk_callback_sse2+0x115>
+  .byte  102,68,15,111,37,100,75,0,0         // movdqa        0x4b64(%rip),%xmm12        # 4c70 <_sk_callback_sse2+0x10b>
+  .byte  102,68,15,111,45,107,75,0,0         // movdqa        0x4b6b(%rip),%xmm13        # 4c80 <_sk_callback_sse2+0x11b>
   .byte  102,69,15,111,240                   // movdqa        %xmm8,%xmm14
   .byte  102,69,15,219,245                   // pand          %xmm13,%xmm14
   .byte  102,65,15,114,246,2                 // pslld         $0x2,%xmm14
@@ -27016,8 +27210,8 @@ _sk_dither_sse2:
   .byte  102,69,15,235,245                   // por           %xmm13,%xmm14
   .byte  102,69,15,235,240                   // por           %xmm8,%xmm14
   .byte  69,15,91,198                        // cvtdq2ps      %xmm14,%xmm8
-  .byte  68,15,89,5,102,74,0,0               // mulps         0x4a66(%rip),%xmm8        # 4bd0 <_sk_callback_sse2+0x125>
-  .byte  68,15,88,5,110,74,0,0               // addps         0x4a6e(%rip),%xmm8        # 4be0 <_sk_callback_sse2+0x135>
+  .byte  68,15,89,5,38,75,0,0                // mulps         0x4b26(%rip),%xmm8        # 4c90 <_sk_callback_sse2+0x12b>
+  .byte  68,15,88,5,46,75,0,0                // addps         0x4b2e(%rip),%xmm8        # 4ca0 <_sk_callback_sse2+0x13b>
   .byte  243,68,15,16,80,8                   // movss         0x8(%rax),%xmm10
   .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
@@ -27094,7 +27288,7 @@ HIDDEN _sk_srcatop_sse2
 FUNCTION(_sk_srcatop_sse2)
 _sk_srcatop_sse2:
   .byte  15,89,199                           // mulps         %xmm7,%xmm0
-  .byte  68,15,40,5,199,73,0,0               // movaps        0x49c7(%rip),%xmm8        # 4bf0 <_sk_callback_sse2+0x145>
+  .byte  68,15,40,5,135,74,0,0               // movaps        0x4a87(%rip),%xmm8        # 4cb0 <_sk_callback_sse2+0x14b>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -27119,7 +27313,7 @@ FUNCTION(_sk_dstatop_sse2)
 _sk_dstatop_sse2:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
   .byte  68,15,89,196                        // mulps         %xmm4,%xmm8
-  .byte  68,15,40,13,138,73,0,0              // movaps        0x498a(%rip),%xmm9        # 4c00 <_sk_callback_sse2+0x155>
+  .byte  68,15,40,13,74,74,0,0               // movaps        0x4a4a(%rip),%xmm9        # 4cc0 <_sk_callback_sse2+0x15b>
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
@@ -27166,7 +27360,7 @@ HIDDEN _sk_srcout_sse2
 .globl _sk_srcout_sse2
 FUNCTION(_sk_srcout_sse2)
 _sk_srcout_sse2:
-  .byte  68,15,40,5,46,73,0,0                // movaps        0x492e(%rip),%xmm8        # 4c10 <_sk_callback_sse2+0x165>
+  .byte  68,15,40,5,238,73,0,0               // movaps        0x49ee(%rip),%xmm8        # 4cd0 <_sk_callback_sse2+0x16b>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
@@ -27179,7 +27373,7 @@ HIDDEN _sk_dstout_sse2
 .globl _sk_dstout_sse2
 FUNCTION(_sk_dstout_sse2)
 _sk_dstout_sse2:
-  .byte  68,15,40,5,30,73,0,0                // movaps        0x491e(%rip),%xmm8        # 4c20 <_sk_callback_sse2+0x175>
+  .byte  68,15,40,5,222,73,0,0               // movaps        0x49de(%rip),%xmm8        # 4ce0 <_sk_callback_sse2+0x17b>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  15,89,196                           // mulps         %xmm4,%xmm0
@@ -27196,7 +27390,7 @@ HIDDEN _sk_srcover_sse2
 .globl _sk_srcover_sse2
 FUNCTION(_sk_srcover_sse2)
 _sk_srcover_sse2:
-  .byte  68,15,40,5,1,73,0,0                 // movaps        0x4901(%rip),%xmm8        # 4c30 <_sk_callback_sse2+0x185>
+  .byte  68,15,40,5,193,73,0,0               // movaps        0x49c1(%rip),%xmm8        # 4cf0 <_sk_callback_sse2+0x18b>
   .byte  68,15,92,195                        // subps         %xmm3,%xmm8
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
@@ -27216,7 +27410,7 @@ HIDDEN _sk_dstover_sse2
 .globl _sk_dstover_sse2
 FUNCTION(_sk_dstover_sse2)
 _sk_dstover_sse2:
-  .byte  68,15,40,5,213,72,0,0               // movaps        0x48d5(%rip),%xmm8        # 4c40 <_sk_callback_sse2+0x195>
+  .byte  68,15,40,5,149,73,0,0               // movaps        0x4995(%rip),%xmm8        # 4d00 <_sk_callback_sse2+0x19b>
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -27244,7 +27438,7 @@ HIDDEN _sk_multiply_sse2
 .globl _sk_multiply_sse2
 FUNCTION(_sk_multiply_sse2)
 _sk_multiply_sse2:
-  .byte  68,15,40,5,169,72,0,0               // movaps        0x48a9(%rip),%xmm8        # 4c50 <_sk_callback_sse2+0x1a5>
+  .byte  68,15,40,5,105,73,0,0               // movaps        0x4969(%rip),%xmm8        # 4d10 <_sk_callback_sse2+0x1ab>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
@@ -27320,7 +27514,7 @@ HIDDEN _sk_xor__sse2
 FUNCTION(_sk_xor__sse2)
 _sk_xor__sse2:
   .byte  68,15,40,195                        // movaps        %xmm3,%xmm8
-  .byte  15,40,29,218,71,0,0                 // movaps        0x47da(%rip),%xmm3        # 4c60 <_sk_callback_sse2+0x1b5>
+  .byte  15,40,29,154,72,0,0                 // movaps        0x489a(%rip),%xmm3        # 4d20 <_sk_callback_sse2+0x1bb>
   .byte  68,15,40,203                        // movaps        %xmm3,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
@@ -27368,7 +27562,7 @@ _sk_darken_sse2:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,95,209                        // maxps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,69,71,0,0                  // movaps        0x4745(%rip),%xmm2        # 4c70 <_sk_callback_sse2+0x1c5>
+  .byte  15,40,21,5,72,0,0                   // movaps        0x4805(%rip),%xmm2        # 4d30 <_sk_callback_sse2+0x1cb>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -27402,7 +27596,7 @@ _sk_lighten_sse2:
   .byte  68,15,89,206                        // mulps         %xmm6,%xmm9
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,234,70,0,0                 // movaps        0x46ea(%rip),%xmm2        # 4c80 <_sk_callback_sse2+0x1d5>
+  .byte  15,40,21,170,71,0,0                 // movaps        0x47aa(%rip),%xmm2        # 4d40 <_sk_callback_sse2+0x1db>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -27439,7 +27633,7 @@ _sk_difference_sse2:
   .byte  65,15,93,209                        // minps         %xmm9,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,194                        // subps         %xmm2,%xmm8
-  .byte  15,40,21,132,70,0,0                 // movaps        0x4684(%rip),%xmm2        # 4c90 <_sk_callback_sse2+0x1e5>
+  .byte  15,40,21,68,71,0,0                  // movaps        0x4744(%rip),%xmm2        # 4d50 <_sk_callback_sse2+0x1eb>
   .byte  15,92,211                           // subps         %xmm3,%xmm2
   .byte  15,89,215                           // mulps         %xmm7,%xmm2
   .byte  15,88,218                           // addps         %xmm2,%xmm3
@@ -27466,7 +27660,7 @@ _sk_exclusion_sse2:
   .byte  15,89,214                           // mulps         %xmm6,%xmm2
   .byte  15,88,210                           // addps         %xmm2,%xmm2
   .byte  68,15,92,202                        // subps         %xmm2,%xmm9
-  .byte  15,40,13,69,70,0,0                  // movaps        0x4645(%rip),%xmm1        # 4ca0 <_sk_callback_sse2+0x1f5>
+  .byte  15,40,13,5,71,0,0                   // movaps        0x4705(%rip),%xmm1        # 4d60 <_sk_callback_sse2+0x1fb>
   .byte  15,92,203                           // subps         %xmm3,%xmm1
   .byte  15,89,207                           // mulps         %xmm7,%xmm1
   .byte  15,88,217                           // addps         %xmm1,%xmm3
@@ -27480,7 +27674,7 @@ HIDDEN _sk_colorburn_sse2
 FUNCTION(_sk_colorburn_sse2)
 _sk_colorburn_sse2:
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
-  .byte  68,15,40,21,52,70,0,0               // movaps        0x4634(%rip),%xmm10        # 4cb0 <_sk_callback_sse2+0x205>
+  .byte  68,15,40,21,244,70,0,0              // movaps        0x46f4(%rip),%xmm10        # 4d70 <_sk_callback_sse2+0x20b>
   .byte  69,15,40,202                        // movaps        %xmm10,%xmm9
   .byte  68,15,92,207                        // subps         %xmm7,%xmm9
   .byte  69,15,40,217                        // movaps        %xmm9,%xmm11
@@ -27574,7 +27768,7 @@ HIDDEN _sk_colordodge_sse2
 FUNCTION(_sk_colordodge_sse2)
 _sk_colordodge_sse2:
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
-  .byte  68,15,40,21,234,68,0,0              // movaps        0x44ea(%rip),%xmm10        # 4cc0 <_sk_callback_sse2+0x215>
+  .byte  68,15,40,21,170,69,0,0              // movaps        0x45aa(%rip),%xmm10        # 4d80 <_sk_callback_sse2+0x21b>
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
   .byte  68,15,92,223                        // subps         %xmm7,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
@@ -27668,7 +27862,7 @@ _sk_hardlight_sse2:
   .byte  15,41,116,36,232                    // movaps        %xmm6,-0x18(%rsp)
   .byte  15,40,245                           // movaps        %xmm5,%xmm6
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
-  .byte  68,15,40,29,159,67,0,0              // movaps        0x439f(%rip),%xmm11        # 4cd0 <_sk_callback_sse2+0x225>
+  .byte  68,15,40,29,95,68,0,0               // movaps        0x445f(%rip),%xmm11        # 4d90 <_sk_callback_sse2+0x22b>
   .byte  69,15,40,211                        // movaps        %xmm11,%xmm10
   .byte  68,15,92,215                        // subps         %xmm7,%xmm10
   .byte  69,15,40,194                        // movaps        %xmm10,%xmm8
@@ -27756,7 +27950,7 @@ FUNCTION(_sk_overlay_sse2)
 _sk_overlay_sse2:
   .byte  68,15,40,193                        // movaps        %xmm1,%xmm8
   .byte  68,15,40,232                        // movaps        %xmm0,%xmm13
-  .byte  68,15,40,13,109,66,0,0              // movaps        0x426d(%rip),%xmm9        # 4ce0 <_sk_callback_sse2+0x235>
+  .byte  68,15,40,13,45,67,0,0               // movaps        0x432d(%rip),%xmm9        # 4da0 <_sk_callback_sse2+0x23b>
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
   .byte  68,15,92,215                        // subps         %xmm7,%xmm10
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
@@ -27847,7 +28041,7 @@ _sk_softlight_sse2:
   .byte  68,15,40,213                        // movaps        %xmm5,%xmm10
   .byte  68,15,94,215                        // divps         %xmm7,%xmm10
   .byte  69,15,84,212                        // andps         %xmm12,%xmm10
-  .byte  68,15,40,13,42,65,0,0               // movaps        0x412a(%rip),%xmm9        # 4cf0 <_sk_callback_sse2+0x245>
+  .byte  68,15,40,13,234,65,0,0              // movaps        0x41ea(%rip),%xmm9        # 4db0 <_sk_callback_sse2+0x24b>
   .byte  69,15,40,249                        // movaps        %xmm9,%xmm15
   .byte  69,15,92,250                        // subps         %xmm10,%xmm15
   .byte  69,15,40,218                        // movaps        %xmm10,%xmm11
@@ -27860,10 +28054,10 @@ _sk_softlight_sse2:
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  15,89,192                           // mulps         %xmm0,%xmm0
   .byte  65,15,88,194                        // addps         %xmm10,%xmm0
-  .byte  68,15,40,53,4,65,0,0                // movaps        0x4104(%rip),%xmm14        # 4d00 <_sk_callback_sse2+0x255>
+  .byte  68,15,40,53,196,65,0,0              // movaps        0x41c4(%rip),%xmm14        # 4dc0 <_sk_callback_sse2+0x25b>
   .byte  69,15,88,222                        // addps         %xmm14,%xmm11
   .byte  68,15,89,216                        // mulps         %xmm0,%xmm11
-  .byte  68,15,40,21,4,65,0,0                // movaps        0x4104(%rip),%xmm10        # 4d10 <_sk_callback_sse2+0x265>
+  .byte  68,15,40,21,196,65,0,0              // movaps        0x41c4(%rip),%xmm10        # 4dd0 <_sk_callback_sse2+0x26b>
   .byte  69,15,89,234                        // mulps         %xmm10,%xmm13
   .byte  69,15,88,235                        // addps         %xmm11,%xmm13
   .byte  15,88,228                           // addps         %xmm4,%xmm4
@@ -28008,7 +28202,7 @@ _sk_hue_sse2:
   .byte  68,15,40,209                        // movaps        %xmm1,%xmm10
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  68,15,89,211                        // mulps         %xmm3,%xmm10
-  .byte  68,15,40,5,71,63,0,0                // movaps        0x3f47(%rip),%xmm8        # 4d50 <_sk_callback_sse2+0x2a5>
+  .byte  68,15,40,5,7,64,0,0                 // movaps        0x4007(%rip),%xmm8        # 4e10 <_sk_callback_sse2+0x2ab>
   .byte  69,15,40,216                        // movaps        %xmm8,%xmm11
   .byte  15,40,207                           // movaps        %xmm7,%xmm1
   .byte  68,15,92,217                        // subps         %xmm1,%xmm11
@@ -28054,12 +28248,12 @@ _sk_hue_sse2:
   .byte  69,15,84,206                        // andps         %xmm14,%xmm9
   .byte  69,15,84,214                        // andps         %xmm14,%xmm10
   .byte  65,15,84,214                        // andps         %xmm14,%xmm2
-  .byte  68,15,40,61,91,62,0,0               // movaps        0x3e5b(%rip),%xmm15        # 4d20 <_sk_callback_sse2+0x275>
+  .byte  68,15,40,61,27,63,0,0               // movaps        0x3f1b(%rip),%xmm15        # 4de0 <_sk_callback_sse2+0x27b>
   .byte  65,15,89,231                        // mulps         %xmm15,%xmm4
-  .byte  15,40,5,96,62,0,0                   // movaps        0x3e60(%rip),%xmm0        # 4d30 <_sk_callback_sse2+0x285>
+  .byte  15,40,5,32,63,0,0                   // movaps        0x3f20(%rip),%xmm0        # 4df0 <_sk_callback_sse2+0x28b>
   .byte  15,89,240                           // mulps         %xmm0,%xmm6
   .byte  15,88,244                           // addps         %xmm4,%xmm6
-  .byte  68,15,40,53,98,62,0,0               // movaps        0x3e62(%rip),%xmm14        # 4d40 <_sk_callback_sse2+0x295>
+  .byte  68,15,40,53,34,63,0,0               // movaps        0x3f22(%rip),%xmm14        # 4e00 <_sk_callback_sse2+0x29b>
   .byte  68,15,40,239                        // movaps        %xmm7,%xmm13
   .byte  69,15,89,238                        // mulps         %xmm14,%xmm13
   .byte  68,15,88,238                        // addps         %xmm6,%xmm13
@@ -28236,14 +28430,14 @@ _sk_saturation_sse2:
   .byte  68,15,84,211                        // andps         %xmm3,%xmm10
   .byte  68,15,84,203                        // andps         %xmm3,%xmm9
   .byte  15,84,195                           // andps         %xmm3,%xmm0
-  .byte  68,15,40,5,247,59,0,0               // movaps        0x3bf7(%rip),%xmm8        # 4d60 <_sk_callback_sse2+0x2b5>
+  .byte  68,15,40,5,183,60,0,0               // movaps        0x3cb7(%rip),%xmm8        # 4e20 <_sk_callback_sse2+0x2bb>
   .byte  15,40,214                           // movaps        %xmm6,%xmm2
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
-  .byte  15,40,13,249,59,0,0                 // movaps        0x3bf9(%rip),%xmm1        # 4d70 <_sk_callback_sse2+0x2c5>
+  .byte  15,40,13,185,60,0,0                 // movaps        0x3cb9(%rip),%xmm1        # 4e30 <_sk_callback_sse2+0x2cb>
   .byte  15,40,221                           // movaps        %xmm5,%xmm3
   .byte  15,89,217                           // mulps         %xmm1,%xmm3
   .byte  15,88,218                           // addps         %xmm2,%xmm3
-  .byte  68,15,40,37,248,59,0,0              // movaps        0x3bf8(%rip),%xmm12        # 4d80 <_sk_callback_sse2+0x2d5>
+  .byte  68,15,40,37,184,60,0,0              // movaps        0x3cb8(%rip),%xmm12        # 4e40 <_sk_callback_sse2+0x2db>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
   .byte  68,15,88,235                        // addps         %xmm3,%xmm13
   .byte  65,15,40,210                        // movaps        %xmm10,%xmm2
@@ -28288,7 +28482,7 @@ _sk_saturation_sse2:
   .byte  15,40,223                           // movaps        %xmm7,%xmm3
   .byte  15,40,236                           // movaps        %xmm4,%xmm5
   .byte  15,89,221                           // mulps         %xmm5,%xmm3
-  .byte  68,15,40,5,93,59,0,0                // movaps        0x3b5d(%rip),%xmm8        # 4d90 <_sk_callback_sse2+0x2e5>
+  .byte  68,15,40,5,29,60,0,0                // movaps        0x3c1d(%rip),%xmm8        # 4e50 <_sk_callback_sse2+0x2eb>
   .byte  65,15,40,224                        // movaps        %xmm8,%xmm4
   .byte  68,15,92,199                        // subps         %xmm7,%xmm8
   .byte  15,88,253                           // addps         %xmm5,%xmm7
@@ -28389,14 +28583,14 @@ _sk_color_sse2:
   .byte  68,15,40,213                        // movaps        %xmm5,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
   .byte  65,15,40,208                        // movaps        %xmm8,%xmm2
-  .byte  68,15,40,45,251,57,0,0              // movaps        0x39fb(%rip),%xmm13        # 4da0 <_sk_callback_sse2+0x2f5>
+  .byte  68,15,40,45,187,58,0,0              // movaps        0x3abb(%rip),%xmm13        # 4e60 <_sk_callback_sse2+0x2fb>
   .byte  68,15,40,198                        // movaps        %xmm6,%xmm8
   .byte  69,15,89,197                        // mulps         %xmm13,%xmm8
-  .byte  68,15,40,53,251,57,0,0              // movaps        0x39fb(%rip),%xmm14        # 4db0 <_sk_callback_sse2+0x305>
+  .byte  68,15,40,53,187,58,0,0              // movaps        0x3abb(%rip),%xmm14        # 4e70 <_sk_callback_sse2+0x30b>
   .byte  65,15,40,195                        // movaps        %xmm11,%xmm0
   .byte  65,15,89,198                        // mulps         %xmm14,%xmm0
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,29,247,57,0,0              // movaps        0x39f7(%rip),%xmm11        # 4dc0 <_sk_callback_sse2+0x315>
+  .byte  68,15,40,29,183,58,0,0              // movaps        0x3ab7(%rip),%xmm11        # 4e80 <_sk_callback_sse2+0x31b>
   .byte  69,15,89,227                        // mulps         %xmm11,%xmm12
   .byte  68,15,88,224                        // addps         %xmm0,%xmm12
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
@@ -28404,7 +28598,7 @@ _sk_color_sse2:
   .byte  69,15,40,250                        // movaps        %xmm10,%xmm15
   .byte  69,15,89,254                        // mulps         %xmm14,%xmm15
   .byte  68,15,88,248                        // addps         %xmm0,%xmm15
-  .byte  68,15,40,5,227,57,0,0               // movaps        0x39e3(%rip),%xmm8        # 4dd0 <_sk_callback_sse2+0x325>
+  .byte  68,15,40,5,163,58,0,0               // movaps        0x3aa3(%rip),%xmm8        # 4e90 <_sk_callback_sse2+0x32b>
   .byte  65,15,40,224                        // movaps        %xmm8,%xmm4
   .byte  15,92,226                           // subps         %xmm2,%xmm4
   .byte  15,89,252                           // mulps         %xmm4,%xmm7
@@ -28540,15 +28734,15 @@ _sk_luminosity_sse2:
   .byte  68,15,40,205                        // movaps        %xmm5,%xmm9
   .byte  68,15,89,204                        // mulps         %xmm4,%xmm9
   .byte  15,89,222                           // mulps         %xmm6,%xmm3
-  .byte  68,15,40,37,250,55,0,0              // movaps        0x37fa(%rip),%xmm12        # 4de0 <_sk_callback_sse2+0x335>
+  .byte  68,15,40,37,186,56,0,0              // movaps        0x38ba(%rip),%xmm12        # 4ea0 <_sk_callback_sse2+0x33b>
   .byte  68,15,40,199                        // movaps        %xmm7,%xmm8
   .byte  69,15,89,196                        // mulps         %xmm12,%xmm8
-  .byte  68,15,40,45,250,55,0,0              // movaps        0x37fa(%rip),%xmm13        # 4df0 <_sk_callback_sse2+0x345>
+  .byte  68,15,40,45,186,56,0,0              // movaps        0x38ba(%rip),%xmm13        # 4eb0 <_sk_callback_sse2+0x34b>
   .byte  68,15,40,241                        // movaps        %xmm1,%xmm14
   .byte  69,15,89,245                        // mulps         %xmm13,%xmm14
   .byte  69,15,88,240                        // addps         %xmm8,%xmm14
-  .byte  68,15,40,29,246,55,0,0              // movaps        0x37f6(%rip),%xmm11        # 4e00 <_sk_callback_sse2+0x355>
-  .byte  68,15,40,5,254,55,0,0               // movaps        0x37fe(%rip),%xmm8        # 4e10 <_sk_callback_sse2+0x365>
+  .byte  68,15,40,29,182,56,0,0              // movaps        0x38b6(%rip),%xmm11        # 4ec0 <_sk_callback_sse2+0x35b>
+  .byte  68,15,40,5,190,56,0,0               // movaps        0x38be(%rip),%xmm8        # 4ed0 <_sk_callback_sse2+0x36b>
   .byte  69,15,40,248                        // movaps        %xmm8,%xmm15
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  68,15,92,248                        // subps         %xmm0,%xmm15
@@ -28693,7 +28887,7 @@ HIDDEN _sk_clamp_1_sse2
 .globl _sk_clamp_1_sse2
 FUNCTION(_sk_clamp_1_sse2)
 _sk_clamp_1_sse2:
-  .byte  68,15,40,5,7,54,0,0                 // movaps        0x3607(%rip),%xmm8        # 4e20 <_sk_callback_sse2+0x375>
+  .byte  68,15,40,5,199,54,0,0               // movaps        0x36c7(%rip),%xmm8        # 4ee0 <_sk_callback_sse2+0x37b>
   .byte  65,15,93,192                        // minps         %xmm8,%xmm0
   .byte  65,15,93,200                        // minps         %xmm8,%xmm1
   .byte  65,15,93,208                        // minps         %xmm8,%xmm2
@@ -28705,7 +28899,7 @@ HIDDEN _sk_clamp_a_sse2
 .globl _sk_clamp_a_sse2
 FUNCTION(_sk_clamp_a_sse2)
 _sk_clamp_a_sse2:
-  .byte  15,93,29,252,53,0,0                 // minps         0x35fc(%rip),%xmm3        # 4e30 <_sk_callback_sse2+0x385>
+  .byte  15,93,29,188,54,0,0                 // minps         0x36bc(%rip),%xmm3        # 4ef0 <_sk_callback_sse2+0x38b>
   .byte  15,93,195                           // minps         %xmm3,%xmm0
   .byte  15,93,203                           // minps         %xmm3,%xmm1
   .byte  15,93,211                           // minps         %xmm3,%xmm2
@@ -28792,7 +28986,7 @@ HIDDEN _sk_unpremul_sse2
 FUNCTION(_sk_unpremul_sse2)
 _sk_unpremul_sse2:
   .byte  69,15,87,192                        // xorps         %xmm8,%xmm8
-  .byte  68,15,40,13,103,53,0,0              // movaps        0x3567(%rip),%xmm9        # 4e40 <_sk_callback_sse2+0x395>
+  .byte  68,15,40,13,39,54,0,0               // movaps        0x3627(%rip),%xmm9        # 4f00 <_sk_callback_sse2+0x39b>
   .byte  68,15,94,203                        // divps         %xmm3,%xmm9
   .byte  68,15,194,195,4                     // cmpneqps      %xmm3,%xmm8
   .byte  69,15,84,193                        // andps         %xmm9,%xmm8
@@ -28806,20 +29000,20 @@ HIDDEN _sk_from_srgb_sse2
 .globl _sk_from_srgb_sse2
 FUNCTION(_sk_from_srgb_sse2)
 _sk_from_srgb_sse2:
-  .byte  68,15,40,5,82,53,0,0                // movaps        0x3552(%rip),%xmm8        # 4e50 <_sk_callback_sse2+0x3a5>
+  .byte  68,15,40,5,18,54,0,0                // movaps        0x3612(%rip),%xmm8        # 4f10 <_sk_callback_sse2+0x3ab>
   .byte  68,15,40,232                        // movaps        %xmm0,%xmm13
   .byte  69,15,89,232                        // mulps         %xmm8,%xmm13
   .byte  68,15,40,216                        // movaps        %xmm0,%xmm11
   .byte  69,15,89,219                        // mulps         %xmm11,%xmm11
-  .byte  68,15,40,13,74,53,0,0               // movaps        0x354a(%rip),%xmm9        # 4e60 <_sk_callback_sse2+0x3b5>
+  .byte  68,15,40,13,10,54,0,0               // movaps        0x360a(%rip),%xmm9        # 4f20 <_sk_callback_sse2+0x3bb>
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
   .byte  69,15,89,241                        // mulps         %xmm9,%xmm14
-  .byte  68,15,40,21,74,53,0,0               // movaps        0x354a(%rip),%xmm10        # 4e70 <_sk_callback_sse2+0x3c5>
+  .byte  68,15,40,21,10,54,0,0               // movaps        0x360a(%rip),%xmm10        # 4f30 <_sk_callback_sse2+0x3cb>
   .byte  69,15,88,242                        // addps         %xmm10,%xmm14
   .byte  69,15,89,243                        // mulps         %xmm11,%xmm14
-  .byte  68,15,40,29,74,53,0,0               // movaps        0x354a(%rip),%xmm11        # 4e80 <_sk_callback_sse2+0x3d5>
+  .byte  68,15,40,29,10,54,0,0               // movaps        0x360a(%rip),%xmm11        # 4f40 <_sk_callback_sse2+0x3db>
   .byte  69,15,88,243                        // addps         %xmm11,%xmm14
-  .byte  68,15,40,37,78,53,0,0               // movaps        0x354e(%rip),%xmm12        # 4e90 <_sk_callback_sse2+0x3e5>
+  .byte  68,15,40,37,14,54,0,0               // movaps        0x360e(%rip),%xmm12        # 4f50 <_sk_callback_sse2+0x3eb>
   .byte  65,15,194,196,1                     // cmpltps       %xmm12,%xmm0
   .byte  68,15,84,232                        // andps         %xmm0,%xmm13
   .byte  65,15,85,198                        // andnps        %xmm14,%xmm0
@@ -28856,22 +29050,22 @@ HIDDEN _sk_to_srgb_sse2
 FUNCTION(_sk_to_srgb_sse2)
 _sk_to_srgb_sse2:
   .byte  68,15,82,232                        // rsqrtps       %xmm0,%xmm13
-  .byte  68,15,40,5,219,52,0,0               // movaps        0x34db(%rip),%xmm8        # 4ea0 <_sk_callback_sse2+0x3f5>
+  .byte  68,15,40,5,155,53,0,0               // movaps        0x359b(%rip),%xmm8        # 4f60 <_sk_callback_sse2+0x3fb>
   .byte  68,15,40,240                        // movaps        %xmm0,%xmm14
   .byte  69,15,89,240                        // mulps         %xmm8,%xmm14
-  .byte  68,15,40,13,219,52,0,0              // movaps        0x34db(%rip),%xmm9        # 4eb0 <_sk_callback_sse2+0x405>
+  .byte  68,15,40,13,155,53,0,0              // movaps        0x359b(%rip),%xmm9        # 4f70 <_sk_callback_sse2+0x40b>
   .byte  69,15,40,253                        // movaps        %xmm13,%xmm15
   .byte  69,15,89,249                        // mulps         %xmm9,%xmm15
-  .byte  68,15,40,21,219,52,0,0              // movaps        0x34db(%rip),%xmm10        # 4ec0 <_sk_callback_sse2+0x415>
+  .byte  68,15,40,21,155,53,0,0              // movaps        0x359b(%rip),%xmm10        # 4f80 <_sk_callback_sse2+0x41b>
   .byte  69,15,88,250                        // addps         %xmm10,%xmm15
   .byte  69,15,89,253                        // mulps         %xmm13,%xmm15
-  .byte  68,15,40,29,219,52,0,0              // movaps        0x34db(%rip),%xmm11        # 4ed0 <_sk_callback_sse2+0x425>
+  .byte  68,15,40,29,155,53,0,0              // movaps        0x359b(%rip),%xmm11        # 4f90 <_sk_callback_sse2+0x42b>
   .byte  69,15,88,251                        // addps         %xmm11,%xmm15
-  .byte  68,15,40,37,223,52,0,0              // movaps        0x34df(%rip),%xmm12        # 4ee0 <_sk_callback_sse2+0x435>
+  .byte  68,15,40,37,159,53,0,0              // movaps        0x359f(%rip),%xmm12        # 4fa0 <_sk_callback_sse2+0x43b>
   .byte  69,15,88,236                        // addps         %xmm12,%xmm13
   .byte  69,15,83,237                        // rcpps         %xmm13,%xmm13
   .byte  69,15,89,239                        // mulps         %xmm15,%xmm13
-  .byte  68,15,40,61,219,52,0,0              // movaps        0x34db(%rip),%xmm15        # 4ef0 <_sk_callback_sse2+0x445>
+  .byte  68,15,40,61,155,53,0,0              // movaps        0x359b(%rip),%xmm15        # 4fb0 <_sk_callback_sse2+0x44b>
   .byte  65,15,194,199,1                     // cmpltps       %xmm15,%xmm0
   .byte  68,15,84,240                        // andps         %xmm0,%xmm14
   .byte  65,15,85,197                        // andnps        %xmm13,%xmm0
@@ -28921,7 +29115,7 @@ _sk_rgb_to_hsl_sse2:
   .byte  68,15,93,218                        // minps         %xmm2,%xmm11
   .byte  65,15,40,202                        // movaps        %xmm10,%xmm1
   .byte  65,15,92,203                        // subps         %xmm11,%xmm1
-  .byte  68,15,40,45,52,52,0,0               // movaps        0x3434(%rip),%xmm13        # 4f00 <_sk_callback_sse2+0x455>
+  .byte  68,15,40,45,244,52,0,0              // movaps        0x34f4(%rip),%xmm13        # 4fc0 <_sk_callback_sse2+0x45b>
   .byte  68,15,94,233                        // divps         %xmm1,%xmm13
   .byte  65,15,40,194                        // movaps        %xmm10,%xmm0
   .byte  65,15,194,192,0                     // cmpeqps       %xmm8,%xmm0
@@ -28930,30 +29124,30 @@ _sk_rgb_to_hsl_sse2:
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,40,241                        // movaps        %xmm9,%xmm14
   .byte  68,15,194,242,1                     // cmpltps       %xmm2,%xmm14
-  .byte  68,15,84,53,26,52,0,0               // andps         0x341a(%rip),%xmm14        # 4f10 <_sk_callback_sse2+0x465>
+  .byte  68,15,84,53,218,52,0,0              // andps         0x34da(%rip),%xmm14        # 4fd0 <_sk_callback_sse2+0x46b>
   .byte  69,15,88,244                        // addps         %xmm12,%xmm14
   .byte  69,15,40,250                        // movaps        %xmm10,%xmm15
   .byte  69,15,194,249,0                     // cmpeqps       %xmm9,%xmm15
   .byte  65,15,92,208                        // subps         %xmm8,%xmm2
   .byte  65,15,89,213                        // mulps         %xmm13,%xmm2
-  .byte  68,15,40,37,13,52,0,0               // movaps        0x340d(%rip),%xmm12        # 4f20 <_sk_callback_sse2+0x475>
+  .byte  68,15,40,37,205,52,0,0              // movaps        0x34cd(%rip),%xmm12        # 4fe0 <_sk_callback_sse2+0x47b>
   .byte  65,15,88,212                        // addps         %xmm12,%xmm2
   .byte  69,15,92,193                        // subps         %xmm9,%xmm8
   .byte  69,15,89,197                        // mulps         %xmm13,%xmm8
-  .byte  68,15,88,5,9,52,0,0                 // addps         0x3409(%rip),%xmm8        # 4f30 <_sk_callback_sse2+0x485>
+  .byte  68,15,88,5,201,52,0,0               // addps         0x34c9(%rip),%xmm8        # 4ff0 <_sk_callback_sse2+0x48b>
   .byte  65,15,84,215                        // andps         %xmm15,%xmm2
   .byte  69,15,85,248                        // andnps        %xmm8,%xmm15
   .byte  68,15,86,250                        // orps          %xmm2,%xmm15
   .byte  68,15,84,240                        // andps         %xmm0,%xmm14
   .byte  65,15,85,199                        // andnps        %xmm15,%xmm0
   .byte  65,15,86,198                        // orps          %xmm14,%xmm0
-  .byte  15,89,5,250,51,0,0                  // mulps         0x33fa(%rip),%xmm0        # 4f40 <_sk_callback_sse2+0x495>
+  .byte  15,89,5,186,52,0,0                  // mulps         0x34ba(%rip),%xmm0        # 5000 <_sk_callback_sse2+0x49b>
   .byte  69,15,40,194                        // movaps        %xmm10,%xmm8
   .byte  69,15,194,195,4                     // cmpneqps      %xmm11,%xmm8
   .byte  65,15,84,192                        // andps         %xmm8,%xmm0
   .byte  69,15,92,226                        // subps         %xmm10,%xmm12
   .byte  69,15,88,211                        // addps         %xmm11,%xmm10
-  .byte  68,15,40,13,237,51,0,0              // movaps        0x33ed(%rip),%xmm9        # 4f50 <_sk_callback_sse2+0x4a5>
+  .byte  68,15,40,13,173,52,0,0              // movaps        0x34ad(%rip),%xmm9        # 5010 <_sk_callback_sse2+0x4ab>
   .byte  65,15,40,210                        // movaps        %xmm10,%xmm2
   .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
   .byte  68,15,194,202,1                     // cmpltps       %xmm2,%xmm9
@@ -28977,7 +29171,7 @@ _sk_hsl_to_rgb_sse2:
   .byte  15,41,92,36,168                     // movaps        %xmm3,-0x58(%rsp)
   .byte  68,15,40,218                        // movaps        %xmm2,%xmm11
   .byte  15,40,240                           // movaps        %xmm0,%xmm6
-  .byte  68,15,40,13,172,51,0,0              // movaps        0x33ac(%rip),%xmm9        # 4f60 <_sk_callback_sse2+0x4b5>
+  .byte  68,15,40,13,108,52,0,0              // movaps        0x346c(%rip),%xmm9        # 5020 <_sk_callback_sse2+0x4bb>
   .byte  69,15,40,209                        // movaps        %xmm9,%xmm10
   .byte  69,15,194,211,2                     // cmpleps       %xmm11,%xmm10
   .byte  15,40,193                           // movaps        %xmm1,%xmm0
@@ -28994,28 +29188,28 @@ _sk_hsl_to_rgb_sse2:
   .byte  69,15,88,211                        // addps         %xmm11,%xmm10
   .byte  69,15,88,219                        // addps         %xmm11,%xmm11
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
-  .byte  15,40,5,117,51,0,0                  // movaps        0x3375(%rip),%xmm0        # 4f70 <_sk_callback_sse2+0x4c5>
+  .byte  15,40,5,53,52,0,0                   // movaps        0x3435(%rip),%xmm0        # 5030 <_sk_callback_sse2+0x4cb>
   .byte  15,88,198                           // addps         %xmm6,%xmm0
   .byte  243,15,91,200                       // cvttps2dq     %xmm0,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
   .byte  15,40,216                           // movaps        %xmm0,%xmm3
   .byte  15,194,217,1                        // cmpltps       %xmm1,%xmm3
-  .byte  15,84,29,109,51,0,0                 // andps         0x336d(%rip),%xmm3        # 4f80 <_sk_callback_sse2+0x4d5>
+  .byte  15,84,29,45,52,0,0                  // andps         0x342d(%rip),%xmm3        # 5040 <_sk_callback_sse2+0x4db>
   .byte  15,92,203                           // subps         %xmm3,%xmm1
   .byte  15,92,193                           // subps         %xmm1,%xmm0
-  .byte  68,15,40,45,111,51,0,0              // movaps        0x336f(%rip),%xmm13        # 4f90 <_sk_callback_sse2+0x4e5>
+  .byte  68,15,40,45,47,52,0,0               // movaps        0x342f(%rip),%xmm13        # 5050 <_sk_callback_sse2+0x4eb>
   .byte  69,15,40,197                        // movaps        %xmm13,%xmm8
   .byte  68,15,194,192,2                     // cmpleps       %xmm0,%xmm8
   .byte  69,15,40,242                        // movaps        %xmm10,%xmm14
   .byte  69,15,92,243                        // subps         %xmm11,%xmm14
   .byte  65,15,40,217                        // movaps        %xmm9,%xmm3
   .byte  15,194,216,2                        // cmpleps       %xmm0,%xmm3
-  .byte  15,40,21,127,51,0,0                 // movaps        0x337f(%rip),%xmm2        # 4fc0 <_sk_callback_sse2+0x515>
+  .byte  15,40,21,63,52,0,0                  // movaps        0x343f(%rip),%xmm2        # 5080 <_sk_callback_sse2+0x51b>
   .byte  68,15,40,250                        // movaps        %xmm2,%xmm15
   .byte  68,15,194,248,2                     // cmpleps       %xmm0,%xmm15
-  .byte  15,40,13,79,51,0,0                  // movaps        0x334f(%rip),%xmm1        # 4fa0 <_sk_callback_sse2+0x4f5>
+  .byte  15,40,13,15,52,0,0                  // movaps        0x340f(%rip),%xmm1        # 5060 <_sk_callback_sse2+0x4fb>
   .byte  15,89,193                           // mulps         %xmm1,%xmm0
-  .byte  15,40,45,85,51,0,0                  // movaps        0x3355(%rip),%xmm5        # 4fb0 <_sk_callback_sse2+0x505>
+  .byte  15,40,45,21,52,0,0                  // movaps        0x3415(%rip),%xmm5        # 5070 <_sk_callback_sse2+0x50b>
   .byte  15,40,229                           // movaps        %xmm5,%xmm4
   .byte  15,92,224                           // subps         %xmm0,%xmm4
   .byte  65,15,89,230                        // mulps         %xmm14,%xmm4
@@ -29038,7 +29232,7 @@ _sk_hsl_to_rgb_sse2:
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
   .byte  15,40,222                           // movaps        %xmm6,%xmm3
   .byte  15,194,216,1                        // cmpltps       %xmm0,%xmm3
-  .byte  15,84,29,202,50,0,0                 // andps         0x32ca(%rip),%xmm3        # 4f80 <_sk_callback_sse2+0x4d5>
+  .byte  15,84,29,138,51,0,0                 // andps         0x338a(%rip),%xmm3        # 5040 <_sk_callback_sse2+0x4db>
   .byte  15,92,195                           // subps         %xmm3,%xmm0
   .byte  68,15,40,230                        // movaps        %xmm6,%xmm12
   .byte  68,15,92,224                        // subps         %xmm0,%xmm12
@@ -29068,12 +29262,12 @@ _sk_hsl_to_rgb_sse2:
   .byte  15,40,124,36,136                    // movaps        -0x78(%rsp),%xmm7
   .byte  15,40,231                           // movaps        %xmm7,%xmm4
   .byte  15,85,227                           // andnps        %xmm3,%xmm4
-  .byte  15,88,53,162,50,0,0                 // addps         0x32a2(%rip),%xmm6        # 4fd0 <_sk_callback_sse2+0x525>
+  .byte  15,88,53,98,51,0,0                  // addps         0x3362(%rip),%xmm6        # 5090 <_sk_callback_sse2+0x52b>
   .byte  243,15,91,198                       // cvttps2dq     %xmm6,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
   .byte  15,40,222                           // movaps        %xmm6,%xmm3
   .byte  15,194,216,1                        // cmpltps       %xmm0,%xmm3
-  .byte  15,84,29,61,50,0,0                  // andps         0x323d(%rip),%xmm3        # 4f80 <_sk_callback_sse2+0x4d5>
+  .byte  15,84,29,253,50,0,0                 // andps         0x32fd(%rip),%xmm3        # 5040 <_sk_callback_sse2+0x4db>
   .byte  15,92,195                           // subps         %xmm3,%xmm0
   .byte  15,92,240                           // subps         %xmm0,%xmm6
   .byte  15,89,206                           // mulps         %xmm6,%xmm1
@@ -29137,7 +29331,7 @@ _sk_scale_u8_sse2:
   .byte  102,69,15,96,193                    // punpcklbw     %xmm9,%xmm8
   .byte  102,69,15,97,193                    // punpcklwd     %xmm9,%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,203,49,0,0               // mulps         0x31cb(%rip),%xmm8        # 4fe0 <_sk_callback_sse2+0x535>
+  .byte  68,15,89,5,139,50,0,0               // mulps         0x328b(%rip),%xmm8        # 50a0 <_sk_callback_sse2+0x53b>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
@@ -29178,7 +29372,7 @@ _sk_lerp_u8_sse2:
   .byte  102,69,15,96,193                    // punpcklbw     %xmm9,%xmm8
   .byte  102,69,15,97,193                    // punpcklwd     %xmm9,%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,105,49,0,0               // mulps         0x3169(%rip),%xmm8        # 4ff0 <_sk_callback_sse2+0x545>
+  .byte  68,15,89,5,41,50,0,0                // mulps         0x3229(%rip),%xmm8        # 50b0 <_sk_callback_sse2+0x54b>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -29203,17 +29397,17 @@ _sk_lerp_565_sse2:
   .byte  243,68,15,126,20,120                // movq          (%rax,%rdi,2),%xmm10
   .byte  102,69,15,239,192                   // pxor          %xmm8,%xmm8
   .byte  102,69,15,97,208                    // punpcklwd     %xmm8,%xmm10
-  .byte  102,68,15,111,5,47,49,0,0           // movdqa        0x312f(%rip),%xmm8        # 5000 <_sk_callback_sse2+0x555>
+  .byte  102,68,15,111,5,239,49,0,0          // movdqa        0x31ef(%rip),%xmm8        # 50c0 <_sk_callback_sse2+0x55b>
   .byte  102,69,15,219,194                   // pand          %xmm10,%xmm8
   .byte  69,15,91,192                        // cvtdq2ps      %xmm8,%xmm8
-  .byte  68,15,89,5,46,49,0,0                // mulps         0x312e(%rip),%xmm8        # 5010 <_sk_callback_sse2+0x565>
-  .byte  102,68,15,111,13,53,49,0,0          // movdqa        0x3135(%rip),%xmm9        # 5020 <_sk_callback_sse2+0x575>
+  .byte  68,15,89,5,238,49,0,0               // mulps         0x31ee(%rip),%xmm8        # 50d0 <_sk_callback_sse2+0x56b>
+  .byte  102,68,15,111,13,245,49,0,0         // movdqa        0x31f5(%rip),%xmm9        # 50e0 <_sk_callback_sse2+0x57b>
   .byte  102,69,15,219,202                   // pand          %xmm10,%xmm9
   .byte  69,15,91,201                        // cvtdq2ps      %xmm9,%xmm9
-  .byte  68,15,89,13,52,49,0,0               // mulps         0x3134(%rip),%xmm9        # 5030 <_sk_callback_sse2+0x585>
-  .byte  102,68,15,219,21,59,49,0,0          // pand          0x313b(%rip),%xmm10        # 5040 <_sk_callback_sse2+0x595>
+  .byte  68,15,89,13,244,49,0,0              // mulps         0x31f4(%rip),%xmm9        # 50f0 <_sk_callback_sse2+0x58b>
+  .byte  102,68,15,219,21,251,49,0,0         // pand          0x31fb(%rip),%xmm10        # 5100 <_sk_callback_sse2+0x59b>
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
-  .byte  68,15,89,21,63,49,0,0               // mulps         0x313f(%rip),%xmm10        # 5050 <_sk_callback_sse2+0x5a5>
+  .byte  68,15,89,21,255,49,0,0              // mulps         0x31ff(%rip),%xmm10        # 5110 <_sk_callback_sse2+0x5ab>
   .byte  15,92,196                           // subps         %xmm4,%xmm0
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  15,88,196                           // addps         %xmm4,%xmm0
@@ -29244,7 +29438,7 @@ _sk_load_tables_sse2:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  76,139,72,8                         // mov           0x8(%rax),%r9
   .byte  243,69,15,111,12,184                // movdqu        (%r8,%rdi,4),%xmm9
-  .byte  102,68,15,111,5,239,48,0,0          // movdqa        0x30ef(%rip),%xmm8        # 5060 <_sk_callback_sse2+0x5b5>
+  .byte  102,68,15,111,5,175,49,0,0          // movdqa        0x31af(%rip),%xmm8        # 5120 <_sk_callback_sse2+0x5bb>
   .byte  102,65,15,111,193                   // movdqa        %xmm9,%xmm0
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
@@ -29299,7 +29493,7 @@ _sk_load_tables_sse2:
   .byte  65,15,20,208                        // unpcklps      %xmm8,%xmm2
   .byte  102,65,15,114,209,24                // psrld         $0x18,%xmm9
   .byte  65,15,91,217                        // cvtdq2ps      %xmm9,%xmm3
-  .byte  15,89,29,252,47,0,0                 // mulps         0x2ffc(%rip),%xmm3        # 5070 <_sk_callback_sse2+0x5c5>
+  .byte  15,89,29,188,48,0,0                 // mulps         0x30bc(%rip),%xmm3        # 5130 <_sk_callback_sse2+0x5cb>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -29318,7 +29512,7 @@ _sk_load_tables_u16_be_sse2:
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,97,200                       // punpcklwd     %xmm0,%xmm1
   .byte  102,68,15,105,200                   // punpckhwd     %xmm0,%xmm9
-  .byte  102,68,15,111,21,207,47,0,0         // movdqa        0x2fcf(%rip),%xmm10        # 5080 <_sk_callback_sse2+0x5d5>
+  .byte  102,68,15,111,21,143,48,0,0         // movdqa        0x308f(%rip),%xmm10        # 5140 <_sk_callback_sse2+0x5db>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,194                   // pand          %xmm10,%xmm0
   .byte  102,69,15,239,192                   // pxor          %xmm8,%xmm8
@@ -29379,7 +29573,7 @@ _sk_load_tables_u16_be_sse2:
   .byte  102,65,15,235,217                   // por           %xmm9,%xmm3
   .byte  102,65,15,97,216                    // punpcklwd     %xmm8,%xmm3
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,190,46,0,0                 // mulps         0x2ebe(%rip),%xmm3        # 5090 <_sk_callback_sse2+0x5e5>
+  .byte  15,89,29,126,47,0,0                 // mulps         0x2f7e(%rip),%xmm3        # 5150 <_sk_callback_sse2+0x5eb>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -29401,7 +29595,7 @@ _sk_load_tables_rgb_u16_be_sse2:
   .byte  102,68,15,97,208                    // punpcklwd     %xmm0,%xmm10
   .byte  102,65,15,111,195                   // movdqa        %xmm11,%xmm0
   .byte  102,65,15,97,194                    // punpcklwd     %xmm10,%xmm0
-  .byte  102,68,15,111,5,126,46,0,0          // movdqa        0x2e7e(%rip),%xmm8        # 50a0 <_sk_callback_sse2+0x5f5>
+  .byte  102,68,15,111,5,62,47,0,0           // movdqa        0x2f3e(%rip),%xmm8        # 5160 <_sk_callback_sse2+0x5fb>
   .byte  102,15,112,200,78                   // pshufd        $0x4e,%xmm0,%xmm1
   .byte  102,65,15,219,192                   // pand          %xmm8,%xmm0
   .byte  102,69,15,239,201                   // pxor          %xmm9,%xmm9
@@ -29456,7 +29650,7 @@ _sk_load_tables_rgb_u16_be_sse2:
   .byte  15,20,211                           // unpcklps      %xmm3,%xmm2
   .byte  65,15,20,208                        // unpcklps      %xmm8,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,141,45,0,0                 // movaps        0x2d8d(%rip),%xmm3        # 50b0 <_sk_callback_sse2+0x605>
+  .byte  15,40,29,77,46,0,0                  // movaps        0x2e4d(%rip),%xmm3        # 5170 <_sk_callback_sse2+0x60b>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_byte_tables_sse2
@@ -29466,7 +29660,7 @@ _sk_byte_tables_sse2:
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,142,45,0,0               // movaps        0x2d8e(%rip),%xmm8        # 50c0 <_sk_callback_sse2+0x615>
+  .byte  68,15,40,5,78,46,0,0                // movaps        0x2e4e(%rip),%xmm8        # 5180 <_sk_callback_sse2+0x61b>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,91,192                       // cvtps2dq      %xmm0,%xmm0
   .byte  102,72,15,126,193                   // movq          %xmm0,%rcx
@@ -29493,7 +29687,7 @@ _sk_byte_tables_sse2:
   .byte  102,65,15,96,193                    // punpcklbw     %xmm9,%xmm0
   .byte  102,65,15,97,193                    // punpcklwd     %xmm9,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,21,43,45,0,0               // movaps        0x2d2b(%rip),%xmm10        # 50d0 <_sk_callback_sse2+0x625>
+  .byte  68,15,40,21,235,45,0,0              // movaps        0x2deb(%rip),%xmm10        # 5190 <_sk_callback_sse2+0x62b>
   .byte  65,15,89,194                        // mulps         %xmm10,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -29609,7 +29803,7 @@ _sk_byte_tables_rgb_sse2:
   .byte  102,65,15,96,193                    // punpcklbw     %xmm9,%xmm0
   .byte  102,65,15,97,193                    // punpcklwd     %xmm9,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,21,126,43,0,0              // movaps        0x2b7e(%rip),%xmm10        # 50e0 <_sk_callback_sse2+0x635>
+  .byte  68,15,40,21,62,44,0,0               // movaps        0x2c3e(%rip),%xmm10        # 51a0 <_sk_callback_sse2+0x63b>
   .byte  65,15,89,194                        // mulps         %xmm10,%xmm0
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
   .byte  102,15,91,201                       // cvtps2dq      %xmm1,%xmm1
@@ -29806,15 +30000,15 @@ _sk_parametric_r_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,189,40,0,0              // mulps         0x28bd(%rip),%xmm9        # 50f0 <_sk_callback_sse2+0x645>
-  .byte  68,15,84,21,197,40,0,0              // andps         0x28c5(%rip),%xmm10        # 5100 <_sk_callback_sse2+0x655>
-  .byte  68,15,86,21,205,40,0,0              // orps          0x28cd(%rip),%xmm10        # 5110 <_sk_callback_sse2+0x665>
-  .byte  68,15,88,13,213,40,0,0              // addps         0x28d5(%rip),%xmm9        # 5120 <_sk_callback_sse2+0x675>
-  .byte  68,15,40,37,221,40,0,0              // movaps        0x28dd(%rip),%xmm12        # 5130 <_sk_callback_sse2+0x685>
+  .byte  68,15,89,13,125,41,0,0              // mulps         0x297d(%rip),%xmm9        # 51b0 <_sk_callback_sse2+0x64b>
+  .byte  68,15,84,21,133,41,0,0              // andps         0x2985(%rip),%xmm10        # 51c0 <_sk_callback_sse2+0x65b>
+  .byte  68,15,86,21,141,41,0,0              // orps          0x298d(%rip),%xmm10        # 51d0 <_sk_callback_sse2+0x66b>
+  .byte  68,15,88,13,149,41,0,0              // addps         0x2995(%rip),%xmm9        # 51e0 <_sk_callback_sse2+0x67b>
+  .byte  68,15,40,37,157,41,0,0              // movaps        0x299d(%rip),%xmm12        # 51f0 <_sk_callback_sse2+0x68b>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,221,40,0,0              // addps         0x28dd(%rip),%xmm10        # 5140 <_sk_callback_sse2+0x695>
-  .byte  68,15,40,37,229,40,0,0              // movaps        0x28e5(%rip),%xmm12        # 5150 <_sk_callback_sse2+0x6a5>
+  .byte  68,15,88,21,157,41,0,0              // addps         0x299d(%rip),%xmm10        # 5200 <_sk_callback_sse2+0x69b>
+  .byte  68,15,40,37,165,41,0,0              // movaps        0x29a5(%rip),%xmm12        # 5210 <_sk_callback_sse2+0x6ab>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -29822,22 +30016,22 @@ _sk_parametric_r_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,207,40,0,0              // movaps        0x28cf(%rip),%xmm10        # 5160 <_sk_callback_sse2+0x6b5>
+  .byte  68,15,40,21,143,41,0,0              // movaps        0x298f(%rip),%xmm10        # 5220 <_sk_callback_sse2+0x6bb>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,195,40,0,0              // addps         0x28c3(%rip),%xmm9        # 5170 <_sk_callback_sse2+0x6c5>
-  .byte  68,15,40,37,203,40,0,0              // movaps        0x28cb(%rip),%xmm12        # 5180 <_sk_callback_sse2+0x6d5>
+  .byte  68,15,88,13,131,41,0,0              // addps         0x2983(%rip),%xmm9        # 5230 <_sk_callback_sse2+0x6cb>
+  .byte  68,15,40,37,139,41,0,0              // movaps        0x298b(%rip),%xmm12        # 5240 <_sk_callback_sse2+0x6db>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,203,40,0,0              // movaps        0x28cb(%rip),%xmm12        # 5190 <_sk_callback_sse2+0x6e5>
+  .byte  68,15,40,37,139,41,0,0              // movaps        0x298b(%rip),%xmm12        # 5250 <_sk_callback_sse2+0x6eb>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,207,40,0,0              // movaps        0x28cf(%rip),%xmm13        # 51a0 <_sk_callback_sse2+0x6f5>
+  .byte  68,15,40,45,143,41,0,0              // movaps        0x298f(%rip),%xmm13        # 5260 <_sk_callback_sse2+0x6fb>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,207,40,0,0              // mulps         0x28cf(%rip),%xmm13        # 51b0 <_sk_callback_sse2+0x705>
+  .byte  68,15,89,45,143,41,0,0              // mulps         0x298f(%rip),%xmm13        # 5270 <_sk_callback_sse2+0x70b>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -29873,15 +30067,15 @@ _sk_parametric_g_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,79,40,0,0               // mulps         0x284f(%rip),%xmm9        # 51c0 <_sk_callback_sse2+0x715>
-  .byte  68,15,84,21,87,40,0,0               // andps         0x2857(%rip),%xmm10        # 51d0 <_sk_callback_sse2+0x725>
-  .byte  68,15,86,21,95,40,0,0               // orps          0x285f(%rip),%xmm10        # 51e0 <_sk_callback_sse2+0x735>
-  .byte  68,15,88,13,103,40,0,0              // addps         0x2867(%rip),%xmm9        # 51f0 <_sk_callback_sse2+0x745>
-  .byte  68,15,40,37,111,40,0,0              // movaps        0x286f(%rip),%xmm12        # 5200 <_sk_callback_sse2+0x755>
+  .byte  68,15,89,13,15,41,0,0               // mulps         0x290f(%rip),%xmm9        # 5280 <_sk_callback_sse2+0x71b>
+  .byte  68,15,84,21,23,41,0,0               // andps         0x2917(%rip),%xmm10        # 5290 <_sk_callback_sse2+0x72b>
+  .byte  68,15,86,21,31,41,0,0               // orps          0x291f(%rip),%xmm10        # 52a0 <_sk_callback_sse2+0x73b>
+  .byte  68,15,88,13,39,41,0,0               // addps         0x2927(%rip),%xmm9        # 52b0 <_sk_callback_sse2+0x74b>
+  .byte  68,15,40,37,47,41,0,0               // movaps        0x292f(%rip),%xmm12        # 52c0 <_sk_callback_sse2+0x75b>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,111,40,0,0              // addps         0x286f(%rip),%xmm10        # 5210 <_sk_callback_sse2+0x765>
-  .byte  68,15,40,37,119,40,0,0              // movaps        0x2877(%rip),%xmm12        # 5220 <_sk_callback_sse2+0x775>
+  .byte  68,15,88,21,47,41,0,0               // addps         0x292f(%rip),%xmm10        # 52d0 <_sk_callback_sse2+0x76b>
+  .byte  68,15,40,37,55,41,0,0               // movaps        0x2937(%rip),%xmm12        # 52e0 <_sk_callback_sse2+0x77b>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -29889,22 +30083,22 @@ _sk_parametric_g_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,97,40,0,0               // movaps        0x2861(%rip),%xmm10        # 5230 <_sk_callback_sse2+0x785>
+  .byte  68,15,40,21,33,41,0,0               // movaps        0x2921(%rip),%xmm10        # 52f0 <_sk_callback_sse2+0x78b>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,85,40,0,0               // addps         0x2855(%rip),%xmm9        # 5240 <_sk_callback_sse2+0x795>
-  .byte  68,15,40,37,93,40,0,0               // movaps        0x285d(%rip),%xmm12        # 5250 <_sk_callback_sse2+0x7a5>
+  .byte  68,15,88,13,21,41,0,0               // addps         0x2915(%rip),%xmm9        # 5300 <_sk_callback_sse2+0x79b>
+  .byte  68,15,40,37,29,41,0,0               // movaps        0x291d(%rip),%xmm12        # 5310 <_sk_callback_sse2+0x7ab>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,93,40,0,0               // movaps        0x285d(%rip),%xmm12        # 5260 <_sk_callback_sse2+0x7b5>
+  .byte  68,15,40,37,29,41,0,0               // movaps        0x291d(%rip),%xmm12        # 5320 <_sk_callback_sse2+0x7bb>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,97,40,0,0               // movaps        0x2861(%rip),%xmm13        # 5270 <_sk_callback_sse2+0x7c5>
+  .byte  68,15,40,45,33,41,0,0               // movaps        0x2921(%rip),%xmm13        # 5330 <_sk_callback_sse2+0x7cb>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,97,40,0,0               // mulps         0x2861(%rip),%xmm13        # 5280 <_sk_callback_sse2+0x7d5>
+  .byte  68,15,89,45,33,41,0,0               // mulps         0x2921(%rip),%xmm13        # 5340 <_sk_callback_sse2+0x7db>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -29940,15 +30134,15 @@ _sk_parametric_b_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,225,39,0,0              // mulps         0x27e1(%rip),%xmm9        # 5290 <_sk_callback_sse2+0x7e5>
-  .byte  68,15,84,21,233,39,0,0              // andps         0x27e9(%rip),%xmm10        # 52a0 <_sk_callback_sse2+0x7f5>
-  .byte  68,15,86,21,241,39,0,0              // orps          0x27f1(%rip),%xmm10        # 52b0 <_sk_callback_sse2+0x805>
-  .byte  68,15,88,13,249,39,0,0              // addps         0x27f9(%rip),%xmm9        # 52c0 <_sk_callback_sse2+0x815>
-  .byte  68,15,40,37,1,40,0,0                // movaps        0x2801(%rip),%xmm12        # 52d0 <_sk_callback_sse2+0x825>
+  .byte  68,15,89,13,161,40,0,0              // mulps         0x28a1(%rip),%xmm9        # 5350 <_sk_callback_sse2+0x7eb>
+  .byte  68,15,84,21,169,40,0,0              // andps         0x28a9(%rip),%xmm10        # 5360 <_sk_callback_sse2+0x7fb>
+  .byte  68,15,86,21,177,40,0,0              // orps          0x28b1(%rip),%xmm10        # 5370 <_sk_callback_sse2+0x80b>
+  .byte  68,15,88,13,185,40,0,0              // addps         0x28b9(%rip),%xmm9        # 5380 <_sk_callback_sse2+0x81b>
+  .byte  68,15,40,37,193,40,0,0              // movaps        0x28c1(%rip),%xmm12        # 5390 <_sk_callback_sse2+0x82b>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,1,40,0,0                // addps         0x2801(%rip),%xmm10        # 52e0 <_sk_callback_sse2+0x835>
-  .byte  68,15,40,37,9,40,0,0                // movaps        0x2809(%rip),%xmm12        # 52f0 <_sk_callback_sse2+0x845>
+  .byte  68,15,88,21,193,40,0,0              // addps         0x28c1(%rip),%xmm10        # 53a0 <_sk_callback_sse2+0x83b>
+  .byte  68,15,40,37,201,40,0,0              // movaps        0x28c9(%rip),%xmm12        # 53b0 <_sk_callback_sse2+0x84b>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -29956,22 +30150,22 @@ _sk_parametric_b_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,243,39,0,0              // movaps        0x27f3(%rip),%xmm10        # 5300 <_sk_callback_sse2+0x855>
+  .byte  68,15,40,21,179,40,0,0              // movaps        0x28b3(%rip),%xmm10        # 53c0 <_sk_callback_sse2+0x85b>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,231,39,0,0              // addps         0x27e7(%rip),%xmm9        # 5310 <_sk_callback_sse2+0x865>
-  .byte  68,15,40,37,239,39,0,0              // movaps        0x27ef(%rip),%xmm12        # 5320 <_sk_callback_sse2+0x875>
+  .byte  68,15,88,13,167,40,0,0              // addps         0x28a7(%rip),%xmm9        # 53d0 <_sk_callback_sse2+0x86b>
+  .byte  68,15,40,37,175,40,0,0              // movaps        0x28af(%rip),%xmm12        # 53e0 <_sk_callback_sse2+0x87b>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,239,39,0,0              // movaps        0x27ef(%rip),%xmm12        # 5330 <_sk_callback_sse2+0x885>
+  .byte  68,15,40,37,175,40,0,0              // movaps        0x28af(%rip),%xmm12        # 53f0 <_sk_callback_sse2+0x88b>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,243,39,0,0              // movaps        0x27f3(%rip),%xmm13        # 5340 <_sk_callback_sse2+0x895>
+  .byte  68,15,40,45,179,40,0,0              // movaps        0x28b3(%rip),%xmm13        # 5400 <_sk_callback_sse2+0x89b>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,243,39,0,0              // mulps         0x27f3(%rip),%xmm13        # 5350 <_sk_callback_sse2+0x8a5>
+  .byte  68,15,89,45,179,40,0,0              // mulps         0x28b3(%rip),%xmm13        # 5410 <_sk_callback_sse2+0x8ab>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -30007,15 +30201,15 @@ _sk_parametric_a_sse2:
   .byte  69,15,88,209                        // addps         %xmm9,%xmm10
   .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11
   .byte  69,15,91,202                        // cvtdq2ps      %xmm10,%xmm9
-  .byte  68,15,89,13,115,39,0,0              // mulps         0x2773(%rip),%xmm9        # 5360 <_sk_callback_sse2+0x8b5>
-  .byte  68,15,84,21,123,39,0,0              // andps         0x277b(%rip),%xmm10        # 5370 <_sk_callback_sse2+0x8c5>
-  .byte  68,15,86,21,131,39,0,0              // orps          0x2783(%rip),%xmm10        # 5380 <_sk_callback_sse2+0x8d5>
-  .byte  68,15,88,13,139,39,0,0              // addps         0x278b(%rip),%xmm9        # 5390 <_sk_callback_sse2+0x8e5>
-  .byte  68,15,40,37,147,39,0,0              // movaps        0x2793(%rip),%xmm12        # 53a0 <_sk_callback_sse2+0x8f5>
+  .byte  68,15,89,13,51,40,0,0               // mulps         0x2833(%rip),%xmm9        # 5420 <_sk_callback_sse2+0x8bb>
+  .byte  68,15,84,21,59,40,0,0               // andps         0x283b(%rip),%xmm10        # 5430 <_sk_callback_sse2+0x8cb>
+  .byte  68,15,86,21,67,40,0,0               // orps          0x2843(%rip),%xmm10        # 5440 <_sk_callback_sse2+0x8db>
+  .byte  68,15,88,13,75,40,0,0               // addps         0x284b(%rip),%xmm9        # 5450 <_sk_callback_sse2+0x8eb>
+  .byte  68,15,40,37,83,40,0,0               // movaps        0x2853(%rip),%xmm12        # 5460 <_sk_callback_sse2+0x8fb>
   .byte  69,15,89,226                        // mulps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,88,21,147,39,0,0              // addps         0x2793(%rip),%xmm10        # 53b0 <_sk_callback_sse2+0x905>
-  .byte  68,15,40,37,155,39,0,0              // movaps        0x279b(%rip),%xmm12        # 53c0 <_sk_callback_sse2+0x915>
+  .byte  68,15,88,21,83,40,0,0               // addps         0x2853(%rip),%xmm10        # 5470 <_sk_callback_sse2+0x90b>
+  .byte  68,15,40,37,91,40,0,0               // movaps        0x285b(%rip),%xmm12        # 5480 <_sk_callback_sse2+0x91b>
   .byte  69,15,94,226                        // divps         %xmm10,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
   .byte  69,15,89,203                        // mulps         %xmm11,%xmm9
@@ -30023,22 +30217,22 @@ _sk_parametric_a_sse2:
   .byte  69,15,91,226                        // cvtdq2ps      %xmm10,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,194,236,1                     // cmpltps       %xmm12,%xmm13
-  .byte  68,15,40,21,133,39,0,0              // movaps        0x2785(%rip),%xmm10        # 53d0 <_sk_callback_sse2+0x925>
+  .byte  68,15,40,21,69,40,0,0               // movaps        0x2845(%rip),%xmm10        # 5490 <_sk_callback_sse2+0x92b>
   .byte  69,15,84,234                        // andps         %xmm10,%xmm13
   .byte  69,15,87,219                        // xorps         %xmm11,%xmm11
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
   .byte  69,15,40,233                        // movaps        %xmm9,%xmm13
   .byte  69,15,92,236                        // subps         %xmm12,%xmm13
-  .byte  68,15,88,13,121,39,0,0              // addps         0x2779(%rip),%xmm9        # 53e0 <_sk_callback_sse2+0x935>
-  .byte  68,15,40,37,129,39,0,0              // movaps        0x2781(%rip),%xmm12        # 53f0 <_sk_callback_sse2+0x945>
+  .byte  68,15,88,13,57,40,0,0               // addps         0x2839(%rip),%xmm9        # 54a0 <_sk_callback_sse2+0x93b>
+  .byte  68,15,40,37,65,40,0,0               // movaps        0x2841(%rip),%xmm12        # 54b0 <_sk_callback_sse2+0x94b>
   .byte  69,15,89,229                        // mulps         %xmm13,%xmm12
   .byte  69,15,92,204                        // subps         %xmm12,%xmm9
-  .byte  68,15,40,37,129,39,0,0              // movaps        0x2781(%rip),%xmm12        # 5400 <_sk_callback_sse2+0x955>
+  .byte  68,15,40,37,65,40,0,0               // movaps        0x2841(%rip),%xmm12        # 54c0 <_sk_callback_sse2+0x95b>
   .byte  69,15,92,229                        // subps         %xmm13,%xmm12
-  .byte  68,15,40,45,133,39,0,0              // movaps        0x2785(%rip),%xmm13        # 5410 <_sk_callback_sse2+0x965>
+  .byte  68,15,40,45,69,40,0,0               // movaps        0x2845(%rip),%xmm13        # 54d0 <_sk_callback_sse2+0x96b>
   .byte  69,15,94,236                        // divps         %xmm12,%xmm13
   .byte  69,15,88,233                        // addps         %xmm9,%xmm13
-  .byte  68,15,89,45,133,39,0,0              // mulps         0x2785(%rip),%xmm13        # 5420 <_sk_callback_sse2+0x975>
+  .byte  68,15,89,45,69,40,0,0               // mulps         0x2845(%rip),%xmm13        # 54e0 <_sk_callback_sse2+0x97b>
   .byte  102,69,15,91,205                    // cvtps2dq      %xmm13,%xmm9
   .byte  243,68,15,16,96,20                  // movss         0x14(%rax),%xmm12
   .byte  69,15,198,228,0                     // shufps        $0x0,%xmm12,%xmm12
@@ -30055,29 +30249,29 @@ HIDDEN _sk_lab_to_xyz_sse2
 .globl _sk_lab_to_xyz_sse2
 FUNCTION(_sk_lab_to_xyz_sse2)
 _sk_lab_to_xyz_sse2:
-  .byte  15,89,5,98,39,0,0                   // mulps         0x2762(%rip),%xmm0        # 5430 <_sk_callback_sse2+0x985>
-  .byte  68,15,40,5,106,39,0,0               // movaps        0x276a(%rip),%xmm8        # 5440 <_sk_callback_sse2+0x995>
+  .byte  15,89,5,34,40,0,0                   // mulps         0x2822(%rip),%xmm0        # 54f0 <_sk_callback_sse2+0x98b>
+  .byte  68,15,40,5,42,40,0,0                // movaps        0x282a(%rip),%xmm8        # 5500 <_sk_callback_sse2+0x99b>
   .byte  65,15,89,200                        // mulps         %xmm8,%xmm1
-  .byte  68,15,40,13,110,39,0,0              // movaps        0x276e(%rip),%xmm9        # 5450 <_sk_callback_sse2+0x9a5>
+  .byte  68,15,40,13,46,40,0,0               // movaps        0x282e(%rip),%xmm9        # 5510 <_sk_callback_sse2+0x9ab>
   .byte  65,15,88,201                        // addps         %xmm9,%xmm1
   .byte  65,15,89,208                        // mulps         %xmm8,%xmm2
   .byte  65,15,88,209                        // addps         %xmm9,%xmm2
-  .byte  15,88,5,107,39,0,0                  // addps         0x276b(%rip),%xmm0        # 5460 <_sk_callback_sse2+0x9b5>
-  .byte  15,89,5,116,39,0,0                  // mulps         0x2774(%rip),%xmm0        # 5470 <_sk_callback_sse2+0x9c5>
-  .byte  15,89,13,125,39,0,0                 // mulps         0x277d(%rip),%xmm1        # 5480 <_sk_callback_sse2+0x9d5>
+  .byte  15,88,5,43,40,0,0                   // addps         0x282b(%rip),%xmm0        # 5520 <_sk_callback_sse2+0x9bb>
+  .byte  15,89,5,52,40,0,0                   // mulps         0x2834(%rip),%xmm0        # 5530 <_sk_callback_sse2+0x9cb>
+  .byte  15,89,13,61,40,0,0                  // mulps         0x283d(%rip),%xmm1        # 5540 <_sk_callback_sse2+0x9db>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
-  .byte  15,89,21,131,39,0,0                 // mulps         0x2783(%rip),%xmm2        # 5490 <_sk_callback_sse2+0x9e5>
+  .byte  15,89,21,67,40,0,0                  // mulps         0x2843(%rip),%xmm2        # 5550 <_sk_callback_sse2+0x9eb>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  68,15,92,202                        // subps         %xmm2,%xmm9
   .byte  68,15,40,225                        // movaps        %xmm1,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
   .byte  68,15,89,225                        // mulps         %xmm1,%xmm12
-  .byte  15,40,21,120,39,0,0                 // movaps        0x2778(%rip),%xmm2        # 54a0 <_sk_callback_sse2+0x9f5>
+  .byte  15,40,21,56,40,0,0                  // movaps        0x2838(%rip),%xmm2        # 5560 <_sk_callback_sse2+0x9fb>
   .byte  68,15,40,194                        // movaps        %xmm2,%xmm8
   .byte  69,15,194,196,1                     // cmpltps       %xmm12,%xmm8
-  .byte  68,15,40,21,119,39,0,0              // movaps        0x2777(%rip),%xmm10        # 54b0 <_sk_callback_sse2+0xa05>
+  .byte  68,15,40,21,55,40,0,0               // movaps        0x2837(%rip),%xmm10        # 5570 <_sk_callback_sse2+0xa0b>
   .byte  65,15,88,202                        // addps         %xmm10,%xmm1
-  .byte  68,15,40,29,123,39,0,0              // movaps        0x277b(%rip),%xmm11        # 54c0 <_sk_callback_sse2+0xa15>
+  .byte  68,15,40,29,59,40,0,0               // movaps        0x283b(%rip),%xmm11        # 5580 <_sk_callback_sse2+0xa1b>
   .byte  65,15,89,203                        // mulps         %xmm11,%xmm1
   .byte  69,15,84,224                        // andps         %xmm8,%xmm12
   .byte  68,15,85,193                        // andnps        %xmm1,%xmm8
@@ -30101,8 +30295,8 @@ _sk_lab_to_xyz_sse2:
   .byte  15,84,194                           // andps         %xmm2,%xmm0
   .byte  65,15,85,209                        // andnps        %xmm9,%xmm2
   .byte  15,86,208                           // orps          %xmm0,%xmm2
-  .byte  68,15,89,5,43,39,0,0                // mulps         0x272b(%rip),%xmm8        # 54d0 <_sk_callback_sse2+0xa25>
-  .byte  15,89,21,52,39,0,0                  // mulps         0x2734(%rip),%xmm2        # 54e0 <_sk_callback_sse2+0xa35>
+  .byte  68,15,89,5,235,39,0,0               // mulps         0x27eb(%rip),%xmm8        # 5590 <_sk_callback_sse2+0xa2b>
+  .byte  15,89,21,244,39,0,0                 // mulps         0x27f4(%rip),%xmm2        # 55a0 <_sk_callback_sse2+0xa3b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  65,15,40,192                        // movaps        %xmm8,%xmm0
   .byte  255,224                             // jmpq          *%rax
@@ -30118,7 +30312,7 @@ _sk_load_a8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,28,39,0,0                  // mulps         0x271c(%rip),%xmm3        # 54f0 <_sk_callback_sse2+0xa45>
+  .byte  15,89,29,220,39,0,0                 // mulps         0x27dc(%rip),%xmm3        # 55b0 <_sk_callback_sse2+0xa4b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
@@ -30163,7 +30357,7 @@ _sk_gather_a8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,216                           // cvtdq2ps      %xmm0,%xmm3
-  .byte  15,89,29,139,38,0,0                 // mulps         0x268b(%rip),%xmm3        # 5500 <_sk_callback_sse2+0xa55>
+  .byte  15,89,29,75,39,0,0                  // mulps         0x274b(%rip),%xmm3        # 55c0 <_sk_callback_sse2+0xa5b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
@@ -30176,7 +30370,7 @@ FUNCTION(_sk_store_a8_sse2)
 _sk_store_a8_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,127,38,0,0               // movaps        0x267f(%rip),%xmm8        # 5510 <_sk_callback_sse2+0xa65>
+  .byte  68,15,40,5,63,39,0,0                // movaps        0x273f(%rip),%xmm8        # 55d0 <_sk_callback_sse2+0xa6b>
   .byte  68,15,89,195                        // mulps         %xmm3,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
   .byte  102,65,15,114,240,16                // pslld         $0x10,%xmm8
@@ -30198,9 +30392,9 @@ _sk_load_g8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,70,38,0,0                   // mulps         0x2646(%rip),%xmm0        # 5520 <_sk_callback_sse2+0xa75>
+  .byte  15,89,5,6,39,0,0                    // mulps         0x2706(%rip),%xmm0        # 55e0 <_sk_callback_sse2+0xa7b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,77,38,0,0                  // movaps        0x264d(%rip),%xmm3        # 5530 <_sk_callback_sse2+0xa85>
+  .byte  15,40,29,13,39,0,0                  // movaps        0x270d(%rip),%xmm3        # 55f0 <_sk_callback_sse2+0xa8b>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -30243,9 +30437,9 @@ _sk_gather_g8_sse2:
   .byte  102,15,96,193                       // punpcklbw     %xmm1,%xmm0
   .byte  102,15,97,193                       // punpcklwd     %xmm1,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,194,37,0,0                  // mulps         0x25c2(%rip),%xmm0        # 5540 <_sk_callback_sse2+0xa95>
+  .byte  15,89,5,130,38,0,0                  // mulps         0x2682(%rip),%xmm0        # 5600 <_sk_callback_sse2+0xa9b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,201,37,0,0                 // movaps        0x25c9(%rip),%xmm3        # 5550 <_sk_callback_sse2+0xaa5>
+  .byte  15,40,29,137,38,0,0                 // movaps        0x2689(%rip),%xmm3        # 5610 <_sk_callback_sse2+0xaab>
   .byte  15,40,200                           // movaps        %xmm0,%xmm1
   .byte  15,40,208                           // movaps        %xmm0,%xmm2
   .byte  255,224                             // jmpq          *%rax
@@ -30308,11 +30502,11 @@ _sk_gather_i8_sse2:
   .byte  102,67,15,110,12,136                // movd          (%r8,%r9,4),%xmm1
   .byte  102,68,15,98,201                    // punpckldq     %xmm1,%xmm9
   .byte  102,68,15,98,200                    // punpckldq     %xmm0,%xmm9
-  .byte  102,15,111,21,232,36,0,0            // movdqa        0x24e8(%rip),%xmm2        # 5560 <_sk_callback_sse2+0xab5>
+  .byte  102,15,111,21,168,37,0,0            // movdqa        0x25a8(%rip),%xmm2        # 5620 <_sk_callback_sse2+0xabb>
   .byte  102,65,15,111,193                   // movdqa        %xmm9,%xmm0
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,228,36,0,0               // movaps        0x24e4(%rip),%xmm8        # 5570 <_sk_callback_sse2+0xac5>
+  .byte  68,15,40,5,164,37,0,0               // movaps        0x25a4(%rip),%xmm8        # 5630 <_sk_callback_sse2+0xacb>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,114,209,8                    // psrld         $0x8,%xmm1
@@ -30339,19 +30533,19 @@ _sk_load_565_sse2:
   .byte  243,15,126,20,120                   // movq          (%rax,%rdi,2),%xmm2
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,208                       // punpcklwd     %xmm0,%xmm2
-  .byte  102,15,111,5,154,36,0,0             // movdqa        0x249a(%rip),%xmm0        # 5580 <_sk_callback_sse2+0xad5>
+  .byte  102,15,111,5,90,37,0,0              // movdqa        0x255a(%rip),%xmm0        # 5640 <_sk_callback_sse2+0xadb>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,156,36,0,0                  // mulps         0x249c(%rip),%xmm0        # 5590 <_sk_callback_sse2+0xae5>
-  .byte  102,15,111,13,164,36,0,0            // movdqa        0x24a4(%rip),%xmm1        # 55a0 <_sk_callback_sse2+0xaf5>
+  .byte  15,89,5,92,37,0,0                   // mulps         0x255c(%rip),%xmm0        # 5650 <_sk_callback_sse2+0xaeb>
+  .byte  102,15,111,13,100,37,0,0            // movdqa        0x2564(%rip),%xmm1        # 5660 <_sk_callback_sse2+0xafb>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,166,36,0,0                 // mulps         0x24a6(%rip),%xmm1        # 55b0 <_sk_callback_sse2+0xb05>
-  .byte  102,15,219,21,174,36,0,0            // pand          0x24ae(%rip),%xmm2        # 55c0 <_sk_callback_sse2+0xb15>
+  .byte  15,89,13,102,37,0,0                 // mulps         0x2566(%rip),%xmm1        # 5670 <_sk_callback_sse2+0xb0b>
+  .byte  102,15,219,21,110,37,0,0            // pand          0x256e(%rip),%xmm2        # 5680 <_sk_callback_sse2+0xb1b>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,180,36,0,0                 // mulps         0x24b4(%rip),%xmm2        # 55d0 <_sk_callback_sse2+0xb25>
+  .byte  15,89,21,116,37,0,0                 // mulps         0x2574(%rip),%xmm2        # 5690 <_sk_callback_sse2+0xb2b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,187,36,0,0                 // movaps        0x24bb(%rip),%xmm3        # 55e0 <_sk_callback_sse2+0xb35>
+  .byte  15,40,29,123,37,0,0                 // movaps        0x257b(%rip),%xmm3        # 56a0 <_sk_callback_sse2+0xb3b>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_gather_565_sse2
@@ -30386,19 +30580,19 @@ _sk_gather_565_sse2:
   .byte  102,15,196,208,3                    // pinsrw        $0x3,%eax,%xmm2
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,208                       // punpcklwd     %xmm0,%xmm2
-  .byte  102,15,111,5,68,36,0,0              // movdqa        0x2444(%rip),%xmm0        # 55f0 <_sk_callback_sse2+0xb45>
+  .byte  102,15,111,5,4,37,0,0               // movdqa        0x2504(%rip),%xmm0        # 56b0 <_sk_callback_sse2+0xb4b>
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,70,36,0,0                   // mulps         0x2446(%rip),%xmm0        # 5600 <_sk_callback_sse2+0xb55>
-  .byte  102,15,111,13,78,36,0,0             // movdqa        0x244e(%rip),%xmm1        # 5610 <_sk_callback_sse2+0xb65>
+  .byte  15,89,5,6,37,0,0                    // mulps         0x2506(%rip),%xmm0        # 56c0 <_sk_callback_sse2+0xb5b>
+  .byte  102,15,111,13,14,37,0,0             // movdqa        0x250e(%rip),%xmm1        # 56d0 <_sk_callback_sse2+0xb6b>
   .byte  102,15,219,202                      // pand          %xmm2,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,80,36,0,0                  // mulps         0x2450(%rip),%xmm1        # 5620 <_sk_callback_sse2+0xb75>
-  .byte  102,15,219,21,88,36,0,0             // pand          0x2458(%rip),%xmm2        # 5630 <_sk_callback_sse2+0xb85>
+  .byte  15,89,13,16,37,0,0                  // mulps         0x2510(%rip),%xmm1        # 56e0 <_sk_callback_sse2+0xb7b>
+  .byte  102,15,219,21,24,37,0,0             // pand          0x2518(%rip),%xmm2        # 56f0 <_sk_callback_sse2+0xb8b>
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,94,36,0,0                  // mulps         0x245e(%rip),%xmm2        # 5640 <_sk_callback_sse2+0xb95>
+  .byte  15,89,21,30,37,0,0                  // mulps         0x251e(%rip),%xmm2        # 5700 <_sk_callback_sse2+0xb9b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,101,36,0,0                 // movaps        0x2465(%rip),%xmm3        # 5650 <_sk_callback_sse2+0xba5>
+  .byte  15,40,29,37,37,0,0                  // movaps        0x2525(%rip),%xmm3        # 5710 <_sk_callback_sse2+0xbab>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_565_sse2
@@ -30407,12 +30601,12 @@ FUNCTION(_sk_store_565_sse2)
 _sk_store_565_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,102,36,0,0               // movaps        0x2466(%rip),%xmm8        # 5660 <_sk_callback_sse2+0xbb5>
+  .byte  68,15,40,5,38,37,0,0                // movaps        0x2526(%rip),%xmm8        # 5720 <_sk_callback_sse2+0xbbb>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
   .byte  102,65,15,114,241,11                // pslld         $0xb,%xmm9
-  .byte  68,15,40,21,91,36,0,0               // movaps        0x245b(%rip),%xmm10        # 5670 <_sk_callback_sse2+0xbc5>
+  .byte  68,15,40,21,27,37,0,0               // movaps        0x251b(%rip),%xmm10        # 5730 <_sk_callback_sse2+0xbcb>
   .byte  68,15,89,209                        // mulps         %xmm1,%xmm10
   .byte  102,69,15,91,210                    // cvtps2dq      %xmm10,%xmm10
   .byte  102,65,15,114,242,5                 // pslld         $0x5,%xmm10
@@ -30436,21 +30630,21 @@ _sk_load_4444_sse2:
   .byte  243,15,126,28,120                   // movq          (%rax,%rdi,2),%xmm3
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,216                       // punpcklwd     %xmm0,%xmm3
-  .byte  102,15,111,5,20,36,0,0              // movdqa        0x2414(%rip),%xmm0        # 5680 <_sk_callback_sse2+0xbd5>
+  .byte  102,15,111,5,212,36,0,0             // movdqa        0x24d4(%rip),%xmm0        # 5740 <_sk_callback_sse2+0xbdb>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,22,36,0,0                   // mulps         0x2416(%rip),%xmm0        # 5690 <_sk_callback_sse2+0xbe5>
-  .byte  102,15,111,13,30,36,0,0             // movdqa        0x241e(%rip),%xmm1        # 56a0 <_sk_callback_sse2+0xbf5>
+  .byte  15,89,5,214,36,0,0                  // mulps         0x24d6(%rip),%xmm0        # 5750 <_sk_callback_sse2+0xbeb>
+  .byte  102,15,111,13,222,36,0,0            // movdqa        0x24de(%rip),%xmm1        # 5760 <_sk_callback_sse2+0xbfb>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,32,36,0,0                  // mulps         0x2420(%rip),%xmm1        # 56b0 <_sk_callback_sse2+0xc05>
-  .byte  102,15,111,21,40,36,0,0             // movdqa        0x2428(%rip),%xmm2        # 56c0 <_sk_callback_sse2+0xc15>
+  .byte  15,89,13,224,36,0,0                 // mulps         0x24e0(%rip),%xmm1        # 5770 <_sk_callback_sse2+0xc0b>
+  .byte  102,15,111,21,232,36,0,0            // movdqa        0x24e8(%rip),%xmm2        # 5780 <_sk_callback_sse2+0xc1b>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,42,36,0,0                  // mulps         0x242a(%rip),%xmm2        # 56d0 <_sk_callback_sse2+0xc25>
-  .byte  102,15,219,29,50,36,0,0             // pand          0x2432(%rip),%xmm3        # 56e0 <_sk_callback_sse2+0xc35>
+  .byte  15,89,21,234,36,0,0                 // mulps         0x24ea(%rip),%xmm2        # 5790 <_sk_callback_sse2+0xc2b>
+  .byte  102,15,219,29,242,36,0,0            // pand          0x24f2(%rip),%xmm3        # 57a0 <_sk_callback_sse2+0xc3b>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,56,36,0,0                  // mulps         0x2438(%rip),%xmm3        # 56f0 <_sk_callback_sse2+0xc45>
+  .byte  15,89,29,248,36,0,0                 // mulps         0x24f8(%rip),%xmm3        # 57b0 <_sk_callback_sse2+0xc4b>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -30486,21 +30680,21 @@ _sk_gather_4444_sse2:
   .byte  102,15,196,216,3                    // pinsrw        $0x3,%eax,%xmm3
   .byte  102,15,239,192                      // pxor          %xmm0,%xmm0
   .byte  102,15,97,216                       // punpcklwd     %xmm0,%xmm3
-  .byte  102,15,111,5,191,35,0,0             // movdqa        0x23bf(%rip),%xmm0        # 5700 <_sk_callback_sse2+0xc55>
+  .byte  102,15,111,5,127,36,0,0             // movdqa        0x247f(%rip),%xmm0        # 57c0 <_sk_callback_sse2+0xc5b>
   .byte  102,15,219,195                      // pand          %xmm3,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  15,89,5,193,35,0,0                  // mulps         0x23c1(%rip),%xmm0        # 5710 <_sk_callback_sse2+0xc65>
-  .byte  102,15,111,13,201,35,0,0            // movdqa        0x23c9(%rip),%xmm1        # 5720 <_sk_callback_sse2+0xc75>
+  .byte  15,89,5,129,36,0,0                  // mulps         0x2481(%rip),%xmm0        # 57d0 <_sk_callback_sse2+0xc6b>
+  .byte  102,15,111,13,137,36,0,0            // movdqa        0x2489(%rip),%xmm1        # 57e0 <_sk_callback_sse2+0xc7b>
   .byte  102,15,219,203                      // pand          %xmm3,%xmm1
   .byte  15,91,201                           // cvtdq2ps      %xmm1,%xmm1
-  .byte  15,89,13,203,35,0,0                 // mulps         0x23cb(%rip),%xmm1        # 5730 <_sk_callback_sse2+0xc85>
-  .byte  102,15,111,21,211,35,0,0            // movdqa        0x23d3(%rip),%xmm2        # 5740 <_sk_callback_sse2+0xc95>
+  .byte  15,89,13,139,36,0,0                 // mulps         0x248b(%rip),%xmm1        # 57f0 <_sk_callback_sse2+0xc8b>
+  .byte  102,15,111,21,147,36,0,0            // movdqa        0x2493(%rip),%xmm2        # 5800 <_sk_callback_sse2+0xc9b>
   .byte  102,15,219,211                      // pand          %xmm3,%xmm2
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
-  .byte  15,89,21,213,35,0,0                 // mulps         0x23d5(%rip),%xmm2        # 5750 <_sk_callback_sse2+0xca5>
-  .byte  102,15,219,29,221,35,0,0            // pand          0x23dd(%rip),%xmm3        # 5760 <_sk_callback_sse2+0xcb5>
+  .byte  15,89,21,149,36,0,0                 // mulps         0x2495(%rip),%xmm2        # 5810 <_sk_callback_sse2+0xcab>
+  .byte  102,15,219,29,157,36,0,0            // pand          0x249d(%rip),%xmm3        # 5820 <_sk_callback_sse2+0xcbb>
   .byte  15,91,219                           // cvtdq2ps      %xmm3,%xmm3
-  .byte  15,89,29,227,35,0,0                 // mulps         0x23e3(%rip),%xmm3        # 5770 <_sk_callback_sse2+0xcc5>
+  .byte  15,89,29,163,36,0,0                 // mulps         0x24a3(%rip),%xmm3        # 5830 <_sk_callback_sse2+0xccb>
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
 
@@ -30510,7 +30704,7 @@ FUNCTION(_sk_store_4444_sse2)
 _sk_store_4444_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,226,35,0,0               // movaps        0x23e2(%rip),%xmm8        # 5780 <_sk_callback_sse2+0xcd5>
+  .byte  68,15,40,5,162,36,0,0               // movaps        0x24a2(%rip),%xmm8        # 5840 <_sk_callback_sse2+0xcdb>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -30542,11 +30736,11 @@ _sk_load_8888_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
   .byte  68,15,16,12,184                     // movups        (%rax,%rdi,4),%xmm9
-  .byte  15,40,21,117,35,0,0                 // movaps        0x2375(%rip),%xmm2        # 5790 <_sk_callback_sse2+0xce5>
+  .byte  15,40,21,53,36,0,0                  // movaps        0x2435(%rip),%xmm2        # 5850 <_sk_callback_sse2+0xceb>
   .byte  65,15,40,193                        // movaps        %xmm9,%xmm0
   .byte  15,84,194                           // andps         %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,115,35,0,0               // movaps        0x2373(%rip),%xmm8        # 57a0 <_sk_callback_sse2+0xcf5>
+  .byte  68,15,40,5,51,36,0,0                // movaps        0x2433(%rip),%xmm8        # 5860 <_sk_callback_sse2+0xcfb>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  65,15,40,201                        // movaps        %xmm9,%xmm1
   .byte  102,15,114,209,8                    // psrld         $0x8,%xmm1
@@ -30595,11 +30789,11 @@ _sk_gather_8888_sse2:
   .byte  102,67,15,110,12,129                // movd          (%r9,%r8,4),%xmm1
   .byte  102,68,15,98,201                    // punpckldq     %xmm1,%xmm9
   .byte  102,68,15,98,200                    // punpckldq     %xmm0,%xmm9
-  .byte  102,15,111,21,196,34,0,0            // movdqa        0x22c4(%rip),%xmm2        # 57b0 <_sk_callback_sse2+0xd05>
+  .byte  102,15,111,21,132,35,0,0            // movdqa        0x2384(%rip),%xmm2        # 5870 <_sk_callback_sse2+0xd0b>
   .byte  102,65,15,111,193                   // movdqa        %xmm9,%xmm0
   .byte  102,15,219,194                      // pand          %xmm2,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,5,192,34,0,0               // movaps        0x22c0(%rip),%xmm8        # 57c0 <_sk_callback_sse2+0xd15>
+  .byte  68,15,40,5,128,35,0,0               // movaps        0x2380(%rip),%xmm8        # 5880 <_sk_callback_sse2+0xd1b>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,65,15,111,201                   // movdqa        %xmm9,%xmm1
   .byte  102,15,114,209,8                    // psrld         $0x8,%xmm1
@@ -30623,7 +30817,7 @@ FUNCTION(_sk_store_8888_sse2)
 _sk_store_8888_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,5,131,34,0,0               // movaps        0x2283(%rip),%xmm8        # 57d0 <_sk_callback_sse2+0xd25>
+  .byte  68,15,40,5,67,35,0,0                // movaps        0x2343(%rip),%xmm8        # 5890 <_sk_callback_sse2+0xd2b>
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  102,69,15,91,201                    // cvtps2dq      %xmm9,%xmm9
@@ -30662,7 +30856,7 @@ _sk_load_f16_sse2:
   .byte  102,69,15,239,210                   // pxor          %xmm10,%xmm10
   .byte  102,65,15,111,206                   // movdqa        %xmm14,%xmm1
   .byte  102,65,15,97,202                    // punpcklwd     %xmm10,%xmm1
-  .byte  102,68,15,111,13,243,33,0,0         // movdqa        0x21f3(%rip),%xmm9        # 57e0 <_sk_callback_sse2+0xd35>
+  .byte  102,68,15,111,13,179,34,0,0         // movdqa        0x22b3(%rip),%xmm9        # 58a0 <_sk_callback_sse2+0xd3b>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,193                   // pand          %xmm9,%xmm0
   .byte  102,15,239,200                      // pxor          %xmm0,%xmm1
@@ -30670,11 +30864,11 @@ _sk_load_f16_sse2:
   .byte  102,68,15,111,233                   // movdqa        %xmm1,%xmm13
   .byte  102,65,15,114,245,13                // pslld         $0xd,%xmm13
   .byte  102,68,15,235,232                   // por           %xmm0,%xmm13
-  .byte  102,68,15,111,29,216,33,0,0         // movdqa        0x21d8(%rip),%xmm11        # 57f0 <_sk_callback_sse2+0xd45>
+  .byte  102,68,15,111,29,152,34,0,0         // movdqa        0x2298(%rip),%xmm11        # 58b0 <_sk_callback_sse2+0xd4b>
   .byte  102,69,15,254,235                   // paddd         %xmm11,%xmm13
-  .byte  102,68,15,111,37,218,33,0,0         // movdqa        0x21da(%rip),%xmm12        # 5800 <_sk_callback_sse2+0xd55>
+  .byte  102,68,15,111,37,154,34,0,0         // movdqa        0x229a(%rip),%xmm12        # 58c0 <_sk_callback_sse2+0xd5b>
   .byte  102,65,15,239,204                   // pxor          %xmm12,%xmm1
-  .byte  102,15,111,29,221,33,0,0            // movdqa        0x21dd(%rip),%xmm3        # 5810 <_sk_callback_sse2+0xd65>
+  .byte  102,15,111,29,157,34,0,0            // movdqa        0x229d(%rip),%xmm3        # 58d0 <_sk_callback_sse2+0xd6b>
   .byte  102,15,111,195                      // movdqa        %xmm3,%xmm0
   .byte  102,15,102,193                      // pcmpgtd       %xmm1,%xmm0
   .byte  102,65,15,223,197                   // pandn         %xmm13,%xmm0
@@ -30760,7 +30954,7 @@ _sk_gather_f16_sse2:
   .byte  102,69,15,239,210                   // pxor          %xmm10,%xmm10
   .byte  102,65,15,111,206                   // movdqa        %xmm14,%xmm1
   .byte  102,65,15,97,202                    // punpcklwd     %xmm10,%xmm1
-  .byte  102,68,15,111,13,107,32,0,0         // movdqa        0x206b(%rip),%xmm9        # 5820 <_sk_callback_sse2+0xd75>
+  .byte  102,68,15,111,13,43,33,0,0          // movdqa        0x212b(%rip),%xmm9        # 58e0 <_sk_callback_sse2+0xd7b>
   .byte  102,15,111,193                      // movdqa        %xmm1,%xmm0
   .byte  102,65,15,219,193                   // pand          %xmm9,%xmm0
   .byte  102,15,239,200                      // pxor          %xmm0,%xmm1
@@ -30768,11 +30962,11 @@ _sk_gather_f16_sse2:
   .byte  102,68,15,111,233                   // movdqa        %xmm1,%xmm13
   .byte  102,65,15,114,245,13                // pslld         $0xd,%xmm13
   .byte  102,68,15,235,232                   // por           %xmm0,%xmm13
-  .byte  102,68,15,111,29,80,32,0,0          // movdqa        0x2050(%rip),%xmm11        # 5830 <_sk_callback_sse2+0xd85>
+  .byte  102,68,15,111,29,16,33,0,0          // movdqa        0x2110(%rip),%xmm11        # 58f0 <_sk_callback_sse2+0xd8b>
   .byte  102,69,15,254,235                   // paddd         %xmm11,%xmm13
-  .byte  102,68,15,111,37,82,32,0,0          // movdqa        0x2052(%rip),%xmm12        # 5840 <_sk_callback_sse2+0xd95>
+  .byte  102,68,15,111,37,18,33,0,0          // movdqa        0x2112(%rip),%xmm12        # 5900 <_sk_callback_sse2+0xd9b>
   .byte  102,65,15,239,204                   // pxor          %xmm12,%xmm1
-  .byte  102,15,111,29,85,32,0,0             // movdqa        0x2055(%rip),%xmm3        # 5850 <_sk_callback_sse2+0xda5>
+  .byte  102,15,111,29,21,33,0,0             // movdqa        0x2115(%rip),%xmm3        # 5910 <_sk_callback_sse2+0xdab>
   .byte  102,15,111,195                      // movdqa        %xmm3,%xmm0
   .byte  102,15,102,193                      // pcmpgtd       %xmm1,%xmm0
   .byte  102,65,15,223,197                   // pandn         %xmm13,%xmm0
@@ -30825,17 +31019,17 @@ FUNCTION(_sk_store_f16_sse2)
 _sk_store_f16_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  102,68,15,111,21,125,31,0,0         // movdqa        0x1f7d(%rip),%xmm10        # 5860 <_sk_callback_sse2+0xdb5>
+  .byte  102,68,15,111,21,61,32,0,0          // movdqa        0x203d(%rip),%xmm10        # 5920 <_sk_callback_sse2+0xdbb>
   .byte  102,68,15,111,224                   // movdqa        %xmm0,%xmm12
   .byte  102,68,15,111,232                   // movdqa        %xmm0,%xmm13
   .byte  102,69,15,219,234                   // pand          %xmm10,%xmm13
   .byte  102,69,15,239,229                   // pxor          %xmm13,%xmm12
-  .byte  102,68,15,111,13,112,31,0,0         // movdqa        0x1f70(%rip),%xmm9        # 5870 <_sk_callback_sse2+0xdc5>
+  .byte  102,68,15,111,13,48,32,0,0          // movdqa        0x2030(%rip),%xmm9        # 5930 <_sk_callback_sse2+0xdcb>
   .byte  102,65,15,114,213,16                // psrld         $0x10,%xmm13
   .byte  102,69,15,111,193                   // movdqa        %xmm9,%xmm8
   .byte  102,69,15,102,196                   // pcmpgtd       %xmm12,%xmm8
   .byte  102,65,15,114,212,13                // psrld         $0xd,%xmm12
-  .byte  102,68,15,111,29,97,31,0,0          // movdqa        0x1f61(%rip),%xmm11        # 5880 <_sk_callback_sse2+0xdd5>
+  .byte  102,68,15,111,29,33,32,0,0          // movdqa        0x2021(%rip),%xmm11        # 5940 <_sk_callback_sse2+0xddb>
   .byte  102,69,15,235,235                   // por           %xmm11,%xmm13
   .byte  102,69,15,254,236                   // paddd         %xmm12,%xmm13
   .byte  102,65,15,114,245,16                // pslld         $0x10,%xmm13
@@ -30914,7 +31108,7 @@ _sk_load_u16_be_sse2:
   .byte  102,69,15,239,201                   // pxor          %xmm9,%xmm9
   .byte  102,65,15,97,201                    // punpcklwd     %xmm9,%xmm1
   .byte  15,91,193                           // cvtdq2ps      %xmm1,%xmm0
-  .byte  68,15,40,5,255,29,0,0               // movaps        0x1dff(%rip),%xmm8        # 5890 <_sk_callback_sse2+0xde5>
+  .byte  68,15,40,5,191,30,0,0               // movaps        0x1ebf(%rip),%xmm8        # 5950 <_sk_callback_sse2+0xdeb>
   .byte  65,15,89,192                        // mulps         %xmm8,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -30967,7 +31161,7 @@ _sk_load_rgb_u16_be_sse2:
   .byte  102,69,15,239,192                   // pxor          %xmm8,%xmm8
   .byte  102,65,15,97,192                    // punpcklwd     %xmm8,%xmm0
   .byte  15,91,192                           // cvtdq2ps      %xmm0,%xmm0
-  .byte  68,15,40,13,59,29,0,0               // movaps        0x1d3b(%rip),%xmm9        # 58a0 <_sk_callback_sse2+0xdf5>
+  .byte  68,15,40,13,251,29,0,0              // movaps        0x1dfb(%rip),%xmm9        # 5960 <_sk_callback_sse2+0xdfb>
   .byte  65,15,89,193                        // mulps         %xmm9,%xmm0
   .byte  102,15,111,203                      // movdqa        %xmm3,%xmm1
   .byte  102,15,113,241,8                    // psllw         $0x8,%xmm1
@@ -30984,7 +31178,7 @@ _sk_load_rgb_u16_be_sse2:
   .byte  15,91,210                           // cvtdq2ps      %xmm2,%xmm2
   .byte  65,15,89,209                        // mulps         %xmm9,%xmm2
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  15,40,29,2,29,0,0                   // movaps        0x1d02(%rip),%xmm3        # 58b0 <_sk_callback_sse2+0xe05>
+  .byte  15,40,29,194,29,0,0                 // movaps        0x1dc2(%rip),%xmm3        # 5970 <_sk_callback_sse2+0xe0b>
   .byte  255,224                             // jmpq          *%rax
 
 HIDDEN _sk_store_u16_be_sse2
@@ -30993,7 +31187,7 @@ FUNCTION(_sk_store_u16_be_sse2)
 _sk_store_u16_be_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  72,139,0                            // mov           (%rax),%rax
-  .byte  68,15,40,13,3,29,0,0                // movaps        0x1d03(%rip),%xmm9        # 58c0 <_sk_callback_sse2+0xe15>
+  .byte  68,15,40,13,195,29,0,0              // movaps        0x1dc3(%rip),%xmm9        # 5980 <_sk_callback_sse2+0xe1b>
   .byte  68,15,40,192                        // movaps        %xmm0,%xmm8
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  102,69,15,91,192                    // cvtps2dq      %xmm8,%xmm8
@@ -31139,7 +31333,7 @@ _sk_repeat_x_sse2:
   .byte  243,69,15,91,209                    // cvttps2dq     %xmm9,%xmm10
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,84,13,3,27,0,0                // andps         0x1b03(%rip),%xmm9        # 58d0 <_sk_callback_sse2+0xe25>
+  .byte  68,15,84,13,195,27,0,0              // andps         0x1bc3(%rip),%xmm9        # 5990 <_sk_callback_sse2+0xe2b>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
   .byte  65,15,92,194                        // subps         %xmm10,%xmm0
@@ -31159,7 +31353,7 @@ _sk_repeat_y_sse2:
   .byte  243,69,15,91,209                    // cvttps2dq     %xmm9,%xmm10
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,84,13,213,26,0,0              // andps         0x1ad5(%rip),%xmm9        # 58e0 <_sk_callback_sse2+0xe35>
+  .byte  68,15,84,13,149,27,0,0              // andps         0x1b95(%rip),%xmm9        # 59a0 <_sk_callback_sse2+0xe3b>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,89,208                        // mulps         %xmm8,%xmm10
   .byte  65,15,92,202                        // subps         %xmm10,%xmm1
@@ -31183,7 +31377,7 @@ _sk_mirror_x_sse2:
   .byte  243,69,15,91,218                    // cvttps2dq     %xmm10,%xmm11
   .byte  69,15,91,219                        // cvtdq2ps      %xmm11,%xmm11
   .byte  69,15,194,211,1                     // cmpltps       %xmm11,%xmm10
-  .byte  68,15,84,21,149,26,0,0              // andps         0x1a95(%rip),%xmm10        # 58f0 <_sk_callback_sse2+0xe45>
+  .byte  68,15,84,21,85,27,0,0               // andps         0x1b55(%rip),%xmm10        # 59b0 <_sk_callback_sse2+0xe4b>
   .byte  69,15,87,228                        // xorps         %xmm12,%xmm12
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  69,15,89,216                        // mulps         %xmm8,%xmm11
@@ -31211,7 +31405,7 @@ _sk_mirror_y_sse2:
   .byte  243,69,15,91,218                    // cvttps2dq     %xmm10,%xmm11
   .byte  69,15,91,219                        // cvtdq2ps      %xmm11,%xmm11
   .byte  69,15,194,211,1                     // cmpltps       %xmm11,%xmm10
-  .byte  68,15,84,21,69,26,0,0               // andps         0x1a45(%rip),%xmm10        # 5900 <_sk_callback_sse2+0xe55>
+  .byte  68,15,84,21,5,27,0,0                // andps         0x1b05(%rip),%xmm10        # 59c0 <_sk_callback_sse2+0xe5b>
   .byte  69,15,87,228                        // xorps         %xmm12,%xmm12
   .byte  69,15,92,218                        // subps         %xmm10,%xmm11
   .byte  69,15,89,216                        // mulps         %xmm8,%xmm11
@@ -31228,10 +31422,10 @@ HIDDEN _sk_luminance_to_alpha_sse2
 FUNCTION(_sk_luminance_to_alpha_sse2)
 _sk_luminance_to_alpha_sse2:
   .byte  15,40,218                           // movaps        %xmm2,%xmm3
-  .byte  15,89,5,39,26,0,0                   // mulps         0x1a27(%rip),%xmm0        # 5910 <_sk_callback_sse2+0xe65>
-  .byte  15,89,13,48,26,0,0                  // mulps         0x1a30(%rip),%xmm1        # 5920 <_sk_callback_sse2+0xe75>
+  .byte  15,89,5,231,26,0,0                  // mulps         0x1ae7(%rip),%xmm0        # 59d0 <_sk_callback_sse2+0xe6b>
+  .byte  15,89,13,240,26,0,0                 // mulps         0x1af0(%rip),%xmm1        # 59e0 <_sk_callback_sse2+0xe7b>
   .byte  15,88,200                           // addps         %xmm0,%xmm1
-  .byte  15,89,29,54,26,0,0                  // mulps         0x1a36(%rip),%xmm3        # 5930 <_sk_callback_sse2+0xe85>
+  .byte  15,89,29,246,26,0,0                 // mulps         0x1af6(%rip),%xmm3        # 59f0 <_sk_callback_sse2+0xe8b>
   .byte  15,88,217                           // addps         %xmm1,%xmm3
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,87,192                           // xorps         %xmm0,%xmm0
@@ -31406,6 +31600,56 @@ _sk_matrix_4x5_sse2:
   .byte  65,15,40,219                        // movaps        %xmm11,%xmm3
   .byte  255,224                             // jmpq          *%rax
 
+HIDDEN _sk_matrix_4x3_sse2
+.globl _sk_matrix_4x3_sse2
+FUNCTION(_sk_matrix_4x3_sse2)
+_sk_matrix_4x3_sse2:                  // computes xmm{k} = m[k]*in0 + m[k+4]*in1 + m[k+8] for k=0..3, where in0/in1 are the incoming xmm0/xmm1 and m is a table of 12 floats addressed by the word loaded from (%rsi); presumably maps 2 inputs (x,y) to 4 outputs (r,g,b,a) -- confirm against SkJumper_stages.cpp
+  .byte  68,15,40,201                        // movaps        %xmm1,%xmm9 -- keep incoming xmm1 (in1); xmm1 is overwritten below
+  .byte  68,15,40,192                        // movaps        %xmm0,%xmm8 -- keep incoming xmm0 (in0); xmm0 is overwritten below
+  .byte  72,173                              // lods          %ds:(%rsi),%rax -- rax = 8-byte word at (%rsi), rsi advances past it (assuming DF is clear); rax is the base of m below
+  .byte  243,15,16,0                         // movss         (%rax),%xmm0 -- load m[0]
+  .byte  243,15,16,72,4                      // movss         0x4(%rax),%xmm1 -- load m[1]
+  .byte  15,198,192,0                        // shufps        $0x0,%xmm0,%xmm0 -- broadcast m[0] to all four lanes
+  .byte  243,15,16,80,16                     // movss         0x10(%rax),%xmm2 -- load m[4]
+  .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2 -- broadcast m[4]
+  .byte  243,15,16,88,32                     // movss         0x20(%rax),%xmm3 -- load m[8]
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3 -- broadcast m[8]
+  .byte  65,15,89,209                        // mulps         %xmm9,%xmm2 -- m[4]*in1
+  .byte  15,88,211                           // addps         %xmm3,%xmm2 -- m[4]*in1 + m[8]
+  .byte  65,15,89,192                        // mulps         %xmm8,%xmm0 -- m[0]*in0
+  .byte  15,88,194                           // addps         %xmm2,%xmm0 -- output 0: xmm0 = m[0]*in0 + m[4]*in1 + m[8]
+  .byte  15,198,201,0                        // shufps        $0x0,%xmm1,%xmm1 -- broadcast m[1] (loaded earlier)
+  .byte  243,15,16,80,20                     // movss         0x14(%rax),%xmm2 -- load m[5]
+  .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2 -- broadcast m[5]
+  .byte  243,15,16,88,36                     // movss         0x24(%rax),%xmm3 -- load m[9]
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3 -- broadcast m[9]
+  .byte  65,15,89,209                        // mulps         %xmm9,%xmm2 -- m[5]*in1
+  .byte  15,88,211                           // addps         %xmm3,%xmm2 -- m[5]*in1 + m[9]
+  .byte  65,15,89,200                        // mulps         %xmm8,%xmm1 -- m[1]*in0
+  .byte  15,88,202                           // addps         %xmm2,%xmm1 -- output 1: xmm1 = m[1]*in0 + m[5]*in1 + m[9]
+  .byte  243,15,16,80,8                      // movss         0x8(%rax),%xmm2 -- load m[2]
+  .byte  15,198,210,0                        // shufps        $0x0,%xmm2,%xmm2 -- broadcast m[2]
+  .byte  243,15,16,88,24                     // movss         0x18(%rax),%xmm3 -- load m[6]
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3 -- broadcast m[6]
+  .byte  243,68,15,16,80,40                  // movss         0x28(%rax),%xmm10 -- load m[10]
+  .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10 -- broadcast m[10]
+  .byte  65,15,89,217                        // mulps         %xmm9,%xmm3 -- m[6]*in1
+  .byte  65,15,88,218                        // addps         %xmm10,%xmm3 -- m[6]*in1 + m[10]
+  .byte  65,15,89,208                        // mulps         %xmm8,%xmm2 -- m[2]*in0
+  .byte  15,88,211                           // addps         %xmm3,%xmm2 -- output 2: xmm2 = m[2]*in0 + m[6]*in1 + m[10]
+  .byte  243,15,16,88,12                     // movss         0xc(%rax),%xmm3 -- load m[3]
+  .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3 -- broadcast m[3]
+  .byte  243,68,15,16,80,28                  // movss         0x1c(%rax),%xmm10 -- load m[7]
+  .byte  69,15,198,210,0                     // shufps        $0x0,%xmm10,%xmm10 -- broadcast m[7]
+  .byte  243,68,15,16,88,44                  // movss         0x2c(%rax),%xmm11 -- load m[11]
+  .byte  69,15,198,219,0                     // shufps        $0x0,%xmm11,%xmm11 -- broadcast m[11]
+  .byte  69,15,89,209                        // mulps         %xmm9,%xmm10 -- m[7]*in1
+  .byte  69,15,88,211                        // addps         %xmm11,%xmm10 -- m[7]*in1 + m[11]
+  .byte  65,15,89,216                        // mulps         %xmm8,%xmm3 -- m[3]*in0
+  .byte  65,15,88,218                        // addps         %xmm10,%xmm3 -- output 3: xmm3 = m[3]*in0 + m[7]*in1 + m[11]
+  .byte  72,173                              // lods          %ds:(%rsi),%rax -- rax = next 8-byte word at (%rsi), rsi advances past it
+  .byte  255,224                             // jmpq          *%rax -- tail-jump to the address just loaded
+
 HIDDEN _sk_matrix_perspective_sse2
 .globl _sk_matrix_perspective_sse2
 FUNCTION(_sk_matrix_perspective_sse2)
@@ -31457,9 +31701,9 @@ _sk_evenly_spaced_gradient_sse2:
   .byte  72,139,8                            // mov           (%rax),%rcx
   .byte  76,139,88,8                         // mov           0x8(%rax),%r11
   .byte  72,255,201                          // dec           %rcx
-  .byte  120,7                               // js            4279 <_sk_evenly_spaced_gradient_sse2+0x15>
+  .byte  120,7                               // js            4333 <_sk_evenly_spaced_gradient_sse2+0x15>
   .byte  243,72,15,42,201                    // cvtsi2ss      %rcx,%xmm1
-  .byte  235,21                              // jmp           428e <_sk_evenly_spaced_gradient_sse2+0x2a>
+  .byte  235,21                              // jmp           4348 <_sk_evenly_spaced_gradient_sse2+0x2a>
   .byte  73,137,200                          // mov           %rcx,%r8
   .byte  73,209,232                          // shr           %r8
   .byte  131,225,1                           // and           $0x1,%ecx
@@ -31559,12 +31803,12 @@ _sk_gradient_sse2:
   .byte  76,139,0                            // mov           (%rax),%r8
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
   .byte  73,131,248,2                        // cmp           $0x2,%r8
-  .byte  114,50                              // jb            4451 <_sk_gradient_sse2+0x41>
+  .byte  114,50                              // jb            450b <_sk_gradient_sse2+0x41>
   .byte  72,139,72,72                        // mov           0x48(%rax),%rcx
   .byte  73,255,200                          // dec           %r8
   .byte  72,131,193,4                        // add           $0x4,%rcx
   .byte  102,15,239,201                      // pxor          %xmm1,%xmm1
-  .byte  15,40,21,11,21,0,0                  // movaps        0x150b(%rip),%xmm2        # 5940 <_sk_callback_sse2+0xe95>
+  .byte  15,40,21,17,21,0,0                  // movaps        0x1511(%rip),%xmm2        # 5a00 <_sk_callback_sse2+0xe9b>
   .byte  243,15,16,25                        // movss         (%rcx),%xmm3
   .byte  15,198,219,0                        // shufps        $0x0,%xmm3,%xmm3
   .byte  15,194,216,2                        // cmpleps       %xmm0,%xmm3
@@ -31572,7 +31816,7 @@ _sk_gradient_sse2:
   .byte  102,15,254,203                      // paddd         %xmm3,%xmm1
   .byte  72,131,193,4                        // add           $0x4,%rcx
   .byte  73,255,200                          // dec           %r8
-  .byte  117,228                             // jne           4435 <_sk_gradient_sse2+0x25>
+  .byte  117,228                             // jne           44ef <_sk_gradient_sse2+0x25>
   .byte  65,86                               // push          %r14
   .byte  83                                  // push          %rbx
   .byte  102,15,112,209,78                   // pshufd        $0x4e,%xmm1,%xmm2
@@ -31712,29 +31956,29 @@ _sk_xy_to_unit_angle_sse2:
   .byte  69,15,94,220                        // divps         %xmm12,%xmm11
   .byte  69,15,40,227                        // movaps        %xmm11,%xmm12
   .byte  69,15,89,228                        // mulps         %xmm12,%xmm12
-  .byte  68,15,40,45,205,18,0,0              // movaps        0x12cd(%rip),%xmm13        # 5950 <_sk_callback_sse2+0xea5>
+  .byte  68,15,40,45,211,18,0,0              // movaps        0x12d3(%rip),%xmm13        # 5a10 <_sk_callback_sse2+0xeab>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,209,18,0,0              // addps         0x12d1(%rip),%xmm13        # 5960 <_sk_callback_sse2+0xeb5>
+  .byte  68,15,88,45,215,18,0,0              // addps         0x12d7(%rip),%xmm13        # 5a20 <_sk_callback_sse2+0xebb>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,213,18,0,0              // addps         0x12d5(%rip),%xmm13        # 5970 <_sk_callback_sse2+0xec5>
+  .byte  68,15,88,45,219,18,0,0              // addps         0x12db(%rip),%xmm13        # 5a30 <_sk_callback_sse2+0xecb>
   .byte  69,15,89,236                        // mulps         %xmm12,%xmm13
-  .byte  68,15,88,45,217,18,0,0              // addps         0x12d9(%rip),%xmm13        # 5980 <_sk_callback_sse2+0xed5>
+  .byte  68,15,88,45,223,18,0,0              // addps         0x12df(%rip),%xmm13        # 5a40 <_sk_callback_sse2+0xedb>
   .byte  69,15,89,235                        // mulps         %xmm11,%xmm13
   .byte  69,15,194,202,1                     // cmpltps       %xmm10,%xmm9
-  .byte  68,15,40,21,216,18,0,0              // movaps        0x12d8(%rip),%xmm10        # 5990 <_sk_callback_sse2+0xee5>
+  .byte  68,15,40,21,222,18,0,0              // movaps        0x12de(%rip),%xmm10        # 5a50 <_sk_callback_sse2+0xeeb>
   .byte  69,15,92,213                        // subps         %xmm13,%xmm10
   .byte  69,15,84,209                        // andps         %xmm9,%xmm10
   .byte  69,15,85,205                        // andnps        %xmm13,%xmm9
   .byte  69,15,86,202                        // orps          %xmm10,%xmm9
   .byte  68,15,194,192,1                     // cmpltps       %xmm0,%xmm8
-  .byte  68,15,40,21,203,18,0,0              // movaps        0x12cb(%rip),%xmm10        # 59a0 <_sk_callback_sse2+0xef5>
+  .byte  68,15,40,21,209,18,0,0              // movaps        0x12d1(%rip),%xmm10        # 5a60 <_sk_callback_sse2+0xefb>
   .byte  69,15,92,209                        // subps         %xmm9,%xmm10
   .byte  69,15,84,208                        // andps         %xmm8,%xmm10
   .byte  69,15,85,193                        // andnps        %xmm9,%xmm8
   .byte  69,15,86,194                        // orps          %xmm10,%xmm8
   .byte  68,15,40,201                        // movaps        %xmm1,%xmm9
   .byte  68,15,194,200,1                     // cmpltps       %xmm0,%xmm9
-  .byte  68,15,40,21,186,18,0,0              // movaps        0x12ba(%rip),%xmm10        # 59b0 <_sk_callback_sse2+0xf05>
+  .byte  68,15,40,21,192,18,0,0              // movaps        0x12c0(%rip),%xmm10        # 5a70 <_sk_callback_sse2+0xf0b>
   .byte  69,15,92,208                        // subps         %xmm8,%xmm10
   .byte  69,15,84,209                        // andps         %xmm9,%xmm10
   .byte  69,15,85,200                        // andnps        %xmm8,%xmm9
@@ -31761,7 +32005,7 @@ HIDDEN _sk_save_xy_sse2
 FUNCTION(_sk_save_xy_sse2)
 _sk_save_xy_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,140,18,0,0               // movaps        0x128c(%rip),%xmm8        # 59c0 <_sk_callback_sse2+0xf15>
+  .byte  68,15,40,5,146,18,0,0               // movaps        0x1292(%rip),%xmm8        # 5a80 <_sk_callback_sse2+0xf1b>
   .byte  15,17,0                             // movups        %xmm0,(%rax)
   .byte  68,15,40,200                        // movaps        %xmm0,%xmm9
   .byte  69,15,88,200                        // addps         %xmm8,%xmm9
@@ -31769,7 +32013,7 @@ _sk_save_xy_sse2:
   .byte  69,15,91,210                        // cvtdq2ps      %xmm10,%xmm10
   .byte  69,15,40,217                        // movaps        %xmm9,%xmm11
   .byte  69,15,194,218,1                     // cmpltps       %xmm10,%xmm11
-  .byte  68,15,40,37,119,18,0,0              // movaps        0x1277(%rip),%xmm12        # 59d0 <_sk_callback_sse2+0xf25>
+  .byte  68,15,40,37,125,18,0,0              // movaps        0x127d(%rip),%xmm12        # 5a90 <_sk_callback_sse2+0xf2b>
   .byte  69,15,84,220                        // andps         %xmm12,%xmm11
   .byte  69,15,92,211                        // subps         %xmm11,%xmm10
   .byte  69,15,92,202                        // subps         %xmm10,%xmm9
@@ -31816,8 +32060,8 @@ _sk_bilinear_nx_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,240,17,0,0                  // addps         0x11f0(%rip),%xmm0        # 59e0 <_sk_callback_sse2+0xf35>
-  .byte  68,15,40,13,248,17,0,0              // movaps        0x11f8(%rip),%xmm9        # 59f0 <_sk_callback_sse2+0xf45>
+  .byte  15,88,5,246,17,0,0                  // addps         0x11f6(%rip),%xmm0        # 5aa0 <_sk_callback_sse2+0xf3b>
+  .byte  68,15,40,13,254,17,0,0              // movaps        0x11fe(%rip),%xmm9        # 5ab0 <_sk_callback_sse2+0xf4b>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -31830,7 +32074,7 @@ _sk_bilinear_px_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,231,17,0,0                  // addps         0x11e7(%rip),%xmm0        # 5a00 <_sk_callback_sse2+0xf55>
+  .byte  15,88,5,237,17,0,0                  // addps         0x11ed(%rip),%xmm0        # 5ac0 <_sk_callback_sse2+0xf5b>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -31842,8 +32086,8 @@ _sk_bilinear_ny_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,217,17,0,0                 // addps         0x11d9(%rip),%xmm1        # 5a10 <_sk_callback_sse2+0xf65>
-  .byte  68,15,40,13,225,17,0,0              // movaps        0x11e1(%rip),%xmm9        # 5a20 <_sk_callback_sse2+0xf75>
+  .byte  15,88,13,223,17,0,0                 // addps         0x11df(%rip),%xmm1        # 5ad0 <_sk_callback_sse2+0xf6b>
+  .byte  68,15,40,13,231,17,0,0              // movaps        0x11e7(%rip),%xmm9        # 5ae0 <_sk_callback_sse2+0xf7b>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -31856,7 +32100,7 @@ _sk_bilinear_py_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,207,17,0,0                 // addps         0x11cf(%rip),%xmm1        # 5a30 <_sk_callback_sse2+0xf85>
+  .byte  15,88,13,213,17,0,0                 // addps         0x11d5(%rip),%xmm1        # 5af0 <_sk_callback_sse2+0xf8b>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -31868,13 +32112,13 @@ _sk_bicubic_n3x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,194,17,0,0                  // addps         0x11c2(%rip),%xmm0        # 5a40 <_sk_callback_sse2+0xf95>
-  .byte  68,15,40,13,202,17,0,0              // movaps        0x11ca(%rip),%xmm9        # 5a50 <_sk_callback_sse2+0xfa5>
+  .byte  15,88,5,200,17,0,0                  // addps         0x11c8(%rip),%xmm0        # 5b00 <_sk_callback_sse2+0xf9b>
+  .byte  68,15,40,13,208,17,0,0              // movaps        0x11d0(%rip),%xmm9        # 5b10 <_sk_callback_sse2+0xfab>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,198,17,0,0              // mulps         0x11c6(%rip),%xmm9        # 5a60 <_sk_callback_sse2+0xfb5>
-  .byte  68,15,88,13,206,17,0,0              // addps         0x11ce(%rip),%xmm9        # 5a70 <_sk_callback_sse2+0xfc5>
+  .byte  68,15,89,13,204,17,0,0              // mulps         0x11cc(%rip),%xmm9        # 5b20 <_sk_callback_sse2+0xfbb>
+  .byte  68,15,88,13,212,17,0,0              // addps         0x11d4(%rip),%xmm9        # 5b30 <_sk_callback_sse2+0xfcb>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,128,0,0,0              // movups        %xmm9,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -31887,16 +32131,16 @@ _sk_bicubic_n1x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,189,17,0,0                  // addps         0x11bd(%rip),%xmm0        # 5a80 <_sk_callback_sse2+0xfd5>
-  .byte  68,15,40,13,197,17,0,0              // movaps        0x11c5(%rip),%xmm9        # 5a90 <_sk_callback_sse2+0xfe5>
+  .byte  15,88,5,195,17,0,0                  // addps         0x11c3(%rip),%xmm0        # 5b40 <_sk_callback_sse2+0xfdb>
+  .byte  68,15,40,13,203,17,0,0              // movaps        0x11cb(%rip),%xmm9        # 5b50 <_sk_callback_sse2+0xfeb>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,201,17,0,0               // movaps        0x11c9(%rip),%xmm8        # 5aa0 <_sk_callback_sse2+0xff5>
+  .byte  68,15,40,5,207,17,0,0               // movaps        0x11cf(%rip),%xmm8        # 5b60 <_sk_callback_sse2+0xffb>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,205,17,0,0               // addps         0x11cd(%rip),%xmm8        # 5ab0 <_sk_callback_sse2+0x1005>
+  .byte  68,15,88,5,211,17,0,0               // addps         0x11d3(%rip),%xmm8        # 5b70 <_sk_callback_sse2+0x100b>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,209,17,0,0               // addps         0x11d1(%rip),%xmm8        # 5ac0 <_sk_callback_sse2+0x1015>
+  .byte  68,15,88,5,215,17,0,0               // addps         0x11d7(%rip),%xmm8        # 5b80 <_sk_callback_sse2+0x101b>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,213,17,0,0               // addps         0x11d5(%rip),%xmm8        # 5ad0 <_sk_callback_sse2+0x1025>
+  .byte  68,15,88,5,219,17,0,0               // addps         0x11db(%rip),%xmm8        # 5b90 <_sk_callback_sse2+0x102b>
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -31906,17 +32150,17 @@ HIDDEN _sk_bicubic_p1x_sse2
 FUNCTION(_sk_bicubic_p1x_sse2)
 _sk_bicubic_p1x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,207,17,0,0               // movaps        0x11cf(%rip),%xmm8        # 5ae0 <_sk_callback_sse2+0x1035>
+  .byte  68,15,40,5,213,17,0,0               // movaps        0x11d5(%rip),%xmm8        # 5ba0 <_sk_callback_sse2+0x103b>
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,72,64                      // movups        0x40(%rax),%xmm9
   .byte  65,15,88,192                        // addps         %xmm8,%xmm0
-  .byte  68,15,40,21,203,17,0,0              // movaps        0x11cb(%rip),%xmm10        # 5af0 <_sk_callback_sse2+0x1045>
+  .byte  68,15,40,21,209,17,0,0              // movaps        0x11d1(%rip),%xmm10        # 5bb0 <_sk_callback_sse2+0x104b>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,207,17,0,0              // addps         0x11cf(%rip),%xmm10        # 5b00 <_sk_callback_sse2+0x1055>
+  .byte  68,15,88,21,213,17,0,0              // addps         0x11d5(%rip),%xmm10        # 5bc0 <_sk_callback_sse2+0x105b>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,203,17,0,0              // addps         0x11cb(%rip),%xmm10        # 5b10 <_sk_callback_sse2+0x1065>
+  .byte  68,15,88,21,209,17,0,0              // addps         0x11d1(%rip),%xmm10        # 5bd0 <_sk_callback_sse2+0x106b>
   .byte  68,15,17,144,128,0,0,0              // movups        %xmm10,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -31928,11 +32172,11 @@ _sk_bicubic_p3x_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,0                             // movups        (%rax),%xmm0
   .byte  68,15,16,64,64                      // movups        0x40(%rax),%xmm8
-  .byte  15,88,5,190,17,0,0                  // addps         0x11be(%rip),%xmm0        # 5b20 <_sk_callback_sse2+0x1075>
+  .byte  15,88,5,196,17,0,0                  // addps         0x11c4(%rip),%xmm0        # 5be0 <_sk_callback_sse2+0x107b>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,190,17,0,0               // mulps         0x11be(%rip),%xmm8        # 5b30 <_sk_callback_sse2+0x1085>
-  .byte  68,15,88,5,198,17,0,0               // addps         0x11c6(%rip),%xmm8        # 5b40 <_sk_callback_sse2+0x1095>
+  .byte  68,15,89,5,196,17,0,0               // mulps         0x11c4(%rip),%xmm8        # 5bf0 <_sk_callback_sse2+0x108b>
+  .byte  68,15,88,5,204,17,0,0               // addps         0x11cc(%rip),%xmm8        # 5c00 <_sk_callback_sse2+0x109b>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,128,0,0,0              // movups        %xmm8,0x80(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -31945,13 +32189,13 @@ _sk_bicubic_n3y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,180,17,0,0                 // addps         0x11b4(%rip),%xmm1        # 5b50 <_sk_callback_sse2+0x10a5>
-  .byte  68,15,40,13,188,17,0,0              // movaps        0x11bc(%rip),%xmm9        # 5b60 <_sk_callback_sse2+0x10b5>
+  .byte  15,88,13,186,17,0,0                 // addps         0x11ba(%rip),%xmm1        # 5c10 <_sk_callback_sse2+0x10ab>
+  .byte  68,15,40,13,194,17,0,0              // movaps        0x11c2(%rip),%xmm9        # 5c20 <_sk_callback_sse2+0x10bb>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
   .byte  69,15,40,193                        // movaps        %xmm9,%xmm8
   .byte  69,15,89,192                        // mulps         %xmm8,%xmm8
-  .byte  68,15,89,13,184,17,0,0              // mulps         0x11b8(%rip),%xmm9        # 5b70 <_sk_callback_sse2+0x10c5>
-  .byte  68,15,88,13,192,17,0,0              // addps         0x11c0(%rip),%xmm9        # 5b80 <_sk_callback_sse2+0x10d5>
+  .byte  68,15,89,13,190,17,0,0              // mulps         0x11be(%rip),%xmm9        # 5c30 <_sk_callback_sse2+0x10cb>
+  .byte  68,15,88,13,198,17,0,0              // addps         0x11c6(%rip),%xmm9        # 5c40 <_sk_callback_sse2+0x10db>
   .byte  69,15,89,200                        // mulps         %xmm8,%xmm9
   .byte  68,15,17,136,160,0,0,0              // movups        %xmm9,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -31964,16 +32208,16 @@ _sk_bicubic_n1y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,174,17,0,0                 // addps         0x11ae(%rip),%xmm1        # 5b90 <_sk_callback_sse2+0x10e5>
-  .byte  68,15,40,13,182,17,0,0              // movaps        0x11b6(%rip),%xmm9        # 5ba0 <_sk_callback_sse2+0x10f5>
+  .byte  15,88,13,180,17,0,0                 // addps         0x11b4(%rip),%xmm1        # 5c50 <_sk_callback_sse2+0x10eb>
+  .byte  68,15,40,13,188,17,0,0              // movaps        0x11bc(%rip),%xmm9        # 5c60 <_sk_callback_sse2+0x10fb>
   .byte  69,15,92,200                        // subps         %xmm8,%xmm9
-  .byte  68,15,40,5,186,17,0,0               // movaps        0x11ba(%rip),%xmm8        # 5bb0 <_sk_callback_sse2+0x1105>
+  .byte  68,15,40,5,192,17,0,0               // movaps        0x11c0(%rip),%xmm8        # 5c70 <_sk_callback_sse2+0x110b>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,190,17,0,0               // addps         0x11be(%rip),%xmm8        # 5bc0 <_sk_callback_sse2+0x1115>
+  .byte  68,15,88,5,196,17,0,0               // addps         0x11c4(%rip),%xmm8        # 5c80 <_sk_callback_sse2+0x111b>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,194,17,0,0               // addps         0x11c2(%rip),%xmm8        # 5bd0 <_sk_callback_sse2+0x1125>
+  .byte  68,15,88,5,200,17,0,0               // addps         0x11c8(%rip),%xmm8        # 5c90 <_sk_callback_sse2+0x112b>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
-  .byte  68,15,88,5,198,17,0,0               // addps         0x11c6(%rip),%xmm8        # 5be0 <_sk_callback_sse2+0x1135>
+  .byte  68,15,88,5,204,17,0,0               // addps         0x11cc(%rip),%xmm8        # 5ca0 <_sk_callback_sse2+0x113b>
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -31983,17 +32227,17 @@ HIDDEN _sk_bicubic_p1y_sse2
 FUNCTION(_sk_bicubic_p1y_sse2)
 _sk_bicubic_p1y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
-  .byte  68,15,40,5,192,17,0,0               // movaps        0x11c0(%rip),%xmm8        # 5bf0 <_sk_callback_sse2+0x1145>
+  .byte  68,15,40,5,198,17,0,0               // movaps        0x11c6(%rip),%xmm8        # 5cb0 <_sk_callback_sse2+0x114b>
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,72,96                      // movups        0x60(%rax),%xmm9
   .byte  65,15,88,200                        // addps         %xmm8,%xmm1
-  .byte  68,15,40,21,187,17,0,0              // movaps        0x11bb(%rip),%xmm10        # 5c00 <_sk_callback_sse2+0x1155>
+  .byte  68,15,40,21,193,17,0,0              // movaps        0x11c1(%rip),%xmm10        # 5cc0 <_sk_callback_sse2+0x115b>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,191,17,0,0              // addps         0x11bf(%rip),%xmm10        # 5c10 <_sk_callback_sse2+0x1165>
+  .byte  68,15,88,21,197,17,0,0              // addps         0x11c5(%rip),%xmm10        # 5cd0 <_sk_callback_sse2+0x116b>
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
   .byte  69,15,88,208                        // addps         %xmm8,%xmm10
   .byte  69,15,89,209                        // mulps         %xmm9,%xmm10
-  .byte  68,15,88,21,187,17,0,0              // addps         0x11bb(%rip),%xmm10        # 5c20 <_sk_callback_sse2+0x1175>
+  .byte  68,15,88,21,193,17,0,0              // addps         0x11c1(%rip),%xmm10        # 5ce0 <_sk_callback_sse2+0x117b>
   .byte  68,15,17,144,160,0,0,0              // movups        %xmm10,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  255,224                             // jmpq          *%rax
@@ -32005,11 +32249,11 @@ _sk_bicubic_p3y_sse2:
   .byte  72,173                              // lods          %ds:(%rsi),%rax
   .byte  15,16,72,32                         // movups        0x20(%rax),%xmm1
   .byte  68,15,16,64,96                      // movups        0x60(%rax),%xmm8
-  .byte  15,88,13,173,17,0,0                 // addps         0x11ad(%rip),%xmm1        # 5c30 <_sk_callback_sse2+0x1185>
+  .byte  15,88,13,179,17,0,0                 // addps         0x11b3(%rip),%xmm1        # 5cf0 <_sk_callback_sse2+0x118b>
   .byte  69,15,40,200                        // movaps        %xmm8,%xmm9
   .byte  69,15,89,201                        // mulps         %xmm9,%xmm9
-  .byte  68,15,89,5,173,17,0,0               // mulps         0x11ad(%rip),%xmm8        # 5c40 <_sk_callback_sse2+0x1195>
-  .byte  68,15,88,5,181,17,0,0               // addps         0x11b5(%rip),%xmm8        # 5c50 <_sk_callback_sse2+0x11a5>
+  .byte  68,15,89,5,179,17,0,0               // mulps         0x11b3(%rip),%xmm8        # 5d00 <_sk_callback_sse2+0x119b>
+  .byte  68,15,88,5,187,17,0,0               // addps         0x11bb(%rip),%xmm8        # 5d10 <_sk_callback_sse2+0x11ab>
   .byte  69,15,89,193                        // mulps         %xmm9,%xmm8
   .byte  68,15,17,128,160,0,0,0              // movups        %xmm8,0xa0(%rax)
   .byte  72,173                              // lods          %ds:(%rsi),%rax
@@ -32228,11 +32472,11 @@ BALIGN16
   .byte  128,191,0,0,128,191,0               // cmpb          $0x0,-0x40800000(%rdi)
   .byte  0,224                               // add           %ah,%al
   .byte  64,0,0                              // add           %al,(%rax)
-  .byte  224,64                              // loopne        4d58 <.literal16+0x1d8>
+  .byte  224,64                              // loopne        4e18 <.literal16+0x1d8>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        4d5c <.literal16+0x1dc>
+  .byte  224,64                              // loopne        4e1c <.literal16+0x1dc>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,64                              // loopne        4d60 <.literal16+0x1e0>
+  .byte  224,64                              // loopne        4e20 <.literal16+0x1e0>
   .byte  154                                 // (bad)
   .byte  153                                 // cltd
   .byte  153                                 // cltd
@@ -32252,13 +32496,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4d81 <.literal16+0x201>
+  .byte  71,225,61                           // rex.RXB       loope 4e41 <.literal16+0x201>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4d85 <.literal16+0x205>
+  .byte  71,225,61                           // rex.RXB       loope 4e45 <.literal16+0x205>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4d89 <.literal16+0x209>
+  .byte  71,225,61                           // rex.RXB       loope 4e49 <.literal16+0x209>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4d8d <.literal16+0x20d>
+  .byte  71,225,61                           // rex.RXB       loope 4e4d <.literal16+0x20d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -32283,13 +32527,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4dc1 <.literal16+0x241>
+  .byte  71,225,61                           // rex.RXB       loope 4e81 <.literal16+0x241>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4dc5 <.literal16+0x245>
+  .byte  71,225,61                           // rex.RXB       loope 4e85 <.literal16+0x245>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4dc9 <.literal16+0x249>
+  .byte  71,225,61                           // rex.RXB       loope 4e89 <.literal16+0x249>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4dcd <.literal16+0x24d>
+  .byte  71,225,61                           // rex.RXB       loope 4e8d <.literal16+0x24d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -32314,13 +32558,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e01 <.literal16+0x281>
+  .byte  71,225,61                           // rex.RXB       loope 4ec1 <.literal16+0x281>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e05 <.literal16+0x285>
+  .byte  71,225,61                           // rex.RXB       loope 4ec5 <.literal16+0x285>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e09 <.literal16+0x289>
+  .byte  71,225,61                           // rex.RXB       loope 4ec9 <.literal16+0x289>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e0d <.literal16+0x28d>
+  .byte  71,225,61                           // rex.RXB       loope 4ecd <.literal16+0x28d>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -32345,13 +32589,13 @@ BALIGN16
   .byte  10,23                               // or            (%rdi),%dl
   .byte  63                                  // (bad)
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e41 <.literal16+0x2c1>
+  .byte  71,225,61                           // rex.RXB       loope 4f01 <.literal16+0x2c1>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e45 <.literal16+0x2c5>
+  .byte  71,225,61                           // rex.RXB       loope 4f05 <.literal16+0x2c5>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e49 <.literal16+0x2c9>
+  .byte  71,225,61                           // rex.RXB       loope 4f09 <.literal16+0x2c9>
   .byte  174                                 // scas          %es:(%rdi),%al
-  .byte  71,225,61                           // rex.RXB       loope 4e4d <.literal16+0x2cd>
+  .byte  71,225,61                           // rex.RXB       loope 4f0d <.literal16+0x2cd>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -32572,13 +32816,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5029 <.literal16+0x4a9>
+  .byte  224,7                               // loopne        50e9 <.literal16+0x4a9>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        502d <.literal16+0x4ad>
+  .byte  224,7                               // loopne        50ed <.literal16+0x4ad>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        5031 <.literal16+0x4b1>
+  .byte  224,7                               // loopne        50f1 <.literal16+0x4b1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        5035 <.literal16+0x4b5>
+  .byte  224,7                               // loopne        50f5 <.literal16+0x4b5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -32643,11 +32887,11 @@ BALIGN16
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            510b <.literal16+0x58b>
+  .byte  127,67                              // jg            51cb <.literal16+0x58b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            510f <.literal16+0x58f>
+  .byte  127,67                              // jg            51cf <.literal16+0x58f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5113 <.literal16+0x593>
+  .byte  127,67                              // jg            51d3 <.literal16+0x593>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,129,128,128,59           // addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -32662,16 +32906,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5104 <.literal16+0x584>
+  .byte  127,0                               // jg            51c4 <.literal16+0x584>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5108 <.literal16+0x588>
+  .byte  127,0                               // jg            51c8 <.literal16+0x588>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            510c <.literal16+0x58c>
+  .byte  127,0                               // jg            51cc <.literal16+0x58c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5110 <.literal16+0x590>
+  .byte  127,0                               // jg            51d0 <.literal16+0x590>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -32680,7 +32924,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            5195 <.literal16+0x615>
+  .byte  119,115                             // ja            5255 <.literal16+0x615>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -32691,7 +32935,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           50f9 <.literal16+0x579>
+  .byte  117,191                             // jne           51b9 <.literal16+0x579>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -32703,7 +32947,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3913a <_sk_callback_sse2+0xffffffffe9a3468f>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a391fa <_sk_callback_sse2+0xffffffffe9a34695>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -32757,16 +33001,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            51d4 <.literal16+0x654>
+  .byte  127,0                               // jg            5294 <.literal16+0x654>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            51d8 <.literal16+0x658>
+  .byte  127,0                               // jg            5298 <.literal16+0x658>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            51dc <.literal16+0x65c>
+  .byte  127,0                               // jg            529c <.literal16+0x65c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            51e0 <.literal16+0x660>
+  .byte  127,0                               // jg            52a0 <.literal16+0x660>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -32775,7 +33019,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            5265 <.literal16+0x6e5>
+  .byte  119,115                             // ja            5325 <.literal16+0x6e5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -32786,7 +33030,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           51c9 <.literal16+0x649>
+  .byte  117,191                             // jne           5289 <.literal16+0x649>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -32798,7 +33042,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3920a <_sk_callback_sse2+0xffffffffe9a3475f>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a392ca <_sk_callback_sse2+0xffffffffe9a34765>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -32852,16 +33096,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            52a4 <.literal16+0x724>
+  .byte  127,0                               // jg            5364 <.literal16+0x724>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            52a8 <.literal16+0x728>
+  .byte  127,0                               // jg            5368 <.literal16+0x728>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            52ac <.literal16+0x72c>
+  .byte  127,0                               // jg            536c <.literal16+0x72c>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            52b0 <.literal16+0x730>
+  .byte  127,0                               // jg            5370 <.literal16+0x730>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -32870,7 +33114,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            5335 <.literal16+0x7b5>
+  .byte  119,115                             // ja            53f5 <.literal16+0x7b5>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -32881,7 +33125,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           5299 <.literal16+0x719>
+  .byte  117,191                             // jne           5359 <.literal16+0x719>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -32893,7 +33137,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a392da <_sk_callback_sse2+0xffffffffe9a3482f>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3939a <_sk_callback_sse2+0xffffffffe9a34835>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -32947,16 +33191,16 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  52,255                              // xor           $0xff,%al
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5374 <.literal16+0x7f4>
+  .byte  127,0                               // jg            5434 <.literal16+0x7f4>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5378 <.literal16+0x7f8>
+  .byte  127,0                               // jg            5438 <.literal16+0x7f8>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            537c <.literal16+0x7fc>
+  .byte  127,0                               // jg            543c <.literal16+0x7fc>
   .byte  255                                 // (bad)
   .byte  255                                 // (bad)
-  .byte  127,0                               // jg            5380 <.literal16+0x800>
+  .byte  127,0                               // jg            5440 <.literal16+0x800>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -32965,7 +33209,7 @@ BALIGN16
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
-  .byte  119,115                             // ja            5405 <.literal16+0x885>
+  .byte  119,115                             // ja            54c5 <.literal16+0x885>
   .byte  248                                 // clc
   .byte  194,119,115                         // retq          $0x7377
   .byte  248                                 // clc
@@ -32976,7 +33220,7 @@ BALIGN16
   .byte  194,117,191                         // retq          $0xbf75
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
-  .byte  117,191                             // jne           5369 <.literal16+0x7e9>
+  .byte  117,191                             // jne           5429 <.literal16+0x7e9>
   .byte  191,63,117,191,191                  // mov           $0xbfbf753f,%edi
   .byte  63                                  // (bad)
   .byte  249                                 // stc
@@ -32988,7 +33232,7 @@ BALIGN16
   .byte  249                                 // stc
   .byte  68,180,62                           // rex.R         mov $0x3e,%spl
   .byte  163,233,220,63,163,233,220,63,163   // movabs        %eax,0xa33fdce9a33fdce9
-  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a393aa <_sk_callback_sse2+0xffffffffe9a348ff>
+  .byte  233,220,63,163,233                  // jmpq          ffffffffe9a3946a <_sk_callback_sse2+0xffffffffe9a34905>
   .byte  220,63                              // fdivrl        (%rdi)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
@@ -33038,13 +33282,13 @@ BALIGN16
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
   .byte  200,66,0,0                          // enterq        $0x42,$0x0
-  .byte  127,67                              // jg            5487 <.literal16+0x907>
+  .byte  127,67                              // jg            5547 <.literal16+0x907>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            548b <.literal16+0x90b>
+  .byte  127,67                              // jg            554b <.literal16+0x90b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            548f <.literal16+0x90f>
+  .byte  127,67                              // jg            554f <.literal16+0x90f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5493 <.literal16+0x913>
+  .byte  127,67                              // jg            5553 <.literal16+0x913>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,195                               // add           %al,%bl
   .byte  0,0                                 // add           %al,(%rax)
@@ -33091,16 +33335,16 @@ BALIGN16
   .byte  128,3,62                            // addb          $0x3e,(%rbx)
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           5513 <.literal16+0x993>
+  .byte  118,63                              // jbe           55d3 <.literal16+0x993>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           5517 <.literal16+0x997>
+  .byte  118,63                              // jbe           55d7 <.literal16+0x997>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           551b <.literal16+0x99b>
+  .byte  118,63                              // jbe           55db <.literal16+0x99b>
   .byte  31                                  // (bad)
   .byte  215                                 // xlat          %ds:(%rbx)
-  .byte  118,63                              // jbe           551f <.literal16+0x99f>
+  .byte  118,63                              // jbe           55df <.literal16+0x99f>
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
   .byte  246,64,83,63                        // testb         $0x3f,0x53(%rax)
@@ -33112,11 +33356,11 @@ BALIGN16
   .byte  128,59,0                            // cmpb          $0x0,(%rbx)
   .byte  0,127,67                            // add           %bh,0x43(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            555b <.literal16+0x9db>
+  .byte  127,67                              // jg            561b <.literal16+0x9db>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            555f <.literal16+0x9df>
+  .byte  127,67                              // jg            561f <.literal16+0x9df>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5563 <.literal16+0x9e3>
+  .byte  127,67                              // jg            5623 <.literal16+0x9e3>
   .byte  129,128,128,59,129,128,128,59,129,128// addl          $0x80813b80,-0x7f7ec480(%rax)
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,0,0,128,63               // addb          $0x3f,-0x7fffffc5(%rax)
@@ -33156,13 +33400,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        55a9 <.literal16+0xa29>
+  .byte  224,7                               // loopne        5669 <.literal16+0xa29>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        55ad <.literal16+0xa2d>
+  .byte  224,7                               // loopne        566d <.literal16+0xa2d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        55b1 <.literal16+0xa31>
+  .byte  224,7                               // loopne        5671 <.literal16+0xa31>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        55b5 <.literal16+0xa35>
+  .byte  224,7                               // loopne        5675 <.literal16+0xa35>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -33208,13 +33452,13 @@ BALIGN16
   .byte  132,55                              // test          %dh,(%rdi)
   .byte  8,33                                // or            %ah,(%rcx)
   .byte  132,55                              // test          %dh,(%rdi)
-  .byte  224,7                               // loopne        5619 <.literal16+0xa99>
+  .byte  224,7                               // loopne        56d9 <.literal16+0xa99>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        561d <.literal16+0xa9d>
+  .byte  224,7                               // loopne        56dd <.literal16+0xa9d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        5621 <.literal16+0xaa1>
+  .byte  224,7                               // loopne        56e1 <.literal16+0xaa1>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  224,7                               // loopne        5625 <.literal16+0xaa5>
+  .byte  224,7                               // loopne        56e5 <.literal16+0xaa5>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  33,8                                // and           %ecx,(%rax)
   .byte  2,58                                // add           (%rdx),%bh
@@ -33252,13 +33496,13 @@ BALIGN16
   .byte  65,0,0                              // add           %al,(%r8)
   .byte  248                                 // clc
   .byte  65,0,0                              // add           %al,(%r8)
-  .byte  124,66                              // jl            56b6 <.literal16+0xb36>
+  .byte  124,66                              // jl            5776 <.literal16+0xb36>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            56ba <.literal16+0xb3a>
+  .byte  124,66                              // jl            577a <.literal16+0xb3a>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            56be <.literal16+0xb3e>
+  .byte  124,66                              // jl            577e <.literal16+0xb3e>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  124,66                              // jl            56c2 <.literal16+0xb42>
+  .byte  124,66                              // jl            5782 <.literal16+0xb42>
   .byte  0,240                               // add           %dh,%al
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,240                               // add           %dh,%al
@@ -33348,13 +33592,13 @@ BALIGN16
   .byte  136,136,61,137,136,136              // mov           %cl,-0x777776c3(%rax)
   .byte  61,137,136,136,61                   // cmp           $0x3d888889,%eax
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            57c5 <.literal16+0xc45>
+  .byte  112,65                              // jo            5885 <.literal16+0xc45>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            57c9 <.literal16+0xc49>
+  .byte  112,65                              // jo            5889 <.literal16+0xc49>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            57cd <.literal16+0xc4d>
+  .byte  112,65                              // jo            588d <.literal16+0xc4d>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  112,65                              // jo            57d1 <.literal16+0xc51>
+  .byte  112,65                              // jo            5891 <.literal16+0xc51>
   .byte  255,0                               // incl          (%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  255,0                               // incl          (%rax)
@@ -33376,11 +33620,11 @@ BALIGN16
   .byte  128,59,129                          // cmpb          $0x81,(%rbx)
   .byte  128,128,59,0,0,127,67               // addb          $0x43,0x7f00003b(%rax)
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            581b <.literal16+0xc9b>
+  .byte  127,67                              // jg            58db <.literal16+0xc9b>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            581f <.literal16+0xc9f>
+  .byte  127,67                              // jg            58df <.literal16+0xc9f>
   .byte  0,0                                 // add           %al,(%rax)
-  .byte  127,67                              // jg            5823 <.literal16+0xca3>
+  .byte  127,67                              // jg            58e3 <.literal16+0xca3>
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,128,0,0,0,128                     // add           %al,-0x80000000(%rax)
@@ -33456,13 +33700,13 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  255                                 // (bad)
-  .byte  127,71                              // jg            590b <.literal16+0xd8b>
+  .byte  127,71                              // jg            59cb <.literal16+0xd8b>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            590f <.literal16+0xd8f>
+  .byte  127,71                              // jg            59cf <.literal16+0xd8f>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            5913 <.literal16+0xd93>
+  .byte  127,71                              // jg            59d3 <.literal16+0xd93>
   .byte  0,255                               // add           %bh,%bh
-  .byte  127,71                              // jg            5917 <.literal16+0xd97>
+  .byte  127,71                              // jg            59d7 <.literal16+0xd97>
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,0                            // cmpb          $0x0,(%rdi)
   .byte  0,128,63,0,0,128                    // add           %al,-0x7fffffc1(%rax)
@@ -33623,11 +33867,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         5a82 <.literal16+0xf02>
+  .byte  62,114,28                           // jb,pt         5b42 <.literal16+0xf02>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5a86 <.literal16+0xf06>
+  .byte  62,114,28                           // jb,pt         5b46 <.literal16+0xf06>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5a8a <.literal16+0xf0a>
+  .byte  62,114,28                           // jb,pt         5b4a <.literal16+0xf0a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -33671,7 +33915,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e915 <_sk_callback_sse2+0x3d639e6a>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e9d5 <_sk_callback_sse2+0x3d639e70>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -33697,7 +33941,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63e955 <_sk_callback_sse2+0x3d639eaa>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63ea15 <_sk_callback_sse2+0x3d639eb0>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -33706,13 +33950,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            5b4e <.literal16+0xfce>
+  .byte  114,28                              // jb            5c0e <.literal16+0xfce>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5b52 <.literal16+0xfd2>
+  .byte  62,114,28                           // jb,pt         5c12 <.literal16+0xfd2>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5b56 <.literal16+0xfd6>
+  .byte  62,114,28                           // jb,pt         5c16 <.literal16+0xfd6>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5b5a <.literal16+0xfda>
+  .byte  62,114,28                           // jb,pt         5c1a <.literal16+0xfda>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -33733,11 +33977,11 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128,63,114                          // cmpb          $0x72,(%rdi)
   .byte  28,199                              // sbb           $0xc7,%al
-  .byte  62,114,28                           // jb,pt         5b92 <.literal16+0x1012>
+  .byte  62,114,28                           // jb,pt         5c52 <.literal16+0x1012>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5b96 <.literal16+0x1016>
+  .byte  62,114,28                           // jb,pt         5c56 <.literal16+0x1016>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5b9a <.literal16+0x101a>
+  .byte  62,114,28                           // jb,pt         5c5a <.literal16+0x101a>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
@@ -33781,7 +34025,7 @@ BALIGN16
   .byte  0,0                                 // add           %al,(%rax)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63ea25 <_sk_callback_sse2+0x3d639f7a>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63eae5 <_sk_callback_sse2+0x3d639f80>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  0,63                                // add           %bh,(%rdi)
   .byte  0,0                                 // add           %al,(%rax)
@@ -33807,7 +34051,7 @@ BALIGN16
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
   .byte  57,142,99,61,57,142                 // cmp           %ecx,-0x71c6c29d(%rsi)
-  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63ea65 <_sk_callback_sse2+0x3d639fba>
+  .byte  99,61,57,142,99,61                  // movslq        0x3d638e39(%rip),%edi        # 3d63eb25 <_sk_callback_sse2+0x3d639fc0>
   .byte  57,142,99,61,0,0                    // cmp           %ecx,0x3d63(%rsi)
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
@@ -33816,13 +34060,13 @@ BALIGN16
   .byte  192,63,0                            // sarb          $0x0,(%rdi)
   .byte  0,192                               // add           %al,%al
   .byte  63                                  // (bad)
-  .byte  114,28                              // jb            5c5e <.literal16+0x10de>
+  .byte  114,28                              // jb            5d1e <.literal16+0x10de>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5c62 <_sk_callback_sse2+0x11b7>
+  .byte  62,114,28                           // jb,pt         5d22 <_sk_callback_sse2+0x11bd>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5c66 <_sk_callback_sse2+0x11bb>
+  .byte  62,114,28                           // jb,pt         5d26 <_sk_callback_sse2+0x11c1>
   .byte  199                                 // (bad)
-  .byte  62,114,28                           // jb,pt         5c6a <_sk_callback_sse2+0x11bf>
+  .byte  62,114,28                           // jb,pt         5d2a <_sk_callback_sse2+0x11c5>
   .byte  199                                 // (bad)
   .byte  62,171                              // ds            stos %eax,%es:(%rdi)
   .byte  170                                 // stos          %al,%es:(%rdi)
index c1e2208..3890657 100644 (file)
@@ -106,14 +106,14 @@ _sk_seed_shader_hsw LABEL PROC
   DB  197,249,110,199                     ; vmovd         %edi,%xmm0
   DB  196,226,125,88,192                  ; vpbroadcastd  %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,254,70,0,0        ; vbroadcastss  0x46fe(%rip),%ymm1        # 4858 <_sk_callback_hsw+0x11c>
+  DB  196,226,125,24,13,122,71,0,0        ; vbroadcastss  0x477a(%rip),%ymm1        # 48d4 <_sk_callback_hsw+0x11b>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,21,226,70,0,0        ; vbroadcastss  0x46e2(%rip),%ymm2        # 485c <_sk_callback_hsw+0x120>
+  DB  196,226,125,24,21,94,71,0,0         ; vbroadcastss  0x475e(%rip),%ymm2        # 48d8 <_sk_callback_hsw+0x11f>
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
   DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
@@ -132,13 +132,13 @@ _sk_dither_hsw LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  196,66,125,88,8                     ; vpbroadcastd  (%r8),%ymm9
   DB  196,65,61,239,201                   ; vpxor         %ymm9,%ymm8,%ymm9
-  DB  196,98,125,88,21,161,70,0,0         ; vpbroadcastd  0x46a1(%rip),%ymm10        # 4860 <_sk_callback_hsw+0x124>
+  DB  196,98,125,88,21,29,71,0,0          ; vpbroadcastd  0x471d(%rip),%ymm10        # 48dc <_sk_callback_hsw+0x123>
   DB  196,65,53,219,218                   ; vpand         %ymm10,%ymm9,%ymm11
   DB  196,193,37,114,243,5                ; vpslld        $0x5,%ymm11,%ymm11
   DB  196,65,61,219,210                   ; vpand         %ymm10,%ymm8,%ymm10
   DB  196,193,45,114,242,4                ; vpslld        $0x4,%ymm10,%ymm10
-  DB  196,98,125,88,37,134,70,0,0         ; vpbroadcastd  0x4686(%rip),%ymm12        # 4864 <_sk_callback_hsw+0x128>
-  DB  196,98,125,88,45,129,70,0,0         ; vpbroadcastd  0x4681(%rip),%ymm13        # 4868 <_sk_callback_hsw+0x12c>
+  DB  196,98,125,88,37,2,71,0,0           ; vpbroadcastd  0x4702(%rip),%ymm12        # 48e0 <_sk_callback_hsw+0x127>
+  DB  196,98,125,88,45,253,70,0,0         ; vpbroadcastd  0x46fd(%rip),%ymm13        # 48e4 <_sk_callback_hsw+0x12b>
   DB  196,65,53,219,245                   ; vpand         %ymm13,%ymm9,%ymm14
   DB  196,193,13,114,246,2                ; vpslld        $0x2,%ymm14,%ymm14
   DB  196,65,61,219,237                   ; vpand         %ymm13,%ymm8,%ymm13
@@ -153,8 +153,8 @@ _sk_dither_hsw LABEL PROC
   DB  196,65,61,235,194                   ; vpor          %ymm10,%ymm8,%ymm8
   DB  196,65,61,235,193                   ; vpor          %ymm9,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,51,70,0,0          ; vbroadcastss  0x4633(%rip),%ymm9        # 486c <_sk_callback_hsw+0x130>
-  DB  196,98,125,24,21,46,70,0,0          ; vbroadcastss  0x462e(%rip),%ymm10        # 4870 <_sk_callback_hsw+0x134>
+  DB  196,98,125,24,13,175,70,0,0         ; vbroadcastss  0x46af(%rip),%ymm9        # 48e8 <_sk_callback_hsw+0x12f>
+  DB  196,98,125,24,21,170,70,0,0         ; vbroadcastss  0x46aa(%rip),%ymm10        # 48ec <_sk_callback_hsw+0x133>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  196,98,125,24,64,8                  ; vbroadcastss  0x8(%rax),%ymm8
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
@@ -213,7 +213,7 @@ _sk_clear_hsw LABEL PROC
 PUBLIC _sk_srcatop_hsw
 _sk_srcatop_hsw LABEL PROC
   DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
-  DB  196,98,125,24,5,133,69,0,0          ; vbroadcastss  0x4585(%rip),%ymm8        # 4874 <_sk_callback_hsw+0x138>
+  DB  196,98,125,24,5,1,70,0,0            ; vbroadcastss  0x4601(%rip),%ymm8        # 48f0 <_sk_callback_hsw+0x137>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,226,61,184,196                  ; vfmadd231ps   %ymm4,%ymm8,%ymm0
   DB  197,244,89,207                      ; vmulps        %ymm7,%ymm1,%ymm1
@@ -227,7 +227,7 @@ _sk_srcatop_hsw LABEL PROC
 
 PUBLIC _sk_dstatop_hsw
 _sk_dstatop_hsw LABEL PROC
-  DB  196,98,125,24,5,88,69,0,0           ; vbroadcastss  0x4558(%rip),%ymm8        # 4878 <_sk_callback_hsw+0x13c>
+  DB  196,98,125,24,5,212,69,0,0          ; vbroadcastss  0x45d4(%rip),%ymm8        # 48f4 <_sk_callback_hsw+0x13b>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  196,226,101,184,196                 ; vfmadd231ps   %ymm4,%ymm3,%ymm0
@@ -260,7 +260,7 @@ _sk_dstin_hsw LABEL PROC
 
 PUBLIC _sk_srcout_hsw
 _sk_srcout_hsw LABEL PROC
-  DB  196,98,125,24,5,255,68,0,0          ; vbroadcastss  0x44ff(%rip),%ymm8        # 487c <_sk_callback_hsw+0x140>
+  DB  196,98,125,24,5,123,69,0,0          ; vbroadcastss  0x457b(%rip),%ymm8        # 48f8 <_sk_callback_hsw+0x13f>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -271,7 +271,7 @@ _sk_srcout_hsw LABEL PROC
 
 PUBLIC _sk_dstout_hsw
 _sk_dstout_hsw LABEL PROC
-  DB  196,226,125,24,5,226,68,0,0         ; vbroadcastss  0x44e2(%rip),%ymm0        # 4880 <_sk_callback_hsw+0x144>
+  DB  196,226,125,24,5,94,69,0,0          ; vbroadcastss  0x455e(%rip),%ymm0        # 48fc <_sk_callback_hsw+0x143>
   DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
   DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
   DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
@@ -282,7 +282,7 @@ _sk_dstout_hsw LABEL PROC
 
 PUBLIC _sk_srcover_hsw
 _sk_srcover_hsw LABEL PROC
-  DB  196,98,125,24,5,197,68,0,0          ; vbroadcastss  0x44c5(%rip),%ymm8        # 4884 <_sk_callback_hsw+0x148>
+  DB  196,98,125,24,5,65,69,0,0           ; vbroadcastss  0x4541(%rip),%ymm8        # 4900 <_sk_callback_hsw+0x147>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,93,184,192                  ; vfmadd231ps   %ymm8,%ymm4,%ymm0
   DB  196,194,85,184,200                  ; vfmadd231ps   %ymm8,%ymm5,%ymm1
@@ -293,7 +293,7 @@ _sk_srcover_hsw LABEL PROC
 
 PUBLIC _sk_dstover_hsw
 _sk_dstover_hsw LABEL PROC
-  DB  196,98,125,24,5,164,68,0,0          ; vbroadcastss  0x44a4(%rip),%ymm8        # 4888 <_sk_callback_hsw+0x14c>
+  DB  196,98,125,24,5,32,69,0,0           ; vbroadcastss  0x4520(%rip),%ymm8        # 4904 <_sk_callback_hsw+0x14b>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
   DB  196,226,61,168,205                  ; vfmadd213ps   %ymm5,%ymm8,%ymm1
@@ -313,7 +313,7 @@ _sk_modulate_hsw LABEL PROC
 
 PUBLIC _sk_multiply_hsw
 _sk_multiply_hsw LABEL PROC
-  DB  196,98,125,24,5,111,68,0,0          ; vbroadcastss  0x446f(%rip),%ymm8        # 488c <_sk_callback_hsw+0x150>
+  DB  196,98,125,24,5,235,68,0,0          ; vbroadcastss  0x44eb(%rip),%ymm8        # 4908 <_sk_callback_hsw+0x14f>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -355,7 +355,7 @@ _sk_screen_hsw LABEL PROC
 
 PUBLIC _sk_xor__hsw
 _sk_xor__hsw LABEL PROC
-  DB  196,98,125,24,5,234,67,0,0          ; vbroadcastss  0x43ea(%rip),%ymm8        # 4890 <_sk_callback_hsw+0x154>
+  DB  196,98,125,24,5,102,68,0,0          ; vbroadcastss  0x4466(%rip),%ymm8        # 490c <_sk_callback_hsw+0x153>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -387,7 +387,7 @@ _sk_darken_hsw LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,114,67,0,0          ; vbroadcastss  0x4372(%rip),%ymm8        # 4894 <_sk_callback_hsw+0x158>
+  DB  196,98,125,24,5,238,67,0,0          ; vbroadcastss  0x43ee(%rip),%ymm8        # 4910 <_sk_callback_hsw+0x157>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -410,7 +410,7 @@ _sk_lighten_hsw LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,33,67,0,0           ; vbroadcastss  0x4321(%rip),%ymm8        # 4898 <_sk_callback_hsw+0x15c>
+  DB  196,98,125,24,5,157,67,0,0          ; vbroadcastss  0x439d(%rip),%ymm8        # 4914 <_sk_callback_hsw+0x15b>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -436,7 +436,7 @@ _sk_difference_hsw LABEL PROC
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,196,66,0,0          ; vbroadcastss  0x42c4(%rip),%ymm8        # 489c <_sk_callback_hsw+0x160>
+  DB  196,98,125,24,5,64,67,0,0           ; vbroadcastss  0x4340(%rip),%ymm8        # 4918 <_sk_callback_hsw+0x15f>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -456,7 +456,7 @@ _sk_exclusion_hsw LABEL PROC
   DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,130,66,0,0          ; vbroadcastss  0x4282(%rip),%ymm8        # 48a0 <_sk_callback_hsw+0x164>
+  DB  196,98,125,24,5,254,66,0,0          ; vbroadcastss  0x42fe(%rip),%ymm8        # 491c <_sk_callback_hsw+0x163>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  196,194,69,184,216                  ; vfmadd231ps   %ymm8,%ymm7,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -464,7 +464,7 @@ _sk_exclusion_hsw LABEL PROC
 
 PUBLIC _sk_colorburn_hsw
 _sk_colorburn_hsw LABEL PROC
-  DB  196,98,125,24,5,112,66,0,0          ; vbroadcastss  0x4270(%rip),%ymm8        # 48a4 <_sk_callback_hsw+0x168>
+  DB  196,98,125,24,5,236,66,0,0          ; vbroadcastss  0x42ec(%rip),%ymm8        # 4920 <_sk_callback_hsw+0x167>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
@@ -520,7 +520,7 @@ _sk_colorburn_hsw LABEL PROC
 PUBLIC _sk_colordodge_hsw
 _sk_colordodge_hsw LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,13,123,65,0,0         ; vbroadcastss  0x417b(%rip),%ymm9        # 48a8 <_sk_callback_hsw+0x16c>
+  DB  196,98,125,24,13,247,65,0,0         ; vbroadcastss  0x41f7(%rip),%ymm9        # 4924 <_sk_callback_hsw+0x16b>
   DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
@@ -571,7 +571,7 @@ _sk_colordodge_hsw LABEL PROC
 
 PUBLIC _sk_hardlight_hsw
 _sk_hardlight_hsw LABEL PROC
-  DB  196,98,125,24,5,156,64,0,0          ; vbroadcastss  0x409c(%rip),%ymm8        # 48ac <_sk_callback_hsw+0x170>
+  DB  196,98,125,24,5,24,65,0,0           ; vbroadcastss  0x4118(%rip),%ymm8        # 4928 <_sk_callback_hsw+0x16f>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -620,7 +620,7 @@ _sk_hardlight_hsw LABEL PROC
 
 PUBLIC _sk_overlay_hsw
 _sk_overlay_hsw LABEL PROC
-  DB  196,98,125,24,5,212,63,0,0          ; vbroadcastss  0x3fd4(%rip),%ymm8        # 48b0 <_sk_callback_hsw+0x174>
+  DB  196,98,125,24,5,80,64,0,0           ; vbroadcastss  0x4050(%rip),%ymm8        # 492c <_sk_callback_hsw+0x173>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -680,10 +680,10 @@ _sk_softlight_hsw LABEL PROC
   DB  196,65,20,88,197                    ; vaddps        %ymm13,%ymm13,%ymm8
   DB  196,65,60,88,192                    ; vaddps        %ymm8,%ymm8,%ymm8
   DB  196,66,61,168,192                   ; vfmadd213ps   %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,29,219,62,0,0         ; vbroadcastss  0x3edb(%rip),%ymm11        # 48b8 <_sk_callback_hsw+0x17c>
+  DB  196,98,125,24,29,87,63,0,0          ; vbroadcastss  0x3f57(%rip),%ymm11        # 4934 <_sk_callback_hsw+0x17b>
   DB  196,65,20,88,227                    ; vaddps        %ymm11,%ymm13,%ymm12
   DB  196,65,28,89,192                    ; vmulps        %ymm8,%ymm12,%ymm8
-  DB  196,98,125,24,37,204,62,0,0         ; vbroadcastss  0x3ecc(%rip),%ymm12        # 48bc <_sk_callback_hsw+0x180>
+  DB  196,98,125,24,37,72,63,0,0          ; vbroadcastss  0x3f48(%rip),%ymm12        # 4938 <_sk_callback_hsw+0x17f>
   DB  196,66,21,184,196                   ; vfmadd231ps   %ymm12,%ymm13,%ymm8
   DB  196,65,124,82,245                   ; vrsqrtps      %ymm13,%ymm14
   DB  196,65,124,83,246                   ; vrcpps        %ymm14,%ymm14
@@ -693,7 +693,7 @@ _sk_softlight_hsw LABEL PROC
   DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
   DB  196,67,13,74,240,240                ; vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   DB  197,116,88,249                      ; vaddps        %ymm1,%ymm1,%ymm15
-  DB  196,98,125,24,5,143,62,0,0          ; vbroadcastss  0x3e8f(%rip),%ymm8        # 48b4 <_sk_callback_hsw+0x178>
+  DB  196,98,125,24,5,11,63,0,0           ; vbroadcastss  0x3f0b(%rip),%ymm8        # 4930 <_sk_callback_hsw+0x177>
   DB  196,65,60,92,237                    ; vsubps        %ymm13,%ymm8,%ymm13
   DB  197,132,92,195                      ; vsubps        %ymm3,%ymm15,%ymm0
   DB  196,98,125,168,235                  ; vfmadd213ps   %ymm3,%ymm0,%ymm13
@@ -806,11 +806,11 @@ _sk_hue_hsw LABEL PROC
   DB  196,65,28,89,210                    ; vmulps        %ymm10,%ymm12,%ymm10
   DB  196,65,44,94,214                    ; vdivps        %ymm14,%ymm10,%ymm10
   DB  196,67,45,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm10,%ymm12
-  DB  196,98,125,24,53,142,60,0,0         ; vbroadcastss  0x3c8e(%rip),%ymm14        # 48c0 <_sk_callback_hsw+0x184>
-  DB  196,98,125,24,61,137,60,0,0         ; vbroadcastss  0x3c89(%rip),%ymm15        # 48c4 <_sk_callback_hsw+0x188>
+  DB  196,98,125,24,53,10,61,0,0          ; vbroadcastss  0x3d0a(%rip),%ymm14        # 493c <_sk_callback_hsw+0x183>
+  DB  196,98,125,24,61,5,61,0,0           ; vbroadcastss  0x3d05(%rip),%ymm15        # 4940 <_sk_callback_hsw+0x187>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,66,93,184,238                   ; vfmadd231ps   %ymm14,%ymm4,%ymm13
-  DB  196,226,125,24,5,122,60,0,0         ; vbroadcastss  0x3c7a(%rip),%ymm0        # 48c8 <_sk_callback_hsw+0x18c>
+  DB  196,226,125,24,5,246,60,0,0         ; vbroadcastss  0x3cf6(%rip),%ymm0        # 4944 <_sk_callback_hsw+0x18b>
   DB  196,98,77,184,232                   ; vfmadd231ps   %ymm0,%ymm6,%ymm13
   DB  196,65,116,89,215                   ; vmulps        %ymm15,%ymm1,%ymm10
   DB  196,66,53,184,214                   ; vfmadd231ps   %ymm14,%ymm9,%ymm10
@@ -865,7 +865,7 @@ _sk_hue_hsw LABEL PROC
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
   DB  196,65,36,95,200                    ; vmaxps        %ymm8,%ymm11,%ymm9
   DB  196,65,116,95,192                   ; vmaxps        %ymm8,%ymm1,%ymm8
-  DB  196,226,125,24,13,103,59,0,0        ; vbroadcastss  0x3b67(%rip),%ymm1        # 48cc <_sk_callback_hsw+0x190>
+  DB  196,226,125,24,13,227,59,0,0        ; vbroadcastss  0x3be3(%rip),%ymm1        # 4948 <_sk_callback_hsw+0x18f>
   DB  197,116,92,215                      ; vsubps        %ymm7,%ymm1,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,116,92,219                      ; vsubps        %ymm3,%ymm1,%ymm11
@@ -919,11 +919,11 @@ _sk_saturation_hsw LABEL PROC
   DB  196,65,28,89,210                    ; vmulps        %ymm10,%ymm12,%ymm10
   DB  196,65,44,94,214                    ; vdivps        %ymm14,%ymm10,%ymm10
   DB  196,67,45,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm10,%ymm12
-  DB  196,98,125,24,53,120,58,0,0         ; vbroadcastss  0x3a78(%rip),%ymm14        # 48d0 <_sk_callback_hsw+0x194>
-  DB  196,98,125,24,61,115,58,0,0         ; vbroadcastss  0x3a73(%rip),%ymm15        # 48d4 <_sk_callback_hsw+0x198>
+  DB  196,98,125,24,53,244,58,0,0         ; vbroadcastss  0x3af4(%rip),%ymm14        # 494c <_sk_callback_hsw+0x193>
+  DB  196,98,125,24,61,239,58,0,0         ; vbroadcastss  0x3aef(%rip),%ymm15        # 4950 <_sk_callback_hsw+0x197>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,66,93,184,238                   ; vfmadd231ps   %ymm14,%ymm4,%ymm13
-  DB  196,226,125,24,5,100,58,0,0         ; vbroadcastss  0x3a64(%rip),%ymm0        # 48d8 <_sk_callback_hsw+0x19c>
+  DB  196,226,125,24,5,224,58,0,0         ; vbroadcastss  0x3ae0(%rip),%ymm0        # 4954 <_sk_callback_hsw+0x19b>
   DB  196,98,77,184,232                   ; vfmadd231ps   %ymm0,%ymm6,%ymm13
   DB  196,65,116,89,215                   ; vmulps        %ymm15,%ymm1,%ymm10
   DB  196,66,53,184,214                   ; vfmadd231ps   %ymm14,%ymm9,%ymm10
@@ -978,7 +978,7 @@ _sk_saturation_hsw LABEL PROC
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
   DB  196,65,36,95,200                    ; vmaxps        %ymm8,%ymm11,%ymm9
   DB  196,65,116,95,192                   ; vmaxps        %ymm8,%ymm1,%ymm8
-  DB  196,226,125,24,13,81,57,0,0         ; vbroadcastss  0x3951(%rip),%ymm1        # 48dc <_sk_callback_hsw+0x1a0>
+  DB  196,226,125,24,13,205,57,0,0        ; vbroadcastss  0x39cd(%rip),%ymm1        # 4958 <_sk_callback_hsw+0x19f>
   DB  197,116,92,215                      ; vsubps        %ymm7,%ymm1,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,116,92,219                      ; vsubps        %ymm3,%ymm1,%ymm11
@@ -1006,11 +1006,11 @@ _sk_color_hsw LABEL PROC
   DB  197,108,89,199                      ; vmulps        %ymm7,%ymm2,%ymm8
   DB  197,116,89,215                      ; vmulps        %ymm7,%ymm1,%ymm10
   DB  197,52,89,223                       ; vmulps        %ymm7,%ymm9,%ymm11
-  DB  196,98,125,24,45,228,56,0,0         ; vbroadcastss  0x38e4(%rip),%ymm13        # 48e0 <_sk_callback_hsw+0x1a4>
-  DB  196,98,125,24,53,223,56,0,0         ; vbroadcastss  0x38df(%rip),%ymm14        # 48e4 <_sk_callback_hsw+0x1a8>
+  DB  196,98,125,24,45,96,57,0,0          ; vbroadcastss  0x3960(%rip),%ymm13        # 495c <_sk_callback_hsw+0x1a3>
+  DB  196,98,125,24,53,91,57,0,0          ; vbroadcastss  0x395b(%rip),%ymm14        # 4960 <_sk_callback_hsw+0x1a7>
   DB  196,65,84,89,230                    ; vmulps        %ymm14,%ymm5,%ymm12
   DB  196,66,93,184,229                   ; vfmadd231ps   %ymm13,%ymm4,%ymm12
-  DB  196,98,125,24,61,208,56,0,0         ; vbroadcastss  0x38d0(%rip),%ymm15        # 48e8 <_sk_callback_hsw+0x1ac>
+  DB  196,98,125,24,61,76,57,0,0          ; vbroadcastss  0x394c(%rip),%ymm15        # 4964 <_sk_callback_hsw+0x1ab>
   DB  196,66,77,184,231                   ; vfmadd231ps   %ymm15,%ymm6,%ymm12
   DB  196,65,44,89,206                    ; vmulps        %ymm14,%ymm10,%ymm9
   DB  196,66,61,184,205                   ; vfmadd231ps   %ymm13,%ymm8,%ymm9
@@ -1066,7 +1066,7 @@ _sk_color_hsw LABEL PROC
   DB  196,193,116,95,206                  ; vmaxps        %ymm14,%ymm1,%ymm1
   DB  196,65,44,95,198                    ; vmaxps        %ymm14,%ymm10,%ymm8
   DB  196,65,124,95,206                   ; vmaxps        %ymm14,%ymm0,%ymm9
-  DB  196,226,125,24,5,178,55,0,0         ; vbroadcastss  0x37b2(%rip),%ymm0        # 48ec <_sk_callback_hsw+0x1b0>
+  DB  196,226,125,24,5,46,56,0,0          ; vbroadcastss  0x382e(%rip),%ymm0        # 4968 <_sk_callback_hsw+0x1af>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -1094,11 +1094,11 @@ _sk_luminosity_hsw LABEL PROC
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
   DB  197,100,89,213                      ; vmulps        %ymm5,%ymm3,%ymm10
   DB  197,100,89,222                      ; vmulps        %ymm6,%ymm3,%ymm11
-  DB  196,98,125,24,45,69,55,0,0          ; vbroadcastss  0x3745(%rip),%ymm13        # 48f0 <_sk_callback_hsw+0x1b4>
-  DB  196,98,125,24,53,64,55,0,0          ; vbroadcastss  0x3740(%rip),%ymm14        # 48f4 <_sk_callback_hsw+0x1b8>
+  DB  196,98,125,24,45,193,55,0,0         ; vbroadcastss  0x37c1(%rip),%ymm13        # 496c <_sk_callback_hsw+0x1b3>
+  DB  196,98,125,24,53,188,55,0,0         ; vbroadcastss  0x37bc(%rip),%ymm14        # 4970 <_sk_callback_hsw+0x1b7>
   DB  196,65,116,89,230                   ; vmulps        %ymm14,%ymm1,%ymm12
   DB  196,66,109,184,229                  ; vfmadd231ps   %ymm13,%ymm2,%ymm12
-  DB  196,98,125,24,61,49,55,0,0          ; vbroadcastss  0x3731(%rip),%ymm15        # 48f8 <_sk_callback_hsw+0x1bc>
+  DB  196,98,125,24,61,173,55,0,0         ; vbroadcastss  0x37ad(%rip),%ymm15        # 4974 <_sk_callback_hsw+0x1bb>
   DB  196,66,53,184,231                   ; vfmadd231ps   %ymm15,%ymm9,%ymm12
   DB  196,65,44,89,206                    ; vmulps        %ymm14,%ymm10,%ymm9
   DB  196,66,61,184,205                   ; vfmadd231ps   %ymm13,%ymm8,%ymm9
@@ -1154,7 +1154,7 @@ _sk_luminosity_hsw LABEL PROC
   DB  196,193,116,95,206                  ; vmaxps        %ymm14,%ymm1,%ymm1
   DB  196,65,44,95,198                    ; vmaxps        %ymm14,%ymm10,%ymm8
   DB  196,65,124,95,206                   ; vmaxps        %ymm14,%ymm0,%ymm9
-  DB  196,226,125,24,5,19,54,0,0          ; vbroadcastss  0x3613(%rip),%ymm0        # 48fc <_sk_callback_hsw+0x1c0>
+  DB  196,226,125,24,5,143,54,0,0         ; vbroadcastss  0x368f(%rip),%ymm0        # 4978 <_sk_callback_hsw+0x1bf>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -1184,7 +1184,7 @@ _sk_clamp_0_hsw LABEL PROC
 
 PUBLIC _sk_clamp_1_hsw
 _sk_clamp_1_hsw LABEL PROC
-  DB  196,98,125,24,5,172,53,0,0          ; vbroadcastss  0x35ac(%rip),%ymm8        # 4900 <_sk_callback_hsw+0x1c4>
+  DB  196,98,125,24,5,40,54,0,0           ; vbroadcastss  0x3628(%rip),%ymm8        # 497c <_sk_callback_hsw+0x1c3>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
@@ -1194,7 +1194,7 @@ _sk_clamp_1_hsw LABEL PROC
 
 PUBLIC _sk_clamp_a_hsw
 _sk_clamp_a_hsw LABEL PROC
-  DB  196,98,125,24,5,143,53,0,0          ; vbroadcastss  0x358f(%rip),%ymm8        # 4904 <_sk_callback_hsw+0x1c8>
+  DB  196,98,125,24,5,11,54,0,0           ; vbroadcastss  0x360b(%rip),%ymm8        # 4980 <_sk_callback_hsw+0x1c7>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
   DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
@@ -1266,7 +1266,7 @@ PUBLIC _sk_unpremul_hsw
 _sk_unpremul_hsw LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
-  DB  196,98,125,24,21,215,52,0,0         ; vbroadcastss  0x34d7(%rip),%ymm10        # 4908 <_sk_callback_hsw+0x1cc>
+  DB  196,98,125,24,21,83,53,0,0          ; vbroadcastss  0x3553(%rip),%ymm10        # 4984 <_sk_callback_hsw+0x1cb>
   DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
   DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
@@ -1277,16 +1277,16 @@ _sk_unpremul_hsw LABEL PROC
 
 PUBLIC _sk_from_srgb_hsw
 _sk_from_srgb_hsw LABEL PROC
-  DB  196,98,125,24,5,184,52,0,0          ; vbroadcastss  0x34b8(%rip),%ymm8        # 490c <_sk_callback_hsw+0x1d0>
+  DB  196,98,125,24,5,52,53,0,0           ; vbroadcastss  0x3534(%rip),%ymm8        # 4988 <_sk_callback_hsw+0x1cf>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
-  DB  196,98,125,24,29,170,52,0,0         ; vbroadcastss  0x34aa(%rip),%ymm11        # 4910 <_sk_callback_hsw+0x1d4>
-  DB  196,98,125,24,37,165,52,0,0         ; vbroadcastss  0x34a5(%rip),%ymm12        # 4914 <_sk_callback_hsw+0x1d8>
+  DB  196,98,125,24,29,38,53,0,0          ; vbroadcastss  0x3526(%rip),%ymm11        # 498c <_sk_callback_hsw+0x1d3>
+  DB  196,98,125,24,37,33,53,0,0          ; vbroadcastss  0x3521(%rip),%ymm12        # 4990 <_sk_callback_hsw+0x1d7>
   DB  196,65,124,40,236                   ; vmovaps       %ymm12,%ymm13
   DB  196,66,125,168,235                  ; vfmadd213ps   %ymm11,%ymm0,%ymm13
-  DB  196,98,125,24,53,150,52,0,0         ; vbroadcastss  0x3496(%rip),%ymm14        # 4918 <_sk_callback_hsw+0x1dc>
+  DB  196,98,125,24,53,18,53,0,0          ; vbroadcastss  0x3512(%rip),%ymm14        # 4994 <_sk_callback_hsw+0x1db>
   DB  196,66,45,168,238                   ; vfmadd213ps   %ymm14,%ymm10,%ymm13
-  DB  196,98,125,24,21,140,52,0,0         ; vbroadcastss  0x348c(%rip),%ymm10        # 491c <_sk_callback_hsw+0x1e0>
+  DB  196,98,125,24,21,8,53,0,0           ; vbroadcastss  0x3508(%rip),%ymm10        # 4998 <_sk_callback_hsw+0x1df>
   DB  196,193,124,194,194,1               ; vcmpltps      %ymm10,%ymm0,%ymm0
   DB  196,195,21,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm13,%ymm0
   DB  196,65,116,89,200                   ; vmulps        %ymm8,%ymm1,%ymm9
@@ -1308,19 +1308,19 @@ _sk_from_srgb_hsw LABEL PROC
 PUBLIC _sk_to_srgb_hsw
 _sk_to_srgb_hsw LABEL PROC
   DB  197,124,82,200                      ; vrsqrtps      %ymm0,%ymm9
-  DB  196,98,125,24,5,48,52,0,0           ; vbroadcastss  0x3430(%rip),%ymm8        # 4920 <_sk_callback_hsw+0x1e4>
+  DB  196,98,125,24,5,172,52,0,0          ; vbroadcastss  0x34ac(%rip),%ymm8        # 499c <_sk_callback_hsw+0x1e3>
   DB  196,65,124,89,208                   ; vmulps        %ymm8,%ymm0,%ymm10
-  DB  196,98,125,24,29,38,52,0,0          ; vbroadcastss  0x3426(%rip),%ymm11        # 4924 <_sk_callback_hsw+0x1e8>
-  DB  196,98,125,24,37,33,52,0,0          ; vbroadcastss  0x3421(%rip),%ymm12        # 4928 <_sk_callback_hsw+0x1ec>
+  DB  196,98,125,24,29,162,52,0,0         ; vbroadcastss  0x34a2(%rip),%ymm11        # 49a0 <_sk_callback_hsw+0x1e7>
+  DB  196,98,125,24,37,157,52,0,0         ; vbroadcastss  0x349d(%rip),%ymm12        # 49a4 <_sk_callback_hsw+0x1eb>
   DB  196,65,124,40,236                   ; vmovaps       %ymm12,%ymm13
   DB  196,66,53,168,235                   ; vfmadd213ps   %ymm11,%ymm9,%ymm13
-  DB  196,98,125,24,53,18,52,0,0          ; vbroadcastss  0x3412(%rip),%ymm14        # 492c <_sk_callback_hsw+0x1f0>
+  DB  196,98,125,24,53,142,52,0,0         ; vbroadcastss  0x348e(%rip),%ymm14        # 49a8 <_sk_callback_hsw+0x1ef>
   DB  196,66,53,168,238                   ; vfmadd213ps   %ymm14,%ymm9,%ymm13
-  DB  196,98,125,24,61,8,52,0,0           ; vbroadcastss  0x3408(%rip),%ymm15        # 4930 <_sk_callback_hsw+0x1f4>
+  DB  196,98,125,24,61,132,52,0,0         ; vbroadcastss  0x3484(%rip),%ymm15        # 49ac <_sk_callback_hsw+0x1f3>
   DB  196,65,52,88,207                    ; vaddps        %ymm15,%ymm9,%ymm9
   DB  196,65,124,83,201                   ; vrcpps        %ymm9,%ymm9
   DB  196,65,20,89,201                    ; vmulps        %ymm9,%ymm13,%ymm9
-  DB  196,98,125,24,45,244,51,0,0         ; vbroadcastss  0x33f4(%rip),%ymm13        # 4934 <_sk_callback_hsw+0x1f8>
+  DB  196,98,125,24,45,112,52,0,0         ; vbroadcastss  0x3470(%rip),%ymm13        # 49b0 <_sk_callback_hsw+0x1f7>
   DB  196,193,124,194,197,1               ; vcmpltps      %ymm13,%ymm0,%ymm0
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
@@ -1352,26 +1352,26 @@ _sk_rgb_to_hsl_hsw LABEL PROC
   DB  197,124,93,201                      ; vminps        %ymm1,%ymm0,%ymm9
   DB  197,52,93,202                       ; vminps        %ymm2,%ymm9,%ymm9
   DB  196,65,60,92,209                    ; vsubps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,105,51,0,0         ; vbroadcastss  0x3369(%rip),%ymm11        # 4938 <_sk_callback_hsw+0x1fc>
+  DB  196,98,125,24,29,229,51,0,0         ; vbroadcastss  0x33e5(%rip),%ymm11        # 49b4 <_sk_callback_hsw+0x1fb>
   DB  196,65,36,94,218                    ; vdivps        %ymm10,%ymm11,%ymm11
   DB  197,116,92,226                      ; vsubps        %ymm2,%ymm1,%ymm12
   DB  197,116,194,234,1                   ; vcmpltps      %ymm2,%ymm1,%ymm13
-  DB  196,98,125,24,53,86,51,0,0          ; vbroadcastss  0x3356(%rip),%ymm14        # 493c <_sk_callback_hsw+0x200>
+  DB  196,98,125,24,53,210,51,0,0         ; vbroadcastss  0x33d2(%rip),%ymm14        # 49b8 <_sk_callback_hsw+0x1ff>
   DB  196,65,4,87,255                     ; vxorps        %ymm15,%ymm15,%ymm15
   DB  196,67,5,74,238,208                 ; vblendvps     %ymm13,%ymm14,%ymm15,%ymm13
   DB  196,66,37,168,229                   ; vfmadd213ps   %ymm13,%ymm11,%ymm12
   DB  197,236,92,208                      ; vsubps        %ymm0,%ymm2,%ymm2
   DB  197,124,92,233                      ; vsubps        %ymm1,%ymm0,%ymm13
-  DB  196,98,125,24,53,61,51,0,0          ; vbroadcastss  0x333d(%rip),%ymm14        # 4944 <_sk_callback_hsw+0x208>
+  DB  196,98,125,24,53,185,51,0,0         ; vbroadcastss  0x33b9(%rip),%ymm14        # 49c0 <_sk_callback_hsw+0x207>
   DB  196,66,37,168,238                   ; vfmadd213ps   %ymm14,%ymm11,%ymm13
-  DB  196,98,125,24,53,43,51,0,0          ; vbroadcastss  0x332b(%rip),%ymm14        # 4940 <_sk_callback_hsw+0x204>
+  DB  196,98,125,24,53,167,51,0,0         ; vbroadcastss  0x33a7(%rip),%ymm14        # 49bc <_sk_callback_hsw+0x203>
   DB  196,194,37,168,214                  ; vfmadd213ps   %ymm14,%ymm11,%ymm2
   DB  197,188,194,201,0                   ; vcmpeqps      %ymm1,%ymm8,%ymm1
   DB  196,227,21,74,202,16                ; vblendvps     %ymm1,%ymm2,%ymm13,%ymm1
   DB  197,188,194,192,0                   ; vcmpeqps      %ymm0,%ymm8,%ymm0
   DB  196,195,117,74,196,0                ; vblendvps     %ymm0,%ymm12,%ymm1,%ymm0
   DB  196,193,60,88,201                   ; vaddps        %ymm9,%ymm8,%ymm1
-  DB  196,98,125,24,29,14,51,0,0          ; vbroadcastss  0x330e(%rip),%ymm11        # 494c <_sk_callback_hsw+0x210>
+  DB  196,98,125,24,29,138,51,0,0         ; vbroadcastss  0x338a(%rip),%ymm11        # 49c8 <_sk_callback_hsw+0x20f>
   DB  196,193,116,89,211                  ; vmulps        %ymm11,%ymm1,%ymm2
   DB  197,36,194,218,1                    ; vcmpltps      %ymm2,%ymm11,%ymm11
   DB  196,65,12,92,224                    ; vsubps        %ymm8,%ymm14,%ymm12
@@ -1381,7 +1381,7 @@ _sk_rgb_to_hsl_hsw LABEL PROC
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  196,195,125,74,199,128              ; vblendvps     %ymm8,%ymm15,%ymm0,%ymm0
   DB  196,195,117,74,207,128              ; vblendvps     %ymm8,%ymm15,%ymm1,%ymm1
-  DB  196,98,125,24,5,209,50,0,0          ; vbroadcastss  0x32d1(%rip),%ymm8        # 4948 <_sk_callback_hsw+0x20c>
+  DB  196,98,125,24,5,77,51,0,0           ; vbroadcastss  0x334d(%rip),%ymm8        # 49c4 <_sk_callback_hsw+0x20b>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1396,30 +1396,30 @@ _sk_hsl_to_rgb_hsw LABEL PROC
   DB  197,252,17,28,36                    ; vmovups       %ymm3,(%rsp)
   DB  197,252,40,233                      ; vmovaps       %ymm1,%ymm5
   DB  197,252,40,224                      ; vmovaps       %ymm0,%ymm4
-  DB  196,98,125,24,5,152,50,0,0          ; vbroadcastss  0x3298(%rip),%ymm8        # 4950 <_sk_callback_hsw+0x214>
+  DB  196,98,125,24,5,20,51,0,0           ; vbroadcastss  0x3314(%rip),%ymm8        # 49cc <_sk_callback_hsw+0x213>
   DB  197,60,194,202,2                    ; vcmpleps      %ymm2,%ymm8,%ymm9
   DB  197,84,89,210                       ; vmulps        %ymm2,%ymm5,%ymm10
   DB  196,65,84,92,218                    ; vsubps        %ymm10,%ymm5,%ymm11
   DB  196,67,45,74,203,144                ; vblendvps     %ymm9,%ymm11,%ymm10,%ymm9
   DB  197,52,88,210                       ; vaddps        %ymm2,%ymm9,%ymm10
-  DB  196,98,125,24,13,123,50,0,0         ; vbroadcastss  0x327b(%rip),%ymm9        # 4954 <_sk_callback_hsw+0x218>
+  DB  196,98,125,24,13,247,50,0,0         ; vbroadcastss  0x32f7(%rip),%ymm9        # 49d0 <_sk_callback_hsw+0x217>
   DB  196,66,109,170,202                  ; vfmsub213ps   %ymm10,%ymm2,%ymm9
-  DB  196,98,125,24,29,113,50,0,0         ; vbroadcastss  0x3271(%rip),%ymm11        # 4958 <_sk_callback_hsw+0x21c>
+  DB  196,98,125,24,29,237,50,0,0         ; vbroadcastss  0x32ed(%rip),%ymm11        # 49d4 <_sk_callback_hsw+0x21b>
   DB  196,65,92,88,219                    ; vaddps        %ymm11,%ymm4,%ymm11
   DB  196,67,125,8,227,1                  ; vroundps      $0x1,%ymm11,%ymm12
   DB  196,65,36,92,252                    ; vsubps        %ymm12,%ymm11,%ymm15
   DB  196,65,44,92,217                    ; vsubps        %ymm9,%ymm10,%ymm11
-  DB  196,98,125,24,45,91,50,0,0          ; vbroadcastss  0x325b(%rip),%ymm13        # 4960 <_sk_callback_hsw+0x224>
+  DB  196,98,125,24,45,215,50,0,0         ; vbroadcastss  0x32d7(%rip),%ymm13        # 49dc <_sk_callback_hsw+0x223>
   DB  196,193,4,89,197                    ; vmulps        %ymm13,%ymm15,%ymm0
-  DB  196,98,125,24,53,81,50,0,0          ; vbroadcastss  0x3251(%rip),%ymm14        # 4964 <_sk_callback_hsw+0x228>
+  DB  196,98,125,24,53,205,50,0,0         ; vbroadcastss  0x32cd(%rip),%ymm14        # 49e0 <_sk_callback_hsw+0x227>
   DB  197,12,92,224                       ; vsubps        %ymm0,%ymm14,%ymm12
   DB  196,66,37,168,225                   ; vfmadd213ps   %ymm9,%ymm11,%ymm12
-  DB  196,226,125,24,29,55,50,0,0         ; vbroadcastss  0x3237(%rip),%ymm3        # 495c <_sk_callback_hsw+0x220>
+  DB  196,226,125,24,29,179,50,0,0        ; vbroadcastss  0x32b3(%rip),%ymm3        # 49d8 <_sk_callback_hsw+0x21f>
   DB  196,193,100,194,255,2               ; vcmpleps      %ymm15,%ymm3,%ymm7
   DB  196,195,29,74,249,112               ; vblendvps     %ymm7,%ymm9,%ymm12,%ymm7
   DB  196,65,60,194,231,2                 ; vcmpleps      %ymm15,%ymm8,%ymm12
   DB  196,227,45,74,255,192               ; vblendvps     %ymm12,%ymm7,%ymm10,%ymm7
-  DB  196,98,125,24,37,34,50,0,0          ; vbroadcastss  0x3222(%rip),%ymm12        # 4968 <_sk_callback_hsw+0x22c>
+  DB  196,98,125,24,37,158,50,0,0         ; vbroadcastss  0x329e(%rip),%ymm12        # 49e4 <_sk_callback_hsw+0x22b>
   DB  196,65,28,194,255,2                 ; vcmpleps      %ymm15,%ymm12,%ymm15
   DB  196,194,37,168,193                  ; vfmadd213ps   %ymm9,%ymm11,%ymm0
   DB  196,99,125,74,255,240               ; vblendvps     %ymm15,%ymm7,%ymm0,%ymm15
@@ -1435,7 +1435,7 @@ _sk_hsl_to_rgb_hsw LABEL PROC
   DB  197,156,194,192,2                   ; vcmpleps      %ymm0,%ymm12,%ymm0
   DB  196,194,37,168,249                  ; vfmadd213ps   %ymm9,%ymm11,%ymm7
   DB  196,227,69,74,201,0                 ; vblendvps     %ymm0,%ymm1,%ymm7,%ymm1
-  DB  196,226,125,24,5,206,49,0,0         ; vbroadcastss  0x31ce(%rip),%ymm0        # 496c <_sk_callback_hsw+0x230>
+  DB  196,226,125,24,5,74,50,0,0          ; vbroadcastss  0x324a(%rip),%ymm0        # 49e8 <_sk_callback_hsw+0x22f>
   DB  197,220,88,192                      ; vaddps        %ymm0,%ymm4,%ymm0
   DB  196,227,125,8,224,1                 ; vroundps      $0x1,%ymm0,%ymm4
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
@@ -1485,7 +1485,7 @@ _sk_scale_u8_hsw LABEL PROC
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,8,49,0,0           ; vbroadcastss  0x3108(%rip),%ymm9        # 4970 <_sk_callback_hsw+0x234>
+  DB  196,98,125,24,13,132,49,0,0         ; vbroadcastss  0x3184(%rip),%ymm9        # 49ec <_sk_callback_hsw+0x233>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -1533,7 +1533,7 @@ _sk_lerp_u8_hsw LABEL PROC
   DB  197,122,126,0                       ; vmovq         (%rax),%xmm8
   DB  196,66,125,49,192                   ; vpmovzxbd     %xmm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,117,48,0,0         ; vbroadcastss  0x3075(%rip),%ymm9        # 4974 <_sk_callback_hsw+0x238>
+  DB  196,98,125,24,13,241,48,0,0         ; vbroadcastss  0x30f1(%rip),%ymm9        # 49f0 <_sk_callback_hsw+0x237>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,226,61,168,196                  ; vfmadd213ps   %ymm4,%ymm8,%ymm0
@@ -1567,20 +1567,20 @@ _sk_lerp_565_hsw LABEL PROC
   DB  15,133,169,0,0,0                    ; jne           1a0b <_sk_lerp_565_hsw+0xb7>
   DB  196,65,122,111,4,122                ; vmovdqu       (%r10,%rdi,2),%xmm8
   DB  196,66,125,51,192                   ; vpmovzxwd     %xmm8,%ymm8
-  DB  196,98,125,88,13,2,48,0,0           ; vpbroadcastd  0x3002(%rip),%ymm9        # 4978 <_sk_callback_hsw+0x23c>
+  DB  196,98,125,88,13,126,48,0,0         ; vpbroadcastd  0x307e(%rip),%ymm9        # 49f4 <_sk_callback_hsw+0x23b>
   DB  196,65,61,219,201                   ; vpand         %ymm9,%ymm8,%ymm9
   DB  196,65,124,91,201                   ; vcvtdq2ps     %ymm9,%ymm9
-  DB  196,98,125,24,21,243,47,0,0         ; vbroadcastss  0x2ff3(%rip),%ymm10        # 497c <_sk_callback_hsw+0x240>
+  DB  196,98,125,24,21,111,48,0,0         ; vbroadcastss  0x306f(%rip),%ymm10        # 49f8 <_sk_callback_hsw+0x23f>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,88,21,233,47,0,0         ; vpbroadcastd  0x2fe9(%rip),%ymm10        # 4980 <_sk_callback_hsw+0x244>
+  DB  196,98,125,88,21,101,48,0,0         ; vpbroadcastd  0x3065(%rip),%ymm10        # 49fc <_sk_callback_hsw+0x243>
   DB  196,65,61,219,210                   ; vpand         %ymm10,%ymm8,%ymm10
   DB  196,65,124,91,210                   ; vcvtdq2ps     %ymm10,%ymm10
-  DB  196,98,125,24,29,218,47,0,0         ; vbroadcastss  0x2fda(%rip),%ymm11        # 4984 <_sk_callback_hsw+0x248>
+  DB  196,98,125,24,29,86,48,0,0          ; vbroadcastss  0x3056(%rip),%ymm11        # 4a00 <_sk_callback_hsw+0x247>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,88,29,208,47,0,0         ; vpbroadcastd  0x2fd0(%rip),%ymm11        # 4988 <_sk_callback_hsw+0x24c>
+  DB  196,98,125,88,29,76,48,0,0          ; vpbroadcastd  0x304c(%rip),%ymm11        # 4a04 <_sk_callback_hsw+0x24b>
   DB  196,65,61,219,195                   ; vpand         %ymm11,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,29,193,47,0,0         ; vbroadcastss  0x2fc1(%rip),%ymm11        # 498c <_sk_callback_hsw+0x250>
+  DB  196,98,125,24,29,61,48,0,0          ; vbroadcastss  0x303d(%rip),%ymm11        # 4a08 <_sk_callback_hsw+0x24f>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,226,53,168,196                  ; vfmadd213ps   %ymm4,%ymm9,%ymm0
@@ -1621,7 +1621,7 @@ _sk_lerp_565_hsw LABEL PROC
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  233,255,255,255,225                 ; jmpq          ffffffffe2001a84 <_sk_callback_hsw+0xffffffffe1ffd348>
+  DB  233,255,255,255,225                 ; jmpq          ffffffffe2001a84 <_sk_callback_hsw+0xffffffffe1ffd2cb>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
@@ -1648,21 +1648,21 @@ _sk_load_tables_hsw LABEL PROC
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,105                             ; jne           1b16 <_sk_load_tables_hsw+0x7e>
   DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
-  DB  197,229,219,13,134,49,0,0           ; vpand         0x3186(%rip),%ymm3,%ymm1        # 4c40 <_sk_callback_hsw+0x504>
+  DB  197,229,219,13,6,50,0,0             ; vpand         0x3206(%rip),%ymm3,%ymm1        # 4cc0 <_sk_callback_hsw+0x507>
   DB  196,65,61,118,192                   ; vpcmpeqd      %ymm8,%ymm8,%ymm8
   DB  72,139,72,8                         ; mov           0x8(%rax),%rcx
   DB  76,139,72,16                        ; mov           0x10(%rax),%r9
   DB  197,237,118,210                     ; vpcmpeqd      %ymm2,%ymm2,%ymm2
   DB  196,226,109,146,4,137               ; vgatherdps    %ymm2,(%rcx,%ymm1,4),%ymm0
-  DB  196,226,101,0,21,134,49,0,0         ; vpshufb       0x3186(%rip),%ymm3,%ymm2        # 4c60 <_sk_callback_hsw+0x524>
+  DB  196,226,101,0,21,6,50,0,0           ; vpshufb       0x3206(%rip),%ymm3,%ymm2        # 4ce0 <_sk_callback_hsw+0x527>
   DB  196,65,53,118,201                   ; vpcmpeqd      %ymm9,%ymm9,%ymm9
   DB  196,194,53,146,12,145               ; vgatherdps    %ymm9,(%r9,%ymm2,4),%ymm1
   DB  72,139,64,24                        ; mov           0x18(%rax),%rax
-  DB  196,98,101,0,13,142,49,0,0          ; vpshufb       0x318e(%rip),%ymm3,%ymm9        # 4c80 <_sk_callback_hsw+0x544>
+  DB  196,98,101,0,13,14,50,0,0           ; vpshufb       0x320e(%rip),%ymm3,%ymm9        # 4d00 <_sk_callback_hsw+0x547>
   DB  196,162,61,146,20,136               ; vgatherdps    %ymm8,(%rax,%ymm9,4),%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,134,46,0,0          ; vbroadcastss  0x2e86(%rip),%ymm8        # 4990 <_sk_callback_hsw+0x254>
+  DB  196,98,125,24,5,2,47,0,0            ; vbroadcastss  0x2f02(%rip),%ymm8        # 4a0c <_sk_callback_hsw+0x253>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,137,193                          ; mov           %r8,%rcx
@@ -1699,7 +1699,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
   DB  197,185,108,200                     ; vpunpcklqdq   %xmm0,%xmm8,%xmm1
   DB  197,185,109,208                     ; vpunpckhqdq   %xmm0,%xmm8,%xmm2
   DB  197,49,108,195                      ; vpunpcklqdq   %xmm3,%xmm9,%xmm8
-  DB  197,121,111,21,26,50,0,0            ; vmovdqa       0x321a(%rip),%xmm10        # 4dc0 <_sk_callback_hsw+0x684>
+  DB  197,121,111,21,154,50,0,0           ; vmovdqa       0x329a(%rip),%xmm10        # 4e40 <_sk_callback_hsw+0x687>
   DB  196,193,113,219,194                 ; vpand         %xmm10,%xmm1,%xmm0
   DB  196,226,125,51,200                  ; vpmovzxwd     %xmm0,%ymm1
   DB  196,65,37,118,219                   ; vpcmpeqd      %ymm11,%ymm11,%ymm11
@@ -1721,7 +1721,7 @@ _sk_load_tables_u16_be_hsw LABEL PROC
   DB  197,185,235,219                     ; vpor          %xmm3,%xmm8,%xmm3
   DB  196,226,125,51,219                  ; vpmovzxwd     %xmm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,127,45,0,0          ; vbroadcastss  0x2d7f(%rip),%ymm8        # 4994 <_sk_callback_hsw+0x258>
+  DB  196,98,125,24,5,251,45,0,0          ; vbroadcastss  0x2dfb(%rip),%ymm8        # 4a10 <_sk_callback_hsw+0x257>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -1779,7 +1779,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
   DB  197,185,108,218                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm3
   DB  197,185,109,210                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm2
   DB  197,121,108,193                     ; vpunpcklqdq   %xmm1,%xmm0,%xmm8
-  DB  197,121,111,13,186,48,0,0           ; vmovdqa       0x30ba(%rip),%xmm9        # 4dd0 <_sk_callback_hsw+0x694>
+  DB  197,121,111,13,58,49,0,0            ; vmovdqa       0x313a(%rip),%xmm9        # 4e50 <_sk_callback_hsw+0x697>
   DB  196,193,97,219,193                  ; vpand         %xmm9,%xmm3,%xmm0
   DB  196,226,125,51,200                  ; vpmovzxwd     %xmm0,%ymm1
   DB  197,229,118,219                     ; vpcmpeqd      %ymm3,%ymm3,%ymm3
@@ -1796,7 +1796,7 @@ _sk_load_tables_rgb_u16_be_hsw LABEL PROC
   DB  196,98,125,51,194                   ; vpmovzxwd     %xmm2,%ymm8
   DB  196,162,101,146,20,128              ; vgatherdps    %ymm3,(%rax,%ymm8,4),%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,45,44,0,0         ; vbroadcastss  0x2c2d(%rip),%ymm3        # 4998 <_sk_callback_hsw+0x25c>
+  DB  196,226,125,24,29,169,44,0,0        ; vbroadcastss  0x2ca9(%rip),%ymm3        # 4a14 <_sk_callback_hsw+0x25b>
   DB  255,224                             ; jmpq          *%rax
   DB  196,129,121,110,4,72                ; vmovd         (%r8,%r9,2),%xmm0
   DB  196,129,121,196,68,72,4,2           ; vpinsrw       $0x2,0x4(%r8,%r9,2),%xmm0,%xmm0
@@ -1841,7 +1841,7 @@ _sk_byte_tables_hsw LABEL PROC
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,107,43,0,0          ; vbroadcastss  0x2b6b(%rip),%ymm8        # 499c <_sk_callback_hsw+0x260>
+  DB  196,98,125,24,5,231,43,0,0          ; vbroadcastss  0x2be7(%rip),%ymm8        # 4a18 <_sk_callback_hsw+0x25f>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
@@ -1878,7 +1878,7 @@ _sk_byte_tables_hsw LABEL PROC
   DB  196,227,121,32,197,7                ; vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,188,42,0,0         ; vbroadcastss  0x2abc(%rip),%ymm9        # 49a0 <_sk_callback_hsw+0x264>
+  DB  196,98,125,24,13,56,43,0,0          ; vbroadcastss  0x2b38(%rip),%ymm9        # 4a1c <_sk_callback_hsw+0x263>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -2037,7 +2037,7 @@ _sk_byte_tables_rgb_hsw LABEL PROC
   DB  196,227,121,32,197,7                ; vpinsrb       $0x7,%ebp,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,245,39,0,0         ; vbroadcastss  0x27f5(%rip),%ymm9        # 49a4 <_sk_callback_hsw+0x268>
+  DB  196,98,125,24,13,113,40,0,0         ; vbroadcastss  0x2871(%rip),%ymm9        # 4a20 <_sk_callback_hsw+0x267>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -2190,33 +2190,33 @@ _sk_parametric_r_hsw LABEL PROC
   DB  196,66,125,168,211                  ; vfmadd213ps   %ymm11,%ymm0,%ymm10
   DB  196,226,125,24,0                    ; vbroadcastss  (%rax),%ymm0
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,168,37,0,0         ; vbroadcastss  0x25a8(%rip),%ymm12        # 49a8 <_sk_callback_hsw+0x26c>
-  DB  196,98,125,24,45,163,37,0,0         ; vbroadcastss  0x25a3(%rip),%ymm13        # 49ac <_sk_callback_hsw+0x270>
+  DB  196,98,125,24,37,36,38,0,0          ; vbroadcastss  0x2624(%rip),%ymm12        # 4a24 <_sk_callback_hsw+0x26b>
+  DB  196,98,125,24,45,31,38,0,0          ; vbroadcastss  0x261f(%rip),%ymm13        # 4a28 <_sk_callback_hsw+0x26f>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,153,37,0,0         ; vbroadcastss  0x2599(%rip),%ymm13        # 49b0 <_sk_callback_hsw+0x274>
+  DB  196,98,125,24,45,21,38,0,0          ; vbroadcastss  0x2615(%rip),%ymm13        # 4a2c <_sk_callback_hsw+0x273>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,143,37,0,0         ; vbroadcastss  0x258f(%rip),%ymm13        # 49b4 <_sk_callback_hsw+0x278>
+  DB  196,98,125,24,45,11,38,0,0          ; vbroadcastss  0x260b(%rip),%ymm13        # 4a30 <_sk_callback_hsw+0x277>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,133,37,0,0         ; vbroadcastss  0x2585(%rip),%ymm11        # 49b8 <_sk_callback_hsw+0x27c>
+  DB  196,98,125,24,29,1,38,0,0           ; vbroadcastss  0x2601(%rip),%ymm11        # 4a34 <_sk_callback_hsw+0x27b>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,123,37,0,0         ; vbroadcastss  0x257b(%rip),%ymm12        # 49bc <_sk_callback_hsw+0x280>
+  DB  196,98,125,24,37,247,37,0,0         ; vbroadcastss  0x25f7(%rip),%ymm12        # 4a38 <_sk_callback_hsw+0x27f>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,113,37,0,0         ; vbroadcastss  0x2571(%rip),%ymm12        # 49c0 <_sk_callback_hsw+0x284>
+  DB  196,98,125,24,37,237,37,0,0         ; vbroadcastss  0x25ed(%rip),%ymm12        # 4a3c <_sk_callback_hsw+0x283>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  196,99,125,8,208,1                  ; vroundps      $0x1,%ymm0,%ymm10
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,82,37,0,0          ; vbroadcastss  0x2552(%rip),%ymm11        # 49c4 <_sk_callback_hsw+0x288>
+  DB  196,98,125,24,29,206,37,0,0         ; vbroadcastss  0x25ce(%rip),%ymm11        # 4a40 <_sk_callback_hsw+0x287>
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,72,37,0,0          ; vbroadcastss  0x2548(%rip),%ymm11        # 49c8 <_sk_callback_hsw+0x28c>
+  DB  196,98,125,24,29,196,37,0,0         ; vbroadcastss  0x25c4(%rip),%ymm11        # 4a44 <_sk_callback_hsw+0x28b>
   DB  196,98,45,172,216                   ; vfnmadd213ps  %ymm0,%ymm10,%ymm11
-  DB  196,226,125,24,5,62,37,0,0          ; vbroadcastss  0x253e(%rip),%ymm0        # 49cc <_sk_callback_hsw+0x290>
+  DB  196,226,125,24,5,186,37,0,0         ; vbroadcastss  0x25ba(%rip),%ymm0        # 4a48 <_sk_callback_hsw+0x28f>
   DB  196,193,124,92,194                  ; vsubps        %ymm10,%ymm0,%ymm0
-  DB  196,98,125,24,21,52,37,0,0          ; vbroadcastss  0x2534(%rip),%ymm10        # 49d0 <_sk_callback_hsw+0x294>
+  DB  196,98,125,24,21,176,37,0,0         ; vbroadcastss  0x25b0(%rip),%ymm10        # 4a4c <_sk_callback_hsw+0x293>
   DB  197,172,94,192                      ; vdivps        %ymm0,%ymm10,%ymm0
   DB  197,164,88,192                      ; vaddps        %ymm0,%ymm11,%ymm0
-  DB  196,98,125,24,21,39,37,0,0          ; vbroadcastss  0x2527(%rip),%ymm10        # 49d4 <_sk_callback_hsw+0x298>
+  DB  196,98,125,24,21,163,37,0,0         ; vbroadcastss  0x25a3(%rip),%ymm10        # 4a50 <_sk_callback_hsw+0x297>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -2224,7 +2224,7 @@ _sk_parametric_r_hsw LABEL PROC
   DB  196,195,125,74,193,128              ; vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,254,36,0,0          ; vbroadcastss  0x24fe(%rip),%ymm8        # 49d8 <_sk_callback_hsw+0x29c>
+  DB  196,98,125,24,5,122,37,0,0          ; vbroadcastss  0x257a(%rip),%ymm8        # 4a54 <_sk_callback_hsw+0x29b>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2242,33 +2242,33 @@ _sk_parametric_g_hsw LABEL PROC
   DB  196,66,117,168,211                  ; vfmadd213ps   %ymm11,%ymm1,%ymm10
   DB  196,226,125,24,8                    ; vbroadcastss  (%rax),%ymm1
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,182,36,0,0         ; vbroadcastss  0x24b6(%rip),%ymm12        # 49dc <_sk_callback_hsw+0x2a0>
-  DB  196,98,125,24,45,177,36,0,0         ; vbroadcastss  0x24b1(%rip),%ymm13        # 49e0 <_sk_callback_hsw+0x2a4>
+  DB  196,98,125,24,37,50,37,0,0          ; vbroadcastss  0x2532(%rip),%ymm12        # 4a58 <_sk_callback_hsw+0x29f>
+  DB  196,98,125,24,45,45,37,0,0          ; vbroadcastss  0x252d(%rip),%ymm13        # 4a5c <_sk_callback_hsw+0x2a3>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,167,36,0,0         ; vbroadcastss  0x24a7(%rip),%ymm13        # 49e4 <_sk_callback_hsw+0x2a8>
+  DB  196,98,125,24,45,35,37,0,0          ; vbroadcastss  0x2523(%rip),%ymm13        # 4a60 <_sk_callback_hsw+0x2a7>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,157,36,0,0         ; vbroadcastss  0x249d(%rip),%ymm13        # 49e8 <_sk_callback_hsw+0x2ac>
+  DB  196,98,125,24,45,25,37,0,0          ; vbroadcastss  0x2519(%rip),%ymm13        # 4a64 <_sk_callback_hsw+0x2ab>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,147,36,0,0         ; vbroadcastss  0x2493(%rip),%ymm11        # 49ec <_sk_callback_hsw+0x2b0>
+  DB  196,98,125,24,29,15,37,0,0          ; vbroadcastss  0x250f(%rip),%ymm11        # 4a68 <_sk_callback_hsw+0x2af>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,137,36,0,0         ; vbroadcastss  0x2489(%rip),%ymm12        # 49f0 <_sk_callback_hsw+0x2b4>
+  DB  196,98,125,24,37,5,37,0,0           ; vbroadcastss  0x2505(%rip),%ymm12        # 4a6c <_sk_callback_hsw+0x2b3>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,127,36,0,0         ; vbroadcastss  0x247f(%rip),%ymm12        # 49f4 <_sk_callback_hsw+0x2b8>
+  DB  196,98,125,24,37,251,36,0,0         ; vbroadcastss  0x24fb(%rip),%ymm12        # 4a70 <_sk_callback_hsw+0x2b7>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  196,99,125,8,209,1                  ; vroundps      $0x1,%ymm1,%ymm10
   DB  196,65,116,92,210                   ; vsubps        %ymm10,%ymm1,%ymm10
-  DB  196,98,125,24,29,96,36,0,0          ; vbroadcastss  0x2460(%rip),%ymm11        # 49f8 <_sk_callback_hsw+0x2bc>
+  DB  196,98,125,24,29,220,36,0,0         ; vbroadcastss  0x24dc(%rip),%ymm11        # 4a74 <_sk_callback_hsw+0x2bb>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,86,36,0,0          ; vbroadcastss  0x2456(%rip),%ymm11        # 49fc <_sk_callback_hsw+0x2c0>
+  DB  196,98,125,24,29,210,36,0,0         ; vbroadcastss  0x24d2(%rip),%ymm11        # 4a78 <_sk_callback_hsw+0x2bf>
   DB  196,98,45,172,217                   ; vfnmadd213ps  %ymm1,%ymm10,%ymm11
-  DB  196,226,125,24,13,76,36,0,0         ; vbroadcastss  0x244c(%rip),%ymm1        # 4a00 <_sk_callback_hsw+0x2c4>
+  DB  196,226,125,24,13,200,36,0,0        ; vbroadcastss  0x24c8(%rip),%ymm1        # 4a7c <_sk_callback_hsw+0x2c3>
   DB  196,193,116,92,202                  ; vsubps        %ymm10,%ymm1,%ymm1
-  DB  196,98,125,24,21,66,36,0,0          ; vbroadcastss  0x2442(%rip),%ymm10        # 4a04 <_sk_callback_hsw+0x2c8>
+  DB  196,98,125,24,21,190,36,0,0         ; vbroadcastss  0x24be(%rip),%ymm10        # 4a80 <_sk_callback_hsw+0x2c7>
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  197,164,88,201                      ; vaddps        %ymm1,%ymm11,%ymm1
-  DB  196,98,125,24,21,53,36,0,0          ; vbroadcastss  0x2435(%rip),%ymm10        # 4a08 <_sk_callback_hsw+0x2cc>
+  DB  196,98,125,24,21,177,36,0,0         ; vbroadcastss  0x24b1(%rip),%ymm10        # 4a84 <_sk_callback_hsw+0x2cb>
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -2276,7 +2276,7 @@ _sk_parametric_g_hsw LABEL PROC
   DB  196,195,117,74,201,128              ; vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,5,12,36,0,0           ; vbroadcastss  0x240c(%rip),%ymm8        # 4a0c <_sk_callback_hsw+0x2d0>
+  DB  196,98,125,24,5,136,36,0,0          ; vbroadcastss  0x2488(%rip),%ymm8        # 4a88 <_sk_callback_hsw+0x2cf>
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2294,33 +2294,33 @@ _sk_parametric_b_hsw LABEL PROC
   DB  196,66,109,168,211                  ; vfmadd213ps   %ymm11,%ymm2,%ymm10
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,196,35,0,0         ; vbroadcastss  0x23c4(%rip),%ymm12        # 4a10 <_sk_callback_hsw+0x2d4>
-  DB  196,98,125,24,45,191,35,0,0         ; vbroadcastss  0x23bf(%rip),%ymm13        # 4a14 <_sk_callback_hsw+0x2d8>
+  DB  196,98,125,24,37,64,36,0,0          ; vbroadcastss  0x2440(%rip),%ymm12        # 4a8c <_sk_callback_hsw+0x2d3>
+  DB  196,98,125,24,45,59,36,0,0          ; vbroadcastss  0x243b(%rip),%ymm13        # 4a90 <_sk_callback_hsw+0x2d7>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,181,35,0,0         ; vbroadcastss  0x23b5(%rip),%ymm13        # 4a18 <_sk_callback_hsw+0x2dc>
+  DB  196,98,125,24,45,49,36,0,0          ; vbroadcastss  0x2431(%rip),%ymm13        # 4a94 <_sk_callback_hsw+0x2db>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,171,35,0,0         ; vbroadcastss  0x23ab(%rip),%ymm13        # 4a1c <_sk_callback_hsw+0x2e0>
+  DB  196,98,125,24,45,39,36,0,0          ; vbroadcastss  0x2427(%rip),%ymm13        # 4a98 <_sk_callback_hsw+0x2df>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,161,35,0,0         ; vbroadcastss  0x23a1(%rip),%ymm11        # 4a20 <_sk_callback_hsw+0x2e4>
+  DB  196,98,125,24,29,29,36,0,0          ; vbroadcastss  0x241d(%rip),%ymm11        # 4a9c <_sk_callback_hsw+0x2e3>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,151,35,0,0         ; vbroadcastss  0x2397(%rip),%ymm12        # 4a24 <_sk_callback_hsw+0x2e8>
+  DB  196,98,125,24,37,19,36,0,0          ; vbroadcastss  0x2413(%rip),%ymm12        # 4aa0 <_sk_callback_hsw+0x2e7>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,141,35,0,0         ; vbroadcastss  0x238d(%rip),%ymm12        # 4a28 <_sk_callback_hsw+0x2ec>
+  DB  196,98,125,24,37,9,36,0,0           ; vbroadcastss  0x2409(%rip),%ymm12        # 4aa4 <_sk_callback_hsw+0x2eb>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  196,99,125,8,210,1                  ; vroundps      $0x1,%ymm2,%ymm10
   DB  196,65,108,92,210                   ; vsubps        %ymm10,%ymm2,%ymm10
-  DB  196,98,125,24,29,110,35,0,0         ; vbroadcastss  0x236e(%rip),%ymm11        # 4a2c <_sk_callback_hsw+0x2f0>
+  DB  196,98,125,24,29,234,35,0,0         ; vbroadcastss  0x23ea(%rip),%ymm11        # 4aa8 <_sk_callback_hsw+0x2ef>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,100,35,0,0         ; vbroadcastss  0x2364(%rip),%ymm11        # 4a30 <_sk_callback_hsw+0x2f4>
+  DB  196,98,125,24,29,224,35,0,0         ; vbroadcastss  0x23e0(%rip),%ymm11        # 4aac <_sk_callback_hsw+0x2f3>
   DB  196,98,45,172,218                   ; vfnmadd213ps  %ymm2,%ymm10,%ymm11
-  DB  196,226,125,24,21,90,35,0,0         ; vbroadcastss  0x235a(%rip),%ymm2        # 4a34 <_sk_callback_hsw+0x2f8>
+  DB  196,226,125,24,21,214,35,0,0        ; vbroadcastss  0x23d6(%rip),%ymm2        # 4ab0 <_sk_callback_hsw+0x2f7>
   DB  196,193,108,92,210                  ; vsubps        %ymm10,%ymm2,%ymm2
-  DB  196,98,125,24,21,80,35,0,0          ; vbroadcastss  0x2350(%rip),%ymm10        # 4a38 <_sk_callback_hsw+0x2fc>
+  DB  196,98,125,24,21,204,35,0,0         ; vbroadcastss  0x23cc(%rip),%ymm10        # 4ab4 <_sk_callback_hsw+0x2fb>
   DB  197,172,94,210                      ; vdivps        %ymm2,%ymm10,%ymm2
   DB  197,164,88,210                      ; vaddps        %ymm2,%ymm11,%ymm2
-  DB  196,98,125,24,21,67,35,0,0          ; vbroadcastss  0x2343(%rip),%ymm10        # 4a3c <_sk_callback_hsw+0x300>
+  DB  196,98,125,24,21,191,35,0,0         ; vbroadcastss  0x23bf(%rip),%ymm10        # 4ab8 <_sk_callback_hsw+0x2ff>
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  197,253,91,210                      ; vcvtps2dq     %ymm2,%ymm2
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -2328,7 +2328,7 @@ _sk_parametric_b_hsw LABEL PROC
   DB  196,195,109,74,209,128              ; vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,26,35,0,0           ; vbroadcastss  0x231a(%rip),%ymm8        # 4a40 <_sk_callback_hsw+0x304>
+  DB  196,98,125,24,5,150,35,0,0          ; vbroadcastss  0x2396(%rip),%ymm8        # 4abc <_sk_callback_hsw+0x303>
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2346,33 +2346,33 @@ _sk_parametric_a_hsw LABEL PROC
   DB  196,66,101,168,211                  ; vfmadd213ps   %ymm11,%ymm3,%ymm10
   DB  196,226,125,24,24                   ; vbroadcastss  (%rax),%ymm3
   DB  196,65,124,91,218                   ; vcvtdq2ps     %ymm10,%ymm11
-  DB  196,98,125,24,37,210,34,0,0         ; vbroadcastss  0x22d2(%rip),%ymm12        # 4a44 <_sk_callback_hsw+0x308>
-  DB  196,98,125,24,45,205,34,0,0         ; vbroadcastss  0x22cd(%rip),%ymm13        # 4a48 <_sk_callback_hsw+0x30c>
+  DB  196,98,125,24,37,78,35,0,0          ; vbroadcastss  0x234e(%rip),%ymm12        # 4ac0 <_sk_callback_hsw+0x307>
+  DB  196,98,125,24,45,73,35,0,0          ; vbroadcastss  0x2349(%rip),%ymm13        # 4ac4 <_sk_callback_hsw+0x30b>
   DB  196,65,44,84,213                    ; vandps        %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,195,34,0,0         ; vbroadcastss  0x22c3(%rip),%ymm13        # 4a4c <_sk_callback_hsw+0x310>
+  DB  196,98,125,24,45,63,35,0,0          ; vbroadcastss  0x233f(%rip),%ymm13        # 4ac8 <_sk_callback_hsw+0x30f>
   DB  196,65,44,86,213                    ; vorps         %ymm13,%ymm10,%ymm10
-  DB  196,98,125,24,45,185,34,0,0         ; vbroadcastss  0x22b9(%rip),%ymm13        # 4a50 <_sk_callback_hsw+0x314>
+  DB  196,98,125,24,45,53,35,0,0          ; vbroadcastss  0x2335(%rip),%ymm13        # 4acc <_sk_callback_hsw+0x313>
   DB  196,66,37,184,236                   ; vfmadd231ps   %ymm12,%ymm11,%ymm13
-  DB  196,98,125,24,29,175,34,0,0         ; vbroadcastss  0x22af(%rip),%ymm11        # 4a54 <_sk_callback_hsw+0x318>
+  DB  196,98,125,24,29,43,35,0,0          ; vbroadcastss  0x232b(%rip),%ymm11        # 4ad0 <_sk_callback_hsw+0x317>
   DB  196,66,45,172,221                   ; vfnmadd213ps  %ymm13,%ymm10,%ymm11
-  DB  196,98,125,24,37,165,34,0,0         ; vbroadcastss  0x22a5(%rip),%ymm12        # 4a58 <_sk_callback_hsw+0x31c>
+  DB  196,98,125,24,37,33,35,0,0          ; vbroadcastss  0x2321(%rip),%ymm12        # 4ad4 <_sk_callback_hsw+0x31b>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,155,34,0,0         ; vbroadcastss  0x229b(%rip),%ymm12        # 4a5c <_sk_callback_hsw+0x320>
+  DB  196,98,125,24,37,23,35,0,0          ; vbroadcastss  0x2317(%rip),%ymm12        # 4ad8 <_sk_callback_hsw+0x31f>
   DB  196,65,28,94,210                    ; vdivps        %ymm10,%ymm12,%ymm10
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  196,99,125,8,211,1                  ; vroundps      $0x1,%ymm3,%ymm10
   DB  196,65,100,92,210                   ; vsubps        %ymm10,%ymm3,%ymm10
-  DB  196,98,125,24,29,124,34,0,0         ; vbroadcastss  0x227c(%rip),%ymm11        # 4a60 <_sk_callback_hsw+0x324>
+  DB  196,98,125,24,29,248,34,0,0         ; vbroadcastss  0x22f8(%rip),%ymm11        # 4adc <_sk_callback_hsw+0x323>
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,114,34,0,0         ; vbroadcastss  0x2272(%rip),%ymm11        # 4a64 <_sk_callback_hsw+0x328>
+  DB  196,98,125,24,29,238,34,0,0         ; vbroadcastss  0x22ee(%rip),%ymm11        # 4ae0 <_sk_callback_hsw+0x327>
   DB  196,98,45,172,219                   ; vfnmadd213ps  %ymm3,%ymm10,%ymm11
-  DB  196,226,125,24,29,104,34,0,0        ; vbroadcastss  0x2268(%rip),%ymm3        # 4a68 <_sk_callback_hsw+0x32c>
+  DB  196,226,125,24,29,228,34,0,0        ; vbroadcastss  0x22e4(%rip),%ymm3        # 4ae4 <_sk_callback_hsw+0x32b>
   DB  196,193,100,92,218                  ; vsubps        %ymm10,%ymm3,%ymm3
-  DB  196,98,125,24,21,94,34,0,0          ; vbroadcastss  0x225e(%rip),%ymm10        # 4a6c <_sk_callback_hsw+0x330>
+  DB  196,98,125,24,21,218,34,0,0         ; vbroadcastss  0x22da(%rip),%ymm10        # 4ae8 <_sk_callback_hsw+0x32f>
   DB  197,172,94,219                      ; vdivps        %ymm3,%ymm10,%ymm3
   DB  197,164,88,219                      ; vaddps        %ymm3,%ymm11,%ymm3
-  DB  196,98,125,24,21,81,34,0,0          ; vbroadcastss  0x2251(%rip),%ymm10        # 4a70 <_sk_callback_hsw+0x334>
+  DB  196,98,125,24,21,205,34,0,0         ; vbroadcastss  0x22cd(%rip),%ymm10        # 4aec <_sk_callback_hsw+0x333>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,253,91,219                      ; vcvtps2dq     %ymm3,%ymm3
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -2380,33 +2380,33 @@ _sk_parametric_a_hsw LABEL PROC
   DB  196,195,101,74,217,128              ; vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
-  DB  196,98,125,24,5,40,34,0,0           ; vbroadcastss  0x2228(%rip),%ymm8        # 4a74 <_sk_callback_hsw+0x338>
+  DB  196,98,125,24,5,164,34,0,0          ; vbroadcastss  0x22a4(%rip),%ymm8        # 4af0 <_sk_callback_hsw+0x337>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_lab_to_xyz_hsw
 _sk_lab_to_xyz_hsw LABEL PROC
-  DB  196,98,125,24,5,26,34,0,0           ; vbroadcastss  0x221a(%rip),%ymm8        # 4a78 <_sk_callback_hsw+0x33c>
-  DB  196,98,125,24,13,21,34,0,0          ; vbroadcastss  0x2215(%rip),%ymm9        # 4a7c <_sk_callback_hsw+0x340>
-  DB  196,98,125,24,21,16,34,0,0          ; vbroadcastss  0x2210(%rip),%ymm10        # 4a80 <_sk_callback_hsw+0x344>
+  DB  196,98,125,24,5,150,34,0,0          ; vbroadcastss  0x2296(%rip),%ymm8        # 4af4 <_sk_callback_hsw+0x33b>
+  DB  196,98,125,24,13,145,34,0,0         ; vbroadcastss  0x2291(%rip),%ymm9        # 4af8 <_sk_callback_hsw+0x33f>
+  DB  196,98,125,24,21,140,34,0,0         ; vbroadcastss  0x228c(%rip),%ymm10        # 4afc <_sk_callback_hsw+0x343>
   DB  196,194,53,168,202                  ; vfmadd213ps   %ymm10,%ymm9,%ymm1
   DB  196,194,53,168,210                  ; vfmadd213ps   %ymm10,%ymm9,%ymm2
-  DB  196,98,125,24,13,1,34,0,0           ; vbroadcastss  0x2201(%rip),%ymm9        # 4a84 <_sk_callback_hsw+0x348>
+  DB  196,98,125,24,13,125,34,0,0         ; vbroadcastss  0x227d(%rip),%ymm9        # 4b00 <_sk_callback_hsw+0x347>
   DB  196,66,125,184,200                  ; vfmadd231ps   %ymm8,%ymm0,%ymm9
-  DB  196,226,125,24,5,247,33,0,0         ; vbroadcastss  0x21f7(%rip),%ymm0        # 4a88 <_sk_callback_hsw+0x34c>
+  DB  196,226,125,24,5,115,34,0,0         ; vbroadcastss  0x2273(%rip),%ymm0        # 4b04 <_sk_callback_hsw+0x34b>
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
-  DB  196,98,125,24,5,238,33,0,0          ; vbroadcastss  0x21ee(%rip),%ymm8        # 4a8c <_sk_callback_hsw+0x350>
+  DB  196,98,125,24,5,106,34,0,0          ; vbroadcastss  0x226a(%rip),%ymm8        # 4b08 <_sk_callback_hsw+0x34f>
   DB  196,98,117,168,192                  ; vfmadd213ps   %ymm0,%ymm1,%ymm8
-  DB  196,98,125,24,13,228,33,0,0         ; vbroadcastss  0x21e4(%rip),%ymm9        # 4a90 <_sk_callback_hsw+0x354>
+  DB  196,98,125,24,13,96,34,0,0          ; vbroadcastss  0x2260(%rip),%ymm9        # 4b0c <_sk_callback_hsw+0x353>
   DB  196,98,109,172,200                  ; vfnmadd213ps  %ymm0,%ymm2,%ymm9
   DB  196,193,60,89,200                   ; vmulps        %ymm8,%ymm8,%ymm1
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
-  DB  196,226,125,24,21,209,33,0,0        ; vbroadcastss  0x21d1(%rip),%ymm2        # 4a94 <_sk_callback_hsw+0x358>
+  DB  196,226,125,24,21,77,34,0,0         ; vbroadcastss  0x224d(%rip),%ymm2        # 4b10 <_sk_callback_hsw+0x357>
   DB  197,108,194,209,1                   ; vcmpltps      %ymm1,%ymm2,%ymm10
-  DB  196,98,125,24,29,199,33,0,0         ; vbroadcastss  0x21c7(%rip),%ymm11        # 4a98 <_sk_callback_hsw+0x35c>
+  DB  196,98,125,24,29,67,34,0,0          ; vbroadcastss  0x2243(%rip),%ymm11        # 4b14 <_sk_callback_hsw+0x35b>
   DB  196,65,60,88,195                    ; vaddps        %ymm11,%ymm8,%ymm8
-  DB  196,98,125,24,37,189,33,0,0         ; vbroadcastss  0x21bd(%rip),%ymm12        # 4a9c <_sk_callback_hsw+0x360>
+  DB  196,98,125,24,37,57,34,0,0          ; vbroadcastss  0x2239(%rip),%ymm12        # 4b18 <_sk_callback_hsw+0x35f>
   DB  196,65,60,89,196                    ; vmulps        %ymm12,%ymm8,%ymm8
   DB  196,99,61,74,193,160                ; vblendvps     %ymm10,%ymm1,%ymm8,%ymm8
   DB  197,252,89,200                      ; vmulps        %ymm0,%ymm0,%ymm1
@@ -2421,9 +2421,9 @@ _sk_lab_to_xyz_hsw LABEL PROC
   DB  196,65,52,88,203                    ; vaddps        %ymm11,%ymm9,%ymm9
   DB  196,65,52,89,204                    ; vmulps        %ymm12,%ymm9,%ymm9
   DB  196,227,53,74,208,32                ; vblendvps     %ymm2,%ymm0,%ymm9,%ymm2
-  DB  196,226,125,24,5,114,33,0,0         ; vbroadcastss  0x2172(%rip),%ymm0        # 4aa0 <_sk_callback_hsw+0x364>
+  DB  196,226,125,24,5,238,33,0,0         ; vbroadcastss  0x21ee(%rip),%ymm0        # 4b1c <_sk_callback_hsw+0x363>
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,105,33,0,0          ; vbroadcastss  0x2169(%rip),%ymm8        # 4aa4 <_sk_callback_hsw+0x368>
+  DB  196,98,125,24,5,229,33,0,0          ; vbroadcastss  0x21e5(%rip),%ymm8        # 4b20 <_sk_callback_hsw+0x367>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -2439,7 +2439,7 @@ _sk_load_a8_hsw LABEL PROC
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,62,33,0,0         ; vbroadcastss  0x213e(%rip),%ymm1        # 4aa8 <_sk_callback_hsw+0x36c>
+  DB  196,226,125,24,13,186,33,0,0        ; vbroadcastss  0x21ba(%rip),%ymm1        # 4b24 <_sk_callback_hsw+0x36b>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -2502,7 +2502,7 @@ _sk_gather_a8_hsw LABEL PROC
   DB  196,227,121,32,192,7                ; vpinsrb       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,73,32,0,0         ; vbroadcastss  0x2049(%rip),%ymm1        # 4aac <_sk_callback_hsw+0x370>
+  DB  196,226,125,24,13,197,32,0,0        ; vbroadcastss  0x20c5(%rip),%ymm1        # 4b28 <_sk_callback_hsw+0x36f>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -2518,7 +2518,7 @@ PUBLIC _sk_store_a8_hsw
 _sk_store_a8_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,36,32,0,0           ; vbroadcastss  0x2024(%rip),%ymm8        # 4ab0 <_sk_callback_hsw+0x374>
+  DB  196,98,125,24,5,160,32,0,0          ; vbroadcastss  0x20a0(%rip),%ymm8        # 4b2c <_sk_callback_hsw+0x373>
   DB  196,65,100,89,192                   ; vmulps        %ymm8,%ymm3,%ymm8
   DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
@@ -2583,10 +2583,10 @@ _sk_load_g8_hsw LABEL PROC
   DB  197,250,126,0                       ; vmovq         (%rax),%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,90,31,0,0         ; vbroadcastss  0x1f5a(%rip),%ymm1        # 4ab4 <_sk_callback_hsw+0x378>
+  DB  196,226,125,24,13,214,31,0,0        ; vbroadcastss  0x1fd6(%rip),%ymm1        # 4b30 <_sk_callback_hsw+0x377>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,79,31,0,0         ; vbroadcastss  0x1f4f(%rip),%ymm3        # 4ab8 <_sk_callback_hsw+0x37c>
+  DB  196,226,125,24,29,203,31,0,0        ; vbroadcastss  0x1fcb(%rip),%ymm3        # 4b34 <_sk_callback_hsw+0x37b>
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
@@ -2646,10 +2646,10 @@ _sk_gather_g8_hsw LABEL PROC
   DB  196,227,121,32,192,7                ; vpinsrb       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,49,192                  ; vpmovzxbd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,100,30,0,0        ; vbroadcastss  0x1e64(%rip),%ymm1        # 4abc <_sk_callback_hsw+0x380>
+  DB  196,226,125,24,13,224,30,0,0        ; vbroadcastss  0x1ee0(%rip),%ymm1        # 4b38 <_sk_callback_hsw+0x37f>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,89,30,0,0         ; vbroadcastss  0x1e59(%rip),%ymm3        # 4ac0 <_sk_callback_hsw+0x384>
+  DB  196,226,125,24,29,213,30,0,0        ; vbroadcastss  0x1ed5(%rip),%ymm3        # 4b3c <_sk_callback_hsw+0x383>
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  91                                  ; pop           %rbx
@@ -2703,14 +2703,14 @@ _sk_gather_i8_hsw LABEL PROC
   DB  73,139,64,8                         ; mov           0x8(%r8),%rax
   DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
   DB  196,226,117,144,28,128              ; vpgatherdd    %ymm1,(%rax,%ymm0,4),%ymm3
-  DB  197,229,219,5,105,31,0,0            ; vpand         0x1f69(%rip),%ymm3,%ymm0        # 4ca0 <_sk_callback_hsw+0x564>
+  DB  197,229,219,5,233,31,0,0            ; vpand         0x1fe9(%rip),%ymm3,%ymm0        # 4d20 <_sk_callback_hsw+0x567>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,128,29,0,0          ; vbroadcastss  0x1d80(%rip),%ymm8        # 4ac4 <_sk_callback_hsw+0x388>
+  DB  196,98,125,24,5,252,29,0,0          ; vbroadcastss  0x1dfc(%rip),%ymm8        # 4b40 <_sk_callback_hsw+0x387>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,226,101,0,13,110,31,0,0         ; vpshufb       0x1f6e(%rip),%ymm3,%ymm1        # 4cc0 <_sk_callback_hsw+0x584>
+  DB  196,226,101,0,13,238,31,0,0         ; vpshufb       0x1fee(%rip),%ymm3,%ymm1        # 4d40 <_sk_callback_hsw+0x587>
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,226,101,0,21,124,31,0,0         ; vpshufb       0x1f7c(%rip),%ymm3,%ymm2        # 4ce0 <_sk_callback_hsw+0x5a4>
+  DB  196,226,101,0,21,252,31,0,0         ; vpshufb       0x1ffc(%rip),%ymm3,%ymm2        # 4d60 <_sk_callback_hsw+0x5a7>
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
@@ -2732,23 +2732,23 @@ _sk_load_565_hsw LABEL PROC
   DB  117,114                             ; jne           2e04 <_sk_load_565_hsw+0x7c>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
-  DB  196,226,125,88,5,34,29,0,0          ; vpbroadcastd  0x1d22(%rip),%ymm0        # 4ac8 <_sk_callback_hsw+0x38c>
+  DB  196,226,125,88,5,158,29,0,0         ; vpbroadcastd  0x1d9e(%rip),%ymm0        # 4b44 <_sk_callback_hsw+0x38b>
   DB  197,237,219,192                     ; vpand         %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,21,29,0,0         ; vbroadcastss  0x1d15(%rip),%ymm1        # 4acc <_sk_callback_hsw+0x390>
+  DB  196,226,125,24,13,145,29,0,0        ; vbroadcastss  0x1d91(%rip),%ymm1        # 4b48 <_sk_callback_hsw+0x38f>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,12,29,0,0         ; vpbroadcastd  0x1d0c(%rip),%ymm1        # 4ad0 <_sk_callback_hsw+0x394>
+  DB  196,226,125,88,13,136,29,0,0        ; vpbroadcastd  0x1d88(%rip),%ymm1        # 4b4c <_sk_callback_hsw+0x393>
   DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,255,28,0,0        ; vbroadcastss  0x1cff(%rip),%ymm3        # 4ad4 <_sk_callback_hsw+0x398>
+  DB  196,226,125,24,29,123,29,0,0        ; vbroadcastss  0x1d7b(%rip),%ymm3        # 4b50 <_sk_callback_hsw+0x397>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,88,29,246,28,0,0        ; vpbroadcastd  0x1cf6(%rip),%ymm3        # 4ad8 <_sk_callback_hsw+0x39c>
+  DB  196,226,125,88,29,114,29,0,0        ; vpbroadcastd  0x1d72(%rip),%ymm3        # 4b54 <_sk_callback_hsw+0x39b>
   DB  197,237,219,211                     ; vpand         %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,233,28,0,0        ; vbroadcastss  0x1ce9(%rip),%ymm3        # 4adc <_sk_callback_hsw+0x3a0>
+  DB  196,226,125,24,29,101,29,0,0        ; vbroadcastss  0x1d65(%rip),%ymm3        # 4b58 <_sk_callback_hsw+0x39f>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,222,28,0,0        ; vbroadcastss  0x1cde(%rip),%ymm3        # 4ae0 <_sk_callback_hsw+0x3a4>
+  DB  196,226,125,24,29,90,29,0,0         ; vbroadcastss  0x1d5a(%rip),%ymm3        # 4b5c <_sk_callback_hsw+0x3a3>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
@@ -2837,23 +2837,23 @@ _sk_gather_565_hsw LABEL PROC
   DB  65,15,183,4,88                      ; movzwl        (%r8,%rbx,2),%eax
   DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,51,208                  ; vpmovzxwd     %xmm0,%ymm2
-  DB  196,226,125,88,5,161,27,0,0         ; vpbroadcastd  0x1ba1(%rip),%ymm0        # 4ae4 <_sk_callback_hsw+0x3a8>
+  DB  196,226,125,88,5,29,28,0,0          ; vpbroadcastd  0x1c1d(%rip),%ymm0        # 4b60 <_sk_callback_hsw+0x3a7>
   DB  197,237,219,192                     ; vpand         %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,148,27,0,0        ; vbroadcastss  0x1b94(%rip),%ymm1        # 4ae8 <_sk_callback_hsw+0x3ac>
+  DB  196,226,125,24,13,16,28,0,0         ; vbroadcastss  0x1c10(%rip),%ymm1        # 4b64 <_sk_callback_hsw+0x3ab>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,139,27,0,0        ; vpbroadcastd  0x1b8b(%rip),%ymm1        # 4aec <_sk_callback_hsw+0x3b0>
+  DB  196,226,125,88,13,7,28,0,0          ; vpbroadcastd  0x1c07(%rip),%ymm1        # 4b68 <_sk_callback_hsw+0x3af>
   DB  197,237,219,201                     ; vpand         %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,126,27,0,0        ; vbroadcastss  0x1b7e(%rip),%ymm3        # 4af0 <_sk_callback_hsw+0x3b4>
+  DB  196,226,125,24,29,250,27,0,0        ; vbroadcastss  0x1bfa(%rip),%ymm3        # 4b6c <_sk_callback_hsw+0x3b3>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,88,29,117,27,0,0        ; vpbroadcastd  0x1b75(%rip),%ymm3        # 4af4 <_sk_callback_hsw+0x3b8>
+  DB  196,226,125,88,29,241,27,0,0        ; vpbroadcastd  0x1bf1(%rip),%ymm3        # 4b70 <_sk_callback_hsw+0x3b7>
   DB  197,237,219,211                     ; vpand         %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,104,27,0,0        ; vbroadcastss  0x1b68(%rip),%ymm3        # 4af8 <_sk_callback_hsw+0x3bc>
+  DB  196,226,125,24,29,228,27,0,0        ; vbroadcastss  0x1be4(%rip),%ymm3        # 4b74 <_sk_callback_hsw+0x3bb>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,93,27,0,0         ; vbroadcastss  0x1b5d(%rip),%ymm3        # 4afc <_sk_callback_hsw+0x3c0>
+  DB  196,226,125,24,29,217,27,0,0        ; vbroadcastss  0x1bd9(%rip),%ymm3        # 4b78 <_sk_callback_hsw+0x3bf>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,94                               ; pop           %r14
@@ -2864,11 +2864,11 @@ PUBLIC _sk_store_565_hsw
 _sk_store_565_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,74,27,0,0           ; vbroadcastss  0x1b4a(%rip),%ymm8        # 4b00 <_sk_callback_hsw+0x3c4>
+  DB  196,98,125,24,5,198,27,0,0          ; vbroadcastss  0x1bc6(%rip),%ymm8        # 4b7c <_sk_callback_hsw+0x3c3>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,53,114,241,11               ; vpslld        $0xb,%ymm9,%ymm9
-  DB  196,98,125,24,21,53,27,0,0          ; vbroadcastss  0x1b35(%rip),%ymm10        # 4b04 <_sk_callback_hsw+0x3c8>
+  DB  196,98,125,24,21,177,27,0,0         ; vbroadcastss  0x1bb1(%rip),%ymm10        # 4b80 <_sk_callback_hsw+0x3c7>
   DB  196,65,116,89,210                   ; vmulps        %ymm10,%ymm1,%ymm10
   DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
   DB  196,193,45,114,242,5                ; vpslld        $0x5,%ymm10,%ymm10
@@ -2934,25 +2934,25 @@ _sk_load_4444_hsw LABEL PROC
   DB  15,133,138,0,0,0                    ; jne           3120 <_sk_load_4444_hsw+0x98>
   DB  196,193,122,111,4,122               ; vmovdqu       (%r10,%rdi,2),%xmm0
   DB  196,226,125,51,216                  ; vpmovzxwd     %xmm0,%ymm3
-  DB  196,226,125,88,5,94,26,0,0          ; vpbroadcastd  0x1a5e(%rip),%ymm0        # 4b08 <_sk_callback_hsw+0x3cc>
+  DB  196,226,125,88,5,218,26,0,0         ; vpbroadcastd  0x1ada(%rip),%ymm0        # 4b84 <_sk_callback_hsw+0x3cb>
   DB  197,229,219,192                     ; vpand         %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,81,26,0,0         ; vbroadcastss  0x1a51(%rip),%ymm1        # 4b0c <_sk_callback_hsw+0x3d0>
+  DB  196,226,125,24,13,205,26,0,0        ; vbroadcastss  0x1acd(%rip),%ymm1        # 4b88 <_sk_callback_hsw+0x3cf>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,72,26,0,0         ; vpbroadcastd  0x1a48(%rip),%ymm1        # 4b10 <_sk_callback_hsw+0x3d4>
+  DB  196,226,125,88,13,196,26,0,0        ; vpbroadcastd  0x1ac4(%rip),%ymm1        # 4b8c <_sk_callback_hsw+0x3d3>
   DB  197,229,219,201                     ; vpand         %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,59,26,0,0         ; vbroadcastss  0x1a3b(%rip),%ymm2        # 4b14 <_sk_callback_hsw+0x3d8>
+  DB  196,226,125,24,21,183,26,0,0        ; vbroadcastss  0x1ab7(%rip),%ymm2        # 4b90 <_sk_callback_hsw+0x3d7>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,88,21,50,26,0,0         ; vpbroadcastd  0x1a32(%rip),%ymm2        # 4b18 <_sk_callback_hsw+0x3dc>
+  DB  196,226,125,88,21,174,26,0,0        ; vpbroadcastd  0x1aae(%rip),%ymm2        # 4b94 <_sk_callback_hsw+0x3db>
   DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,37,26,0,0           ; vbroadcastss  0x1a25(%rip),%ymm8        # 4b1c <_sk_callback_hsw+0x3e0>
+  DB  196,98,125,24,5,161,26,0,0          ; vbroadcastss  0x1aa1(%rip),%ymm8        # 4b98 <_sk_callback_hsw+0x3df>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,88,5,27,26,0,0           ; vpbroadcastd  0x1a1b(%rip),%ymm8        # 4b20 <_sk_callback_hsw+0x3e4>
+  DB  196,98,125,88,5,151,26,0,0          ; vpbroadcastd  0x1a97(%rip),%ymm8        # 4b9c <_sk_callback_hsw+0x3e3>
   DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,13,26,0,0           ; vbroadcastss  0x1a0d(%rip),%ymm8        # 4b24 <_sk_callback_hsw+0x3e8>
+  DB  196,98,125,24,5,137,26,0,0          ; vbroadcastss  0x1a89(%rip),%ymm8        # 4ba0 <_sk_callback_hsw+0x3e7>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -3043,25 +3043,25 @@ _sk_gather_4444_hsw LABEL PROC
   DB  65,15,183,4,88                      ; movzwl        (%r8,%rbx,2),%eax
   DB  197,249,196,192,7                   ; vpinsrw       $0x7,%eax,%xmm0,%xmm0
   DB  196,226,125,51,216                  ; vpmovzxwd     %xmm0,%ymm3
-  DB  196,226,125,88,5,197,24,0,0         ; vpbroadcastd  0x18c5(%rip),%ymm0        # 4b28 <_sk_callback_hsw+0x3ec>
+  DB  196,226,125,88,5,65,25,0,0          ; vpbroadcastd  0x1941(%rip),%ymm0        # 4ba4 <_sk_callback_hsw+0x3eb>
   DB  197,229,219,192                     ; vpand         %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,184,24,0,0        ; vbroadcastss  0x18b8(%rip),%ymm1        # 4b2c <_sk_callback_hsw+0x3f0>
+  DB  196,226,125,24,13,52,25,0,0         ; vbroadcastss  0x1934(%rip),%ymm1        # 4ba8 <_sk_callback_hsw+0x3ef>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,88,13,175,24,0,0        ; vpbroadcastd  0x18af(%rip),%ymm1        # 4b30 <_sk_callback_hsw+0x3f4>
+  DB  196,226,125,88,13,43,25,0,0         ; vpbroadcastd  0x192b(%rip),%ymm1        # 4bac <_sk_callback_hsw+0x3f3>
   DB  197,229,219,201                     ; vpand         %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,162,24,0,0        ; vbroadcastss  0x18a2(%rip),%ymm2        # 4b34 <_sk_callback_hsw+0x3f8>
+  DB  196,226,125,24,21,30,25,0,0         ; vbroadcastss  0x191e(%rip),%ymm2        # 4bb0 <_sk_callback_hsw+0x3f7>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,88,21,153,24,0,0        ; vpbroadcastd  0x1899(%rip),%ymm2        # 4b38 <_sk_callback_hsw+0x3fc>
+  DB  196,226,125,88,21,21,25,0,0         ; vpbroadcastd  0x1915(%rip),%ymm2        # 4bb4 <_sk_callback_hsw+0x3fb>
   DB  197,229,219,210                     ; vpand         %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,140,24,0,0          ; vbroadcastss  0x188c(%rip),%ymm8        # 4b3c <_sk_callback_hsw+0x400>
+  DB  196,98,125,24,5,8,25,0,0            ; vbroadcastss  0x1908(%rip),%ymm8        # 4bb8 <_sk_callback_hsw+0x3ff>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,88,5,130,24,0,0          ; vpbroadcastd  0x1882(%rip),%ymm8        # 4b40 <_sk_callback_hsw+0x404>
+  DB  196,98,125,88,5,254,24,0,0          ; vpbroadcastd  0x18fe(%rip),%ymm8        # 4bbc <_sk_callback_hsw+0x403>
   DB  196,193,101,219,216                 ; vpand         %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,116,24,0,0          ; vbroadcastss  0x1874(%rip),%ymm8        # 4b44 <_sk_callback_hsw+0x408>
+  DB  196,98,125,24,5,240,24,0,0          ; vbroadcastss  0x18f0(%rip),%ymm8        # 4bc0 <_sk_callback_hsw+0x407>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -3074,7 +3074,7 @@ PUBLIC _sk_store_4444_hsw
 _sk_store_4444_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,90,24,0,0           ; vbroadcastss  0x185a(%rip),%ymm8        # 4b48 <_sk_callback_hsw+0x40c>
+  DB  196,98,125,24,5,214,24,0,0          ; vbroadcastss  0x18d6(%rip),%ymm8        # 4bc4 <_sk_callback_hsw+0x40b>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,53,114,241,12               ; vpslld        $0xc,%ymm9,%ymm9
@@ -3148,14 +3148,14 @@ _sk_load_8888_hsw LABEL PROC
   DB  77,133,192                          ; test          %r8,%r8
   DB  117,88                              ; jne           3439 <_sk_load_8888_hsw+0x6d>
   DB  196,193,126,111,25                  ; vmovdqu       (%r9),%ymm3
-  DB  197,229,219,5,18,25,0,0             ; vpand         0x1912(%rip),%ymm3,%ymm0        # 4d00 <_sk_callback_hsw+0x5c4>
+  DB  197,229,219,5,146,25,0,0            ; vpand         0x1992(%rip),%ymm3,%ymm0        # 4d80 <_sk_callback_hsw+0x5c7>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,81,23,0,0           ; vbroadcastss  0x1751(%rip),%ymm8        # 4b4c <_sk_callback_hsw+0x410>
+  DB  196,98,125,24,5,205,23,0,0          ; vbroadcastss  0x17cd(%rip),%ymm8        # 4bc8 <_sk_callback_hsw+0x40f>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,226,101,0,13,23,25,0,0          ; vpshufb       0x1917(%rip),%ymm3,%ymm1        # 4d20 <_sk_callback_hsw+0x5e4>
+  DB  196,226,101,0,13,151,25,0,0         ; vpshufb       0x1997(%rip),%ymm3,%ymm1        # 4da0 <_sk_callback_hsw+0x5e7>
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,226,101,0,21,37,25,0,0          ; vpshufb       0x1925(%rip),%ymm3,%ymm2        # 4d40 <_sk_callback_hsw+0x604>
+  DB  196,226,101,0,21,165,25,0,0         ; vpshufb       0x19a5(%rip),%ymm3,%ymm2        # 4dc0 <_sk_callback_hsw+0x607>
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
@@ -3185,14 +3185,14 @@ _sk_gather_8888_hsw LABEL PROC
   DB  197,245,254,192                     ; vpaddd        %ymm0,%ymm1,%ymm0
   DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
   DB  196,194,117,144,28,128              ; vpgatherdd    %ymm1,(%r8,%ymm0,4),%ymm3
-  DB  197,229,219,5,211,24,0,0            ; vpand         0x18d3(%rip),%ymm3,%ymm0        # 4d60 <_sk_callback_hsw+0x624>
+  DB  197,229,219,5,83,25,0,0             ; vpand         0x1953(%rip),%ymm3,%ymm0        # 4de0 <_sk_callback_hsw+0x627>
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,182,22,0,0          ; vbroadcastss  0x16b6(%rip),%ymm8        # 4b50 <_sk_callback_hsw+0x414>
+  DB  196,98,125,24,5,50,23,0,0           ; vbroadcastss  0x1732(%rip),%ymm8        # 4bcc <_sk_callback_hsw+0x413>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,226,101,0,13,216,24,0,0         ; vpshufb       0x18d8(%rip),%ymm3,%ymm1        # 4d80 <_sk_callback_hsw+0x644>
+  DB  196,226,101,0,13,88,25,0,0          ; vpshufb       0x1958(%rip),%ymm3,%ymm1        # 4e00 <_sk_callback_hsw+0x647>
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,226,101,0,21,230,24,0,0         ; vpshufb       0x18e6(%rip),%ymm3,%ymm2        # 4da0 <_sk_callback_hsw+0x664>
+  DB  196,226,101,0,21,102,25,0,0         ; vpshufb       0x1966(%rip),%ymm3,%ymm2        # 4e20 <_sk_callback_hsw+0x667>
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,229,114,211,24                  ; vpsrld        $0x18,%ymm3,%ymm3
@@ -3207,7 +3207,7 @@ _sk_store_8888_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,141,12,189,0,0,0,0               ; lea           0x0(,%rdi,4),%r9
   DB  76,3,8                              ; add           (%rax),%r9
-  DB  196,98,125,24,5,102,22,0,0          ; vbroadcastss  0x1666(%rip),%ymm8        # 4b54 <_sk_callback_hsw+0x418>
+  DB  196,98,125,24,5,226,22,0,0          ; vbroadcastss  0x16e2(%rip),%ymm8        # 4bd0 <_sk_callback_hsw+0x417>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,65,116,89,208                   ; vmulps        %ymm8,%ymm1,%ymm10
@@ -3396,7 +3396,7 @@ _sk_load_u16_be_hsw LABEL PROC
   DB  197,241,235,192                     ; vpor          %xmm0,%xmm1,%xmm0
   DB  196,226,125,51,192                  ; vpmovzxwd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,21,93,19,0,0          ; vbroadcastss  0x135d(%rip),%ymm10        # 4b58 <_sk_callback_hsw+0x41c>
+  DB  196,98,125,24,21,217,19,0,0         ; vbroadcastss  0x13d9(%rip),%ymm10        # 4bd4 <_sk_callback_hsw+0x41b>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -3478,7 +3478,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
   DB  197,241,235,192                     ; vpor          %xmm0,%xmm1,%xmm0
   DB  196,226,125,51,192                  ; vpmovzxwd     %xmm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,21,238,17,0,0         ; vbroadcastss  0x11ee(%rip),%ymm10        # 4b5c <_sk_callback_hsw+0x420>
+  DB  196,98,125,24,21,106,18,0,0         ; vbroadcastss  0x126a(%rip),%ymm10        # 4bd8 <_sk_callback_hsw+0x41f>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -3495,7 +3495,7 @@ _sk_load_rgb_u16_be_hsw LABEL PROC
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,162,17,0,0        ; vbroadcastss  0x11a2(%rip),%ymm3        # 4b60 <_sk_callback_hsw+0x424>
+  DB  196,226,125,24,29,30,18,0,0         ; vbroadcastss  0x121e(%rip),%ymm3        # 4bdc <_sk_callback_hsw+0x423>
   DB  255,224                             ; jmpq          *%rax
   DB  196,193,121,110,4,64                ; vmovd         (%r8,%rax,2),%xmm0
   DB  196,193,121,196,68,64,4,2           ; vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
@@ -3536,7 +3536,7 @@ _sk_store_u16_be_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
-  DB  196,98,125,24,5,223,16,0,0          ; vbroadcastss  0x10df(%rip),%ymm8        # 4b64 <_sk_callback_hsw+0x428>
+  DB  196,98,125,24,5,91,17,0,0           ; vbroadcastss  0x115b(%rip),%ymm8        # 4be0 <_sk_callback_hsw+0x427>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
@@ -3778,11 +3778,11 @@ _sk_mirror_y_hsw LABEL PROC
 
 PUBLIC _sk_luminance_to_alpha_hsw
 _sk_luminance_to_alpha_hsw LABEL PROC
-  DB  196,226,125,24,29,47,13,0,0         ; vbroadcastss  0xd2f(%rip),%ymm3        # 4b68 <_sk_callback_hsw+0x42c>
-  DB  196,98,125,24,5,42,13,0,0           ; vbroadcastss  0xd2a(%rip),%ymm8        # 4b6c <_sk_callback_hsw+0x430>
+  DB  196,226,125,24,29,171,13,0,0        ; vbroadcastss  0xdab(%rip),%ymm3        # 4be4 <_sk_callback_hsw+0x42b>
+  DB  196,98,125,24,5,166,13,0,0          ; vbroadcastss  0xda6(%rip),%ymm8        # 4be8 <_sk_callback_hsw+0x42f>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  196,226,125,184,203                 ; vfmadd231ps   %ymm3,%ymm0,%ymm1
-  DB  196,226,125,24,29,27,13,0,0         ; vbroadcastss  0xd1b(%rip),%ymm3        # 4b70 <_sk_callback_hsw+0x434>
+  DB  196,226,125,24,29,151,13,0,0        ; vbroadcastss  0xd97(%rip),%ymm3        # 4bec <_sk_callback_hsw+0x433>
   DB  196,226,109,168,217                 ; vfmadd213ps   %ymm1,%ymm2,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -3884,6 +3884,34 @@ _sk_matrix_4x5_hsw LABEL PROC
   DB  197,124,41,219                      ; vmovaps       %ymm11,%ymm3
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_matrix_4x3_hsw
+_sk_matrix_4x3_hsw LABEL PROC
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
+  DB  196,226,125,24,88,16                ; vbroadcastss  0x10(%rax),%ymm3
+  DB  196,98,125,24,64,32                 ; vbroadcastss  0x20(%rax),%ymm8
+  DB  196,98,117,184,195                  ; vfmadd231ps   %ymm3,%ymm1,%ymm8
+  DB  196,98,125,184,194                  ; vfmadd231ps   %ymm2,%ymm0,%ymm8
+  DB  196,226,125,24,80,4                 ; vbroadcastss  0x4(%rax),%ymm2
+  DB  196,226,125,24,88,20                ; vbroadcastss  0x14(%rax),%ymm3
+  DB  196,98,125,24,72,36                 ; vbroadcastss  0x24(%rax),%ymm9
+  DB  196,98,117,184,203                  ; vfmadd231ps   %ymm3,%ymm1,%ymm9
+  DB  196,98,125,184,202                  ; vfmadd231ps   %ymm2,%ymm0,%ymm9
+  DB  196,226,125,24,88,8                 ; vbroadcastss  0x8(%rax),%ymm3
+  DB  196,98,125,24,80,24                 ; vbroadcastss  0x18(%rax),%ymm10
+  DB  196,226,125,24,80,40                ; vbroadcastss  0x28(%rax),%ymm2
+  DB  196,194,117,184,210                 ; vfmadd231ps   %ymm10,%ymm1,%ymm2
+  DB  196,226,125,184,211                 ; vfmadd231ps   %ymm3,%ymm0,%ymm2
+  DB  196,98,125,24,80,12                 ; vbroadcastss  0xc(%rax),%ymm10
+  DB  196,98,125,24,88,28                 ; vbroadcastss  0x1c(%rax),%ymm11
+  DB  196,226,125,24,88,44                ; vbroadcastss  0x2c(%rax),%ymm3
+  DB  196,194,117,184,219                 ; vfmadd231ps   %ymm11,%ymm1,%ymm3
+  DB  196,194,125,184,218                 ; vfmadd231ps   %ymm10,%ymm0,%ymm3
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
+  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_matrix_perspective_hsw
 _sk_matrix_perspective_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -3915,9 +3943,9 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
   DB  76,139,64,8                         ; mov           0x8(%rax),%r8
   DB  77,137,202                          ; mov           %r9,%r10
   DB  73,255,202                          ; dec           %r10
-  DB  120,7                               ; js            4090 <_sk_evenly_spaced_gradient_hsw+0x18>
+  DB  120,7                               ; js            410d <_sk_evenly_spaced_gradient_hsw+0x18>
   DB  196,193,242,42,202                  ; vcvtsi2ss     %r10,%xmm1,%xmm1
-  DB  235,22                              ; jmp           40a6 <_sk_evenly_spaced_gradient_hsw+0x2e>
+  DB  235,22                              ; jmp           4123 <_sk_evenly_spaced_gradient_hsw+0x2e>
   DB  77,137,211                          ; mov           %r10,%r11
   DB  73,209,235                          ; shr           %r11
   DB  65,131,226,1                        ; and           $0x1,%r10d
@@ -3928,7 +3956,7 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
   DB  197,244,89,200                      ; vmulps        %ymm0,%ymm1,%ymm1
   DB  197,126,91,217                      ; vcvttps2dq    %ymm1,%ymm11
   DB  73,131,249,8                        ; cmp           $0x8,%r9
-  DB  119,70                              ; ja            40ff <_sk_evenly_spaced_gradient_hsw+0x87>
+  DB  119,70                              ; ja            417c <_sk_evenly_spaced_gradient_hsw+0x87>
   DB  196,66,37,22,0                      ; vpermps       (%r8),%ymm11,%ymm8
   DB  76,139,64,40                        ; mov           0x28(%rax),%r8
   DB  196,66,37,22,8                      ; vpermps       (%r8),%ymm11,%ymm9
@@ -3944,7 +3972,7 @@ _sk_evenly_spaced_gradient_hsw LABEL PROC
   DB  196,194,37,22,24                    ; vpermps       (%r8),%ymm11,%ymm3
   DB  72,139,64,64                        ; mov           0x40(%rax),%rax
   DB  196,98,37,22,40                     ; vpermps       (%rax),%ymm11,%ymm13
-  DB  235,110                             ; jmp           416d <_sk_evenly_spaced_gradient_hsw+0xf5>
+  DB  235,110                             ; jmp           41ea <_sk_evenly_spaced_gradient_hsw+0xf5>
   DB  196,65,13,118,246                   ; vpcmpeqd      %ymm14,%ymm14,%ymm14
   DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
   DB  196,2,117,146,4,152                 ; vgatherdps    %ymm1,(%r8,%ymm11,4),%ymm8
@@ -3981,11 +4009,11 @@ _sk_gradient_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  73,131,248,1                        ; cmp           $0x1,%r8
-  DB  15,134,180,0,0,0                    ; jbe           424c <_sk_gradient_hsw+0xc3>
+  DB  15,134,180,0,0,0                    ; jbe           42c9 <_sk_gradient_hsw+0xc3>
   DB  76,139,72,72                        ; mov           0x48(%rax),%r9
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  65,186,1,0,0,0                      ; mov           $0x1,%r10d
-  DB  196,226,125,24,21,197,9,0,0         ; vbroadcastss  0x9c5(%rip),%ymm2        # 4b74 <_sk_callback_hsw+0x438>
+  DB  196,226,125,24,21,196,9,0,0         ; vbroadcastss  0x9c4(%rip),%ymm2        # 4bf0 <_sk_callback_hsw+0x437>
   DB  196,65,53,239,201                   ; vpxor         %ymm9,%ymm9,%ymm9
   DB  196,130,125,24,28,145               ; vbroadcastss  (%r9,%r10,4),%ymm3
   DB  197,228,194,216,2                   ; vcmpleps      %ymm0,%ymm3,%ymm3
@@ -3993,10 +4021,10 @@ _sk_gradient_hsw LABEL PROC
   DB  196,65,101,254,201                  ; vpaddd        %ymm9,%ymm3,%ymm9
   DB  73,255,194                          ; inc           %r10
   DB  77,57,208                           ; cmp           %r10,%r8
-  DB  117,226                             ; jne           41b4 <_sk_gradient_hsw+0x2b>
+  DB  117,226                             ; jne           4231 <_sk_gradient_hsw+0x2b>
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  73,131,248,8                        ; cmp           $0x8,%r8
-  DB  118,121                             ; jbe           4255 <_sk_gradient_hsw+0xcc>
+  DB  118,121                             ; jbe           42d2 <_sk_gradient_hsw+0xcc>
   DB  196,65,13,118,246                   ; vpcmpeqd      %ymm14,%ymm14,%ymm14
   DB  197,245,118,201                     ; vpcmpeqd      %ymm1,%ymm1,%ymm1
   DB  196,2,117,146,4,137                 ; vgatherdps    %ymm1,(%r9,%ymm9,4),%ymm8
@@ -4020,7 +4048,7 @@ _sk_gradient_hsw LABEL PROC
   DB  196,130,21,146,28,136               ; vgatherdps    %ymm13,(%r8,%ymm9,4),%ymm3
   DB  72,139,64,64                        ; mov           0x40(%rax),%rax
   DB  196,34,13,146,44,136                ; vgatherdps    %ymm14,(%rax,%ymm9,4),%ymm13
-  DB  235,77                              ; jmp           4299 <_sk_gradient_hsw+0x110>
+  DB  235,77                              ; jmp           4316 <_sk_gradient_hsw+0x110>
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
   DB  196,66,53,22,1                      ; vpermps       (%r9),%ymm9,%ymm8
@@ -4076,24 +4104,24 @@ _sk_xy_to_unit_angle_hsw LABEL PROC
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,68,8,0,0           ; vbroadcastss  0x844(%rip),%ymm13        # 4b78 <_sk_callback_hsw+0x43c>
-  DB  196,98,125,24,53,63,8,0,0           ; vbroadcastss  0x83f(%rip),%ymm14        # 4b7c <_sk_callback_hsw+0x440>
+  DB  196,98,125,24,45,67,8,0,0           ; vbroadcastss  0x843(%rip),%ymm13        # 4bf4 <_sk_callback_hsw+0x43b>
+  DB  196,98,125,24,53,62,8,0,0           ; vbroadcastss  0x83e(%rip),%ymm14        # 4bf8 <_sk_callback_hsw+0x43f>
   DB  196,66,29,184,245                   ; vfmadd231ps   %ymm13,%ymm12,%ymm14
-  DB  196,98,125,24,45,53,8,0,0           ; vbroadcastss  0x835(%rip),%ymm13        # 4b80 <_sk_callback_hsw+0x444>
+  DB  196,98,125,24,45,52,8,0,0           ; vbroadcastss  0x834(%rip),%ymm13        # 4bfc <_sk_callback_hsw+0x443>
   DB  196,66,29,184,238                   ; vfmadd231ps   %ymm14,%ymm12,%ymm13
-  DB  196,98,125,24,53,43,8,0,0           ; vbroadcastss  0x82b(%rip),%ymm14        # 4b84 <_sk_callback_hsw+0x448>
+  DB  196,98,125,24,53,42,8,0,0           ; vbroadcastss  0x82a(%rip),%ymm14        # 4c00 <_sk_callback_hsw+0x447>
   DB  196,66,29,184,245                   ; vfmadd231ps   %ymm13,%ymm12,%ymm14
   DB  196,65,36,89,222                    ; vmulps        %ymm14,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,22,8,0,0           ; vbroadcastss  0x816(%rip),%ymm10        # 4b88 <_sk_callback_hsw+0x44c>
+  DB  196,98,125,24,21,21,8,0,0           ; vbroadcastss  0x815(%rip),%ymm10        # 4c04 <_sk_callback_hsw+0x44b>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,0,8,0,0            ; vbroadcastss  0x800(%rip),%ymm10        # 4b8c <_sk_callback_hsw+0x450>
+  DB  196,98,125,24,21,255,7,0,0          ; vbroadcastss  0x7ff(%rip),%ymm10        # 4c08 <_sk_callback_hsw+0x44f>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,234,7,0,0          ; vbroadcastss  0x7ea(%rip),%ymm10        # 4b90 <_sk_callback_hsw+0x454>
+  DB  196,98,125,24,21,233,7,0,0          ; vbroadcastss  0x7e9(%rip),%ymm10        # 4c0c <_sk_callback_hsw+0x453>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -4112,7 +4140,7 @@ _sk_xy_to_radius_hsw LABEL PROC
 PUBLIC _sk_save_xy_hsw
 _sk_save_xy_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,183,7,0,0           ; vbroadcastss  0x7b7(%rip),%ymm8        # 4b94 <_sk_callback_hsw+0x458>
+  DB  196,98,125,24,5,182,7,0,0           ; vbroadcastss  0x7b6(%rip),%ymm8        # 4c10 <_sk_callback_hsw+0x457>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -4142,9 +4170,9 @@ _sk_accumulate_hsw LABEL PROC
 PUBLIC _sk_bilinear_nx_hsw
 _sk_bilinear_nx_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,75,7,0,0           ; vbroadcastss  0x74b(%rip),%ymm0        # 4b98 <_sk_callback_hsw+0x45c>
+  DB  196,226,125,24,5,74,7,0,0           ; vbroadcastss  0x74a(%rip),%ymm0        # 4c14 <_sk_callback_hsw+0x45b>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,66,7,0,0            ; vbroadcastss  0x742(%rip),%ymm8        # 4b9c <_sk_callback_hsw+0x460>
+  DB  196,98,125,24,5,65,7,0,0            ; vbroadcastss  0x741(%rip),%ymm8        # 4c18 <_sk_callback_hsw+0x45f>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4153,7 +4181,7 @@ _sk_bilinear_nx_hsw LABEL PROC
 PUBLIC _sk_bilinear_px_hsw
 _sk_bilinear_px_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,42,7,0,0           ; vbroadcastss  0x72a(%rip),%ymm0        # 4ba0 <_sk_callback_hsw+0x464>
+  DB  196,226,125,24,5,41,7,0,0           ; vbroadcastss  0x729(%rip),%ymm0        # 4c1c <_sk_callback_hsw+0x463>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -4163,9 +4191,9 @@ _sk_bilinear_px_hsw LABEL PROC
 PUBLIC _sk_bilinear_ny_hsw
 _sk_bilinear_ny_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,14,7,0,0          ; vbroadcastss  0x70e(%rip),%ymm1        # 4ba4 <_sk_callback_hsw+0x468>
+  DB  196,226,125,24,13,13,7,0,0          ; vbroadcastss  0x70d(%rip),%ymm1        # 4c20 <_sk_callback_hsw+0x467>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,4,7,0,0             ; vbroadcastss  0x704(%rip),%ymm8        # 4ba8 <_sk_callback_hsw+0x46c>
+  DB  196,98,125,24,5,3,7,0,0             ; vbroadcastss  0x703(%rip),%ymm8        # 4c24 <_sk_callback_hsw+0x46b>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4174,7 +4202,7 @@ _sk_bilinear_ny_hsw LABEL PROC
 PUBLIC _sk_bilinear_py_hsw
 _sk_bilinear_py_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,236,6,0,0         ; vbroadcastss  0x6ec(%rip),%ymm1        # 4bac <_sk_callback_hsw+0x470>
+  DB  196,226,125,24,13,235,6,0,0         ; vbroadcastss  0x6eb(%rip),%ymm1        # 4c28 <_sk_callback_hsw+0x46f>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -4184,13 +4212,13 @@ _sk_bilinear_py_hsw LABEL PROC
 PUBLIC _sk_bicubic_n3x_hsw
 _sk_bicubic_n3x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,207,6,0,0          ; vbroadcastss  0x6cf(%rip),%ymm0        # 4bb0 <_sk_callback_hsw+0x474>
+  DB  196,226,125,24,5,206,6,0,0          ; vbroadcastss  0x6ce(%rip),%ymm0        # 4c2c <_sk_callback_hsw+0x473>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,198,6,0,0           ; vbroadcastss  0x6c6(%rip),%ymm8        # 4bb4 <_sk_callback_hsw+0x478>
+  DB  196,98,125,24,5,197,6,0,0           ; vbroadcastss  0x6c5(%rip),%ymm8        # 4c30 <_sk_callback_hsw+0x477>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,183,6,0,0          ; vbroadcastss  0x6b7(%rip),%ymm10        # 4bb8 <_sk_callback_hsw+0x47c>
-  DB  196,98,125,24,29,178,6,0,0          ; vbroadcastss  0x6b2(%rip),%ymm11        # 4bbc <_sk_callback_hsw+0x480>
+  DB  196,98,125,24,21,182,6,0,0          ; vbroadcastss  0x6b6(%rip),%ymm10        # 4c34 <_sk_callback_hsw+0x47b>
+  DB  196,98,125,24,29,177,6,0,0          ; vbroadcastss  0x6b1(%rip),%ymm11        # 4c38 <_sk_callback_hsw+0x47f>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,36,89,193                    ; vmulps        %ymm9,%ymm11,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -4200,16 +4228,16 @@ _sk_bicubic_n3x_hsw LABEL PROC
 PUBLIC _sk_bicubic_n1x_hsw
 _sk_bicubic_n1x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,149,6,0,0          ; vbroadcastss  0x695(%rip),%ymm0        # 4bc0 <_sk_callback_hsw+0x484>
+  DB  196,226,125,24,5,148,6,0,0          ; vbroadcastss  0x694(%rip),%ymm0        # 4c3c <_sk_callback_hsw+0x483>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,140,6,0,0           ; vbroadcastss  0x68c(%rip),%ymm8        # 4bc4 <_sk_callback_hsw+0x488>
+  DB  196,98,125,24,5,139,6,0,0           ; vbroadcastss  0x68b(%rip),%ymm8        # 4c40 <_sk_callback_hsw+0x487>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,130,6,0,0          ; vbroadcastss  0x682(%rip),%ymm9        # 4bc8 <_sk_callback_hsw+0x48c>
-  DB  196,98,125,24,21,125,6,0,0          ; vbroadcastss  0x67d(%rip),%ymm10        # 4bcc <_sk_callback_hsw+0x490>
+  DB  196,98,125,24,13,129,6,0,0          ; vbroadcastss  0x681(%rip),%ymm9        # 4c44 <_sk_callback_hsw+0x48b>
+  DB  196,98,125,24,21,124,6,0,0          ; vbroadcastss  0x67c(%rip),%ymm10        # 4c48 <_sk_callback_hsw+0x48f>
   DB  196,66,61,168,209                   ; vfmadd213ps   %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,13,115,6,0,0          ; vbroadcastss  0x673(%rip),%ymm9        # 4bd0 <_sk_callback_hsw+0x494>
+  DB  196,98,125,24,13,114,6,0,0          ; vbroadcastss  0x672(%rip),%ymm9        # 4c4c <_sk_callback_hsw+0x493>
   DB  196,66,61,184,202                   ; vfmadd231ps   %ymm10,%ymm8,%ymm9
-  DB  196,98,125,24,21,105,6,0,0          ; vbroadcastss  0x669(%rip),%ymm10        # 4bd4 <_sk_callback_hsw+0x498>
+  DB  196,98,125,24,21,104,6,0,0          ; vbroadcastss  0x668(%rip),%ymm10        # 4c50 <_sk_callback_hsw+0x497>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  197,124,17,144,128,0,0,0            ; vmovups       %ymm10,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4218,14 +4246,14 @@ _sk_bicubic_n1x_hsw LABEL PROC
 PUBLIC _sk_bicubic_p1x_hsw
 _sk_bicubic_p1x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,81,6,0,0            ; vbroadcastss  0x651(%rip),%ymm8        # 4bd8 <_sk_callback_hsw+0x49c>
+  DB  196,98,125,24,5,80,6,0,0            ; vbroadcastss  0x650(%rip),%ymm8        # 4c54 <_sk_callback_hsw+0x49b>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,72,64                    ; vmovups       0x40(%rax),%ymm9
-  DB  196,98,125,24,21,67,6,0,0           ; vbroadcastss  0x643(%rip),%ymm10        # 4bdc <_sk_callback_hsw+0x4a0>
-  DB  196,98,125,24,29,62,6,0,0           ; vbroadcastss  0x63e(%rip),%ymm11        # 4be0 <_sk_callback_hsw+0x4a4>
+  DB  196,98,125,24,21,66,6,0,0           ; vbroadcastss  0x642(%rip),%ymm10        # 4c58 <_sk_callback_hsw+0x49f>
+  DB  196,98,125,24,29,61,6,0,0           ; vbroadcastss  0x63d(%rip),%ymm11        # 4c5c <_sk_callback_hsw+0x4a3>
   DB  196,66,53,168,218                   ; vfmadd213ps   %ymm10,%ymm9,%ymm11
   DB  196,66,53,168,216                   ; vfmadd213ps   %ymm8,%ymm9,%ymm11
-  DB  196,98,125,24,5,47,6,0,0            ; vbroadcastss  0x62f(%rip),%ymm8        # 4be4 <_sk_callback_hsw+0x4a8>
+  DB  196,98,125,24,5,46,6,0,0            ; vbroadcastss  0x62e(%rip),%ymm8        # 4c60 <_sk_callback_hsw+0x4a7>
   DB  196,66,53,184,195                   ; vfmadd231ps   %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4234,12 +4262,12 @@ _sk_bicubic_p1x_hsw LABEL PROC
 PUBLIC _sk_bicubic_p3x_hsw
 _sk_bicubic_p3x_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,23,6,0,0           ; vbroadcastss  0x617(%rip),%ymm0        # 4be8 <_sk_callback_hsw+0x4ac>
+  DB  196,226,125,24,5,22,6,0,0           ; vbroadcastss  0x616(%rip),%ymm0        # 4c64 <_sk_callback_hsw+0x4ab>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,4,6,0,0            ; vbroadcastss  0x604(%rip),%ymm10        # 4bec <_sk_callback_hsw+0x4b0>
-  DB  196,98,125,24,29,255,5,0,0          ; vbroadcastss  0x5ff(%rip),%ymm11        # 4bf0 <_sk_callback_hsw+0x4b4>
+  DB  196,98,125,24,21,3,6,0,0            ; vbroadcastss  0x603(%rip),%ymm10        # 4c68 <_sk_callback_hsw+0x4af>
+  DB  196,98,125,24,29,254,5,0,0          ; vbroadcastss  0x5fe(%rip),%ymm11        # 4c6c <_sk_callback_hsw+0x4b3>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,52,89,195                    ; vmulps        %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -4249,13 +4277,13 @@ _sk_bicubic_p3x_hsw LABEL PROC
 PUBLIC _sk_bicubic_n3y_hsw
 _sk_bicubic_n3y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,226,5,0,0         ; vbroadcastss  0x5e2(%rip),%ymm1        # 4bf4 <_sk_callback_hsw+0x4b8>
+  DB  196,226,125,24,13,225,5,0,0         ; vbroadcastss  0x5e1(%rip),%ymm1        # 4c70 <_sk_callback_hsw+0x4b7>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,216,5,0,0           ; vbroadcastss  0x5d8(%rip),%ymm8        # 4bf8 <_sk_callback_hsw+0x4bc>
+  DB  196,98,125,24,5,215,5,0,0           ; vbroadcastss  0x5d7(%rip),%ymm8        # 4c74 <_sk_callback_hsw+0x4bb>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,201,5,0,0          ; vbroadcastss  0x5c9(%rip),%ymm10        # 4bfc <_sk_callback_hsw+0x4c0>
-  DB  196,98,125,24,29,196,5,0,0          ; vbroadcastss  0x5c4(%rip),%ymm11        # 4c00 <_sk_callback_hsw+0x4c4>
+  DB  196,98,125,24,21,200,5,0,0          ; vbroadcastss  0x5c8(%rip),%ymm10        # 4c78 <_sk_callback_hsw+0x4bf>
+  DB  196,98,125,24,29,195,5,0,0          ; vbroadcastss  0x5c3(%rip),%ymm11        # 4c7c <_sk_callback_hsw+0x4c3>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,36,89,193                    ; vmulps        %ymm9,%ymm11,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -4265,16 +4293,16 @@ _sk_bicubic_n3y_hsw LABEL PROC
 PUBLIC _sk_bicubic_n1y_hsw
 _sk_bicubic_n1y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,167,5,0,0         ; vbroadcastss  0x5a7(%rip),%ymm1        # 4c04 <_sk_callback_hsw+0x4c8>
+  DB  196,226,125,24,13,166,5,0,0         ; vbroadcastss  0x5a6(%rip),%ymm1        # 4c80 <_sk_callback_hsw+0x4c7>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,157,5,0,0           ; vbroadcastss  0x59d(%rip),%ymm8        # 4c08 <_sk_callback_hsw+0x4cc>
+  DB  196,98,125,24,5,156,5,0,0           ; vbroadcastss  0x59c(%rip),%ymm8        # 4c84 <_sk_callback_hsw+0x4cb>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,147,5,0,0          ; vbroadcastss  0x593(%rip),%ymm9        # 4c0c <_sk_callback_hsw+0x4d0>
-  DB  196,98,125,24,21,142,5,0,0          ; vbroadcastss  0x58e(%rip),%ymm10        # 4c10 <_sk_callback_hsw+0x4d4>
+  DB  196,98,125,24,13,146,5,0,0          ; vbroadcastss  0x592(%rip),%ymm9        # 4c88 <_sk_callback_hsw+0x4cf>
+  DB  196,98,125,24,21,141,5,0,0          ; vbroadcastss  0x58d(%rip),%ymm10        # 4c8c <_sk_callback_hsw+0x4d3>
   DB  196,66,61,168,209                   ; vfmadd213ps   %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,13,132,5,0,0          ; vbroadcastss  0x584(%rip),%ymm9        # 4c14 <_sk_callback_hsw+0x4d8>
+  DB  196,98,125,24,13,131,5,0,0          ; vbroadcastss  0x583(%rip),%ymm9        # 4c90 <_sk_callback_hsw+0x4d7>
   DB  196,66,61,184,202                   ; vfmadd231ps   %ymm10,%ymm8,%ymm9
-  DB  196,98,125,24,21,122,5,0,0          ; vbroadcastss  0x57a(%rip),%ymm10        # 4c18 <_sk_callback_hsw+0x4dc>
+  DB  196,98,125,24,21,121,5,0,0          ; vbroadcastss  0x579(%rip),%ymm10        # 4c94 <_sk_callback_hsw+0x4db>
   DB  196,66,61,184,209                   ; vfmadd231ps   %ymm9,%ymm8,%ymm10
   DB  197,124,17,144,160,0,0,0            ; vmovups       %ymm10,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4283,14 +4311,14 @@ _sk_bicubic_n1y_hsw LABEL PROC
 PUBLIC _sk_bicubic_p1y_hsw
 _sk_bicubic_p1y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,98,5,0,0            ; vbroadcastss  0x562(%rip),%ymm8        # 4c1c <_sk_callback_hsw+0x4e0>
+  DB  196,98,125,24,5,97,5,0,0            ; vbroadcastss  0x561(%rip),%ymm8        # 4c98 <_sk_callback_hsw+0x4df>
   DB  197,188,88,72,32                    ; vaddps        0x20(%rax),%ymm8,%ymm1
   DB  197,124,16,72,96                    ; vmovups       0x60(%rax),%ymm9
-  DB  196,98,125,24,21,83,5,0,0           ; vbroadcastss  0x553(%rip),%ymm10        # 4c20 <_sk_callback_hsw+0x4e4>
-  DB  196,98,125,24,29,78,5,0,0           ; vbroadcastss  0x54e(%rip),%ymm11        # 4c24 <_sk_callback_hsw+0x4e8>
+  DB  196,98,125,24,21,82,5,0,0           ; vbroadcastss  0x552(%rip),%ymm10        # 4c9c <_sk_callback_hsw+0x4e3>
+  DB  196,98,125,24,29,77,5,0,0           ; vbroadcastss  0x54d(%rip),%ymm11        # 4ca0 <_sk_callback_hsw+0x4e7>
   DB  196,66,53,168,218                   ; vfmadd213ps   %ymm10,%ymm9,%ymm11
   DB  196,66,53,168,216                   ; vfmadd213ps   %ymm8,%ymm9,%ymm11
-  DB  196,98,125,24,5,63,5,0,0            ; vbroadcastss  0x53f(%rip),%ymm8        # 4c28 <_sk_callback_hsw+0x4ec>
+  DB  196,98,125,24,5,62,5,0,0            ; vbroadcastss  0x53e(%rip),%ymm8        # 4ca4 <_sk_callback_hsw+0x4eb>
   DB  196,66,53,184,195                   ; vfmadd231ps   %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -4299,12 +4327,12 @@ _sk_bicubic_p1y_hsw LABEL PROC
 PUBLIC _sk_bicubic_p3y_hsw
 _sk_bicubic_p3y_hsw LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,39,5,0,0          ; vbroadcastss  0x527(%rip),%ymm1        # 4c2c <_sk_callback_hsw+0x4f0>
+  DB  196,226,125,24,13,38,5,0,0          ; vbroadcastss  0x526(%rip),%ymm1        # 4ca8 <_sk_callback_hsw+0x4ef>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,19,5,0,0           ; vbroadcastss  0x513(%rip),%ymm10        # 4c30 <_sk_callback_hsw+0x4f4>
-  DB  196,98,125,24,29,14,5,0,0           ; vbroadcastss  0x50e(%rip),%ymm11        # 4c34 <_sk_callback_hsw+0x4f8>
+  DB  196,98,125,24,21,18,5,0,0           ; vbroadcastss  0x512(%rip),%ymm10        # 4cac <_sk_callback_hsw+0x4f3>
+  DB  196,98,125,24,29,13,5,0,0           ; vbroadcastss  0x50d(%rip),%ymm11        # 4cb0 <_sk_callback_hsw+0x4f7>
   DB  196,66,61,168,218                   ; vfmadd213ps   %ymm10,%ymm8,%ymm11
   DB  196,65,52,89,195                    ; vmulps        %ymm11,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -4418,25 +4446,25 @@ ALIGN 4
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 4909 <.literal4+0xb1>
+  DB  71,225,61                           ; rex.RXB       loope 4985 <.literal4+0xb1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 4919 <.literal4+0xc1>
+  DB  71,225,61                           ; rex.RXB       loope 4995 <.literal4+0xc1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 4929 <.literal4+0xd1>
+  DB  71,225,61                           ; rex.RXB       loope 49a5 <.literal4+0xd1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 4939 <.literal4+0xe1>
+  DB  71,225,61                           ; rex.RXB       loope 49b5 <.literal4+0xe1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -4483,7 +4511,7 @@ ALIGN 4
   DB  190,129,128,128,59                  ; mov           $0x3b808081,%esi
   DB  129,128,128,59,0,248,0,0,8,33       ; addl          $0x21080000,-0x7ffc480(%rax)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4989 <.literal4+0x131>
+  DB  224,7                               ; loopne        4a05 <.literal4+0x131>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -4499,10 +4527,10 @@ ALIGN 4
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
   DB  0,52,255                            ; add           %dh,(%rdi,%rdi,8)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            49b0 <.literal4+0x158>
+  DB  127,0                               ; jg            4a2c <.literal4+0x158>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4a29 <.literal4+0x1d1>
+  DB  119,115                             ; ja            4aa5 <.literal4+0x1d1>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -4516,10 +4544,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            49e4 <.literal4+0x18c>
+  DB  127,0                               ; jg            4a60 <.literal4+0x18c>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4a5d <.literal4+0x205>
+  DB  119,115                             ; ja            4ad9 <.literal4+0x205>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -4533,10 +4561,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4a18 <.literal4+0x1c0>
+  DB  127,0                               ; jg            4a94 <.literal4+0x1c0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4a91 <.literal4+0x239>
+  DB  119,115                             ; ja            4b0d <.literal4+0x239>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -4550,10 +4578,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4a4c <.literal4+0x1f4>
+  DB  127,0                               ; jg            4ac8 <.literal4+0x1f4>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4ac5 <.literal4+0x26d>
+  DB  119,115                             ; ja            4b41 <.literal4+0x26d>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -4566,7 +4594,7 @@ ALIGN 4
   DB  0,75,0                              ; add           %cl,0x0(%rbx)
   DB  0,128,63,0,0,200                    ; add           %al,-0x37ffffc1(%rax)
   DB  66,0,0                              ; rex.X         add %al,(%rax)
-  DB  127,67                              ; jg            4ac3 <.literal4+0x26b>
+  DB  127,67                              ; jg            4b3f <.literal4+0x26b>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -4578,10 +4606,10 @@ ALIGN 4
   DB  190,80,128,3,62                     ; mov           $0x3e038050,%esi
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           4ae3 <.literal4+0x28b>
+  DB  118,63                              ; jbe           4b5f <.literal4+0x28b>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            4af7 <.literal4+0x29f>
+  DB  127,67                              ; jg            4b73 <.literal4+0x29f>
   DB  129,128,128,59,0,0,128,63,129,128   ; addl          $0x80813f80,0x3b80(%rax)
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,128,63,129,128,128                ; add           %al,-0x7f7f7ec1(%rax)
@@ -4590,7 +4618,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4ad9 <.literal4+0x281>
+  DB  224,7                               ; loopne        4b55 <.literal4+0x281>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -4602,7 +4630,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4af5 <.literal4+0x29d>
+  DB  224,7                               ; loopne        4b71 <.literal4+0x29d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -4613,7 +4641,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            4b4a <.literal4+0x2f2>
+  DB  124,66                              ; jl            4bc6 <.literal4+0x2f2>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,55,0,15                 ; mov           %ecx,0xf003788(%rax)
@@ -4631,9 +4659,9 @@ ALIGN 4
   DB  137,136,136,59,15,0                 ; mov           %ecx,0xf3b88(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,61,0,0                  ; mov           %ecx,0x3d88(%rax)
-  DB  112,65                              ; jo            4b8d <.literal4+0x335>
+  DB  112,65                              ; jo            4c09 <.literal4+0x335>
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            4b9b <.literal4+0x343>
+  DB  127,67                              ; jg            4c17 <.literal4+0x343>
   DB  128,0,128                           ; addb          $0x80,(%rax)
   DB  55                                  ; (bad)
   DB  128,0,128                           ; addb          $0x80,(%rax)
@@ -4641,7 +4669,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  255                                 ; (bad)
-  DB  127,71                              ; jg            4baf <.literal4+0x357>
+  DB  127,71                              ; jg            4c2b <.literal4+0x357>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,89                               ; ds            pop %rcx
@@ -4741,16 +4769,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004c68 <_sk_callback_hsw+0xa00052c>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004ce8 <_sk_callback_hsw+0xa00052f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004c70 <_sk_callback_hsw+0x12000534>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004cf0 <_sk_callback_hsw+0x12000537>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004c78 <_sk_callback_hsw+0x1a00053c>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004cf8 <_sk_callback_hsw+0x1a00053f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004c80 <_sk_callback_hsw+0x3000544>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004d00 <_sk_callback_hsw+0x3000547>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4793,16 +4821,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004cc8 <_sk_callback_hsw+0xa00058c>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004d48 <_sk_callback_hsw+0xa00058f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004cd0 <_sk_callback_hsw+0x12000594>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004d50 <_sk_callback_hsw+0x12000597>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004cd8 <_sk_callback_hsw+0x1a00059c>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004d58 <_sk_callback_hsw+0x1a00059f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004ce0 <_sk_callback_hsw+0x30005a4>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004d60 <_sk_callback_hsw+0x30005a7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4845,16 +4873,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004d28 <_sk_callback_hsw+0xa0005ec>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004da8 <_sk_callback_hsw+0xa0005ef>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004d30 <_sk_callback_hsw+0x120005f4>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004db0 <_sk_callback_hsw+0x120005f7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004d38 <_sk_callback_hsw+0x1a0005fc>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004db8 <_sk_callback_hsw+0x1a0005ff>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004d40 <_sk_callback_hsw+0x3000604>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004dc0 <_sk_callback_hsw+0x3000607>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -4897,16 +4925,16 @@ ALIGN 32
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004d88 <_sk_callback_hsw+0xa00064c>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004e08 <_sk_callback_hsw+0xa00064f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004d90 <_sk_callback_hsw+0x12000654>
+  DB  255,13,255,255,255,17               ; decl          0x11ffffff(%rip)        # 12004e10 <_sk_callback_hsw+0x12000657>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004d98 <_sk_callback_hsw+0x1a00065c>
+  DB  255,21,255,255,255,25               ; callq         *0x19ffffff(%rip)        # 1a004e18 <_sk_callback_hsw+0x1a00065f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004da0 <_sk_callback_hsw+0x3000664>
+  DB  255,29,255,255,255,2                ; lcall         *0x2ffffff(%rip)        # 3004e20 <_sk_callback_hsw+0x3000667>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -5048,14 +5076,14 @@ _sk_seed_shader_avx LABEL PROC
   DB  197,249,112,192,0                   ; vpshufd       $0x0,%xmm0,%xmm0
   DB  196,227,125,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,248,98,0,0        ; vbroadcastss  0x62f8(%rip),%ymm1        # 6458 <_sk_callback_avx+0x11a>
+  DB  196,226,125,24,13,144,99,0,0        ; vbroadcastss  0x6390(%rip),%ymm1        # 64f0 <_sk_callback_avx+0x119>
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
   DB  197,252,88,2                        ; vaddps        (%rdx),%ymm0,%ymm0
   DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  197,236,88,201                      ; vaddps        %ymm1,%ymm2,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,21,220,98,0,0        ; vbroadcastss  0x62dc(%rip),%ymm2        # 645c <_sk_callback_avx+0x11e>
+  DB  196,226,125,24,21,116,99,0,0        ; vbroadcastss  0x6374(%rip),%ymm2        # 64f4 <_sk_callback_avx+0x11d>
   DB  197,228,87,219                      ; vxorps        %ymm3,%ymm3,%ymm3
   DB  197,220,87,228                      ; vxorps        %ymm4,%ymm4,%ymm4
   DB  197,212,87,237                      ; vxorps        %ymm5,%ymm5,%ymm5
@@ -5075,7 +5103,7 @@ _sk_dither_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  196,66,125,24,8                     ; vbroadcastss  (%r8),%ymm9
   DB  196,65,60,87,209                    ; vxorps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,148,98,0,0         ; vbroadcastss  0x6294(%rip),%ymm11        # 6460 <_sk_callback_avx+0x122>
+  DB  196,98,125,24,29,44,99,0,0          ; vbroadcastss  0x632c(%rip),%ymm11        # 64f8 <_sk_callback_avx+0x121>
   DB  196,65,44,84,203                    ; vandps        %ymm11,%ymm10,%ymm9
   DB  196,193,25,114,241,5                ; vpslld        $0x5,%xmm9,%xmm12
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
@@ -5086,8 +5114,8 @@ _sk_dither_avx LABEL PROC
   DB  196,67,125,25,219,1                 ; vextractf128  $0x1,%ymm11,%xmm11
   DB  196,193,33,114,243,4                ; vpslld        $0x4,%xmm11,%xmm11
   DB  196,67,29,24,219,1                  ; vinsertf128   $0x1,%xmm11,%ymm12,%ymm11
-  DB  196,98,125,24,37,85,98,0,0          ; vbroadcastss  0x6255(%rip),%ymm12        # 6464 <_sk_callback_avx+0x126>
-  DB  196,98,125,24,45,80,98,0,0          ; vbroadcastss  0x6250(%rip),%ymm13        # 6468 <_sk_callback_avx+0x12a>
+  DB  196,98,125,24,37,237,98,0,0         ; vbroadcastss  0x62ed(%rip),%ymm12        # 64fc <_sk_callback_avx+0x125>
+  DB  196,98,125,24,45,232,98,0,0         ; vbroadcastss  0x62e8(%rip),%ymm13        # 6500 <_sk_callback_avx+0x129>
   DB  196,65,44,84,245                    ; vandps        %ymm13,%ymm10,%ymm14
   DB  196,193,1,114,246,2                 ; vpslld        $0x2,%xmm14,%xmm15
   DB  196,67,125,25,246,1                 ; vextractf128  $0x1,%ymm14,%xmm14
@@ -5114,9 +5142,9 @@ _sk_dither_avx LABEL PROC
   DB  196,65,60,86,193                    ; vorps         %ymm9,%ymm8,%ymm8
   DB  196,65,60,86,194                    ; vorps         %ymm10,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,187,97,0,0         ; vbroadcastss  0x61bb(%rip),%ymm9        # 646c <_sk_callback_avx+0x12e>
+  DB  196,98,125,24,13,83,98,0,0          ; vbroadcastss  0x6253(%rip),%ymm9        # 6504 <_sk_callback_avx+0x12d>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,177,97,0,0         ; vbroadcastss  0x61b1(%rip),%ymm9        # 6470 <_sk_callback_avx+0x132>
+  DB  196,98,125,24,13,73,98,0,0          ; vbroadcastss  0x6249(%rip),%ymm9        # 6508 <_sk_callback_avx+0x131>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  196,98,125,24,72,8                  ; vbroadcastss  0x8(%rax),%ymm9
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
@@ -5175,7 +5203,7 @@ _sk_clear_avx LABEL PROC
 PUBLIC _sk_srcatop_avx
 _sk_srcatop_avx LABEL PROC
   DB  197,252,89,199                      ; vmulps        %ymm7,%ymm0,%ymm0
-  DB  196,98,125,24,5,8,97,0,0            ; vbroadcastss  0x6108(%rip),%ymm8        # 6474 <_sk_callback_avx+0x136>
+  DB  196,98,125,24,5,160,97,0,0          ; vbroadcastss  0x61a0(%rip),%ymm8        # 650c <_sk_callback_avx+0x135>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5194,7 +5222,7 @@ _sk_srcatop_avx LABEL PROC
 PUBLIC _sk_dstatop_avx
 _sk_dstatop_avx LABEL PROC
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
-  DB  196,98,125,24,13,202,96,0,0         ; vbroadcastss  0x60ca(%rip),%ymm9        # 6478 <_sk_callback_avx+0x13a>
+  DB  196,98,125,24,13,98,97,0,0          ; vbroadcastss  0x6162(%rip),%ymm9        # 6510 <_sk_callback_avx+0x139>
   DB  197,52,92,207                       ; vsubps        %ymm7,%ymm9,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,188,88,192                      ; vaddps        %ymm0,%ymm8,%ymm0
@@ -5230,7 +5258,7 @@ _sk_dstin_avx LABEL PROC
 
 PUBLIC _sk_srcout_avx
 _sk_srcout_avx LABEL PROC
-  DB  196,98,125,24,5,105,96,0,0          ; vbroadcastss  0x6069(%rip),%ymm8        # 647c <_sk_callback_avx+0x13e>
+  DB  196,98,125,24,5,1,97,0,0            ; vbroadcastss  0x6101(%rip),%ymm8        # 6514 <_sk_callback_avx+0x13d>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -5241,7 +5269,7 @@ _sk_srcout_avx LABEL PROC
 
 PUBLIC _sk_dstout_avx
 _sk_dstout_avx LABEL PROC
-  DB  196,226,125,24,5,76,96,0,0          ; vbroadcastss  0x604c(%rip),%ymm0        # 6480 <_sk_callback_avx+0x142>
+  DB  196,226,125,24,5,228,96,0,0         ; vbroadcastss  0x60e4(%rip),%ymm0        # 6518 <_sk_callback_avx+0x141>
   DB  197,252,92,219                      ; vsubps        %ymm3,%ymm0,%ymm3
   DB  197,228,89,196                      ; vmulps        %ymm4,%ymm3,%ymm0
   DB  197,228,89,205                      ; vmulps        %ymm5,%ymm3,%ymm1
@@ -5252,7 +5280,7 @@ _sk_dstout_avx LABEL PROC
 
 PUBLIC _sk_srcover_avx
 _sk_srcover_avx LABEL PROC
-  DB  196,98,125,24,5,47,96,0,0           ; vbroadcastss  0x602f(%rip),%ymm8        # 6484 <_sk_callback_avx+0x146>
+  DB  196,98,125,24,5,199,96,0,0          ; vbroadcastss  0x60c7(%rip),%ymm8        # 651c <_sk_callback_avx+0x145>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,204                       ; vmulps        %ymm4,%ymm8,%ymm9
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -5267,7 +5295,7 @@ _sk_srcover_avx LABEL PROC
 
 PUBLIC _sk_dstover_avx
 _sk_dstover_avx LABEL PROC
-  DB  196,98,125,24,5,2,96,0,0            ; vbroadcastss  0x6002(%rip),%ymm8        # 6488 <_sk_callback_avx+0x14a>
+  DB  196,98,125,24,5,154,96,0,0          ; vbroadcastss  0x609a(%rip),%ymm8        # 6520 <_sk_callback_avx+0x149>
   DB  197,60,92,199                       ; vsubps        %ymm7,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,252,88,196                      ; vaddps        %ymm4,%ymm0,%ymm0
@@ -5291,7 +5319,7 @@ _sk_modulate_avx LABEL PROC
 
 PUBLIC _sk_multiply_avx
 _sk_multiply_avx LABEL PROC
-  DB  196,98,125,24,5,193,95,0,0          ; vbroadcastss  0x5fc1(%rip),%ymm8        # 648c <_sk_callback_avx+0x14e>
+  DB  196,98,125,24,5,89,96,0,0           ; vbroadcastss  0x6059(%rip),%ymm8        # 6524 <_sk_callback_avx+0x14d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,208                       ; vmulps        %ymm0,%ymm9,%ymm10
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5345,7 +5373,7 @@ _sk_screen_avx LABEL PROC
 
 PUBLIC _sk_xor__avx
 _sk_xor__avx LABEL PROC
-  DB  196,98,125,24,5,16,95,0,0           ; vbroadcastss  0x5f10(%rip),%ymm8        # 6490 <_sk_callback_avx+0x152>
+  DB  196,98,125,24,5,168,95,0,0          ; vbroadcastss  0x5fa8(%rip),%ymm8        # 6528 <_sk_callback_avx+0x151>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,192                      ; vmulps        %ymm0,%ymm9,%ymm0
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5380,7 +5408,7 @@ _sk_darken_avx LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,95,209                  ; vmaxps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,144,94,0,0          ; vbroadcastss  0x5e90(%rip),%ymm8        # 6494 <_sk_callback_avx+0x156>
+  DB  196,98,125,24,5,40,95,0,0           ; vbroadcastss  0x5f28(%rip),%ymm8        # 652c <_sk_callback_avx+0x155>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5404,7 +5432,7 @@ _sk_lighten_avx LABEL PROC
   DB  197,100,89,206                      ; vmulps        %ymm6,%ymm3,%ymm9
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,60,94,0,0           ; vbroadcastss  0x5e3c(%rip),%ymm8        # 6498 <_sk_callback_avx+0x15a>
+  DB  196,98,125,24,5,212,94,0,0          ; vbroadcastss  0x5ed4(%rip),%ymm8        # 6530 <_sk_callback_avx+0x159>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5431,7 +5459,7 @@ _sk_difference_avx LABEL PROC
   DB  196,193,108,93,209                  ; vminps        %ymm9,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,220,93,0,0          ; vbroadcastss  0x5ddc(%rip),%ymm8        # 649c <_sk_callback_avx+0x15e>
+  DB  196,98,125,24,5,116,94,0,0          ; vbroadcastss  0x5e74(%rip),%ymm8        # 6534 <_sk_callback_avx+0x15d>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5452,7 +5480,7 @@ _sk_exclusion_avx LABEL PROC
   DB  197,236,89,214                      ; vmulps        %ymm6,%ymm2,%ymm2
   DB  197,236,88,210                      ; vaddps        %ymm2,%ymm2,%ymm2
   DB  197,188,92,210                      ; vsubps        %ymm2,%ymm8,%ymm2
-  DB  196,98,125,24,5,151,93,0,0          ; vbroadcastss  0x5d97(%rip),%ymm8        # 64a0 <_sk_callback_avx+0x162>
+  DB  196,98,125,24,5,47,94,0,0           ; vbroadcastss  0x5e2f(%rip),%ymm8        # 6538 <_sk_callback_avx+0x161>
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
   DB  197,60,89,199                       ; vmulps        %ymm7,%ymm8,%ymm8
   DB  197,188,88,219                      ; vaddps        %ymm3,%ymm8,%ymm3
@@ -5461,7 +5489,7 @@ _sk_exclusion_avx LABEL PROC
 
 PUBLIC _sk_colorburn_avx
 _sk_colorburn_avx LABEL PROC
-  DB  196,98,125,24,5,130,93,0,0          ; vbroadcastss  0x5d82(%rip),%ymm8        # 64a4 <_sk_callback_avx+0x166>
+  DB  196,98,125,24,5,26,94,0,0           ; vbroadcastss  0x5e1a(%rip),%ymm8        # 653c <_sk_callback_avx+0x165>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,52,89,216                       ; vmulps        %ymm0,%ymm9,%ymm11
   DB  196,65,44,87,210                    ; vxorps        %ymm10,%ymm10,%ymm10
@@ -5521,7 +5549,7 @@ _sk_colorburn_avx LABEL PROC
 PUBLIC _sk_colordodge_avx
 _sk_colordodge_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
-  DB  196,98,125,24,13,126,92,0,0         ; vbroadcastss  0x5c7e(%rip),%ymm9        # 64a8 <_sk_callback_avx+0x16a>
+  DB  196,98,125,24,13,22,93,0,0          ; vbroadcastss  0x5d16(%rip),%ymm9        # 6540 <_sk_callback_avx+0x169>
   DB  197,52,92,215                       ; vsubps        %ymm7,%ymm9,%ymm10
   DB  197,44,89,216                       ; vmulps        %ymm0,%ymm10,%ymm11
   DB  197,52,92,203                       ; vsubps        %ymm3,%ymm9,%ymm9
@@ -5576,7 +5604,7 @@ _sk_colordodge_avx LABEL PROC
 
 PUBLIC _sk_hardlight_avx
 _sk_hardlight_avx LABEL PROC
-  DB  196,98,125,24,5,144,91,0,0          ; vbroadcastss  0x5b90(%rip),%ymm8        # 64ac <_sk_callback_avx+0x16e>
+  DB  196,98,125,24,5,40,92,0,0           ; vbroadcastss  0x5c28(%rip),%ymm8        # 6544 <_sk_callback_avx+0x16d>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5629,7 +5657,7 @@ _sk_hardlight_avx LABEL PROC
 
 PUBLIC _sk_overlay_avx
 _sk_overlay_avx LABEL PROC
-  DB  196,98,125,24,5,185,90,0,0          ; vbroadcastss  0x5ab9(%rip),%ymm8        # 64b0 <_sk_callback_avx+0x172>
+  DB  196,98,125,24,5,81,91,0,0           ; vbroadcastss  0x5b51(%rip),%ymm8        # 6548 <_sk_callback_avx+0x171>
   DB  197,60,92,215                       ; vsubps        %ymm7,%ymm8,%ymm10
   DB  197,44,89,200                       ; vmulps        %ymm0,%ymm10,%ymm9
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5694,10 +5722,10 @@ _sk_softlight_avx LABEL PROC
   DB  196,65,60,88,192                    ; vaddps        %ymm8,%ymm8,%ymm8
   DB  196,65,60,89,216                    ; vmulps        %ymm8,%ymm8,%ymm11
   DB  196,65,60,88,195                    ; vaddps        %ymm11,%ymm8,%ymm8
-  DB  196,98,125,24,29,172,89,0,0         ; vbroadcastss  0x59ac(%rip),%ymm11        # 64b8 <_sk_callback_avx+0x17a>
+  DB  196,98,125,24,29,68,90,0,0          ; vbroadcastss  0x5a44(%rip),%ymm11        # 6550 <_sk_callback_avx+0x179>
   DB  196,65,28,88,235                    ; vaddps        %ymm11,%ymm12,%ymm13
   DB  196,65,20,89,192                    ; vmulps        %ymm8,%ymm13,%ymm8
-  DB  196,98,125,24,45,157,89,0,0         ; vbroadcastss  0x599d(%rip),%ymm13        # 64bc <_sk_callback_avx+0x17e>
+  DB  196,98,125,24,45,53,90,0,0          ; vbroadcastss  0x5a35(%rip),%ymm13        # 6554 <_sk_callback_avx+0x17d>
   DB  196,65,28,89,245                    ; vmulps        %ymm13,%ymm12,%ymm14
   DB  196,65,12,88,192                    ; vaddps        %ymm8,%ymm14,%ymm8
   DB  196,65,124,82,244                   ; vrsqrtps      %ymm12,%ymm14
@@ -5708,7 +5736,7 @@ _sk_softlight_avx LABEL PROC
   DB  197,4,194,255,2                     ; vcmpleps      %ymm7,%ymm15,%ymm15
   DB  196,67,13,74,240,240                ; vblendvps     %ymm15,%ymm8,%ymm14,%ymm14
   DB  197,116,88,249                      ; vaddps        %ymm1,%ymm1,%ymm15
-  DB  196,98,125,24,5,91,89,0,0           ; vbroadcastss  0x595b(%rip),%ymm8        # 64b4 <_sk_callback_avx+0x176>
+  DB  196,98,125,24,5,243,89,0,0          ; vbroadcastss  0x59f3(%rip),%ymm8        # 654c <_sk_callback_avx+0x175>
   DB  196,65,60,92,228                    ; vsubps        %ymm12,%ymm8,%ymm12
   DB  197,132,92,195                      ; vsubps        %ymm3,%ymm15,%ymm0
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
@@ -5835,12 +5863,12 @@ _sk_hue_avx LABEL PROC
   DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
   DB  196,65,36,94,222                    ; vdivps        %ymm14,%ymm11,%ymm11
   DB  196,67,37,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  DB  196,98,125,24,53,37,87,0,0          ; vbroadcastss  0x5725(%rip),%ymm14        # 64c0 <_sk_callback_avx+0x182>
+  DB  196,98,125,24,53,189,87,0,0         ; vbroadcastss  0x57bd(%rip),%ymm14        # 6558 <_sk_callback_avx+0x181>
   DB  196,65,92,89,222                    ; vmulps        %ymm14,%ymm4,%ymm11
-  DB  196,98,125,24,61,27,87,0,0          ; vbroadcastss  0x571b(%rip),%ymm15        # 64c4 <_sk_callback_avx+0x186>
+  DB  196,98,125,24,61,179,87,0,0         ; vbroadcastss  0x57b3(%rip),%ymm15        # 655c <_sk_callback_avx+0x185>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
-  DB  196,226,125,24,5,12,87,0,0          ; vbroadcastss  0x570c(%rip),%ymm0        # 64c8 <_sk_callback_avx+0x18a>
+  DB  196,226,125,24,5,164,87,0,0         ; vbroadcastss  0x57a4(%rip),%ymm0        # 6560 <_sk_callback_avx+0x189>
   DB  197,76,89,232                       ; vmulps        %ymm0,%ymm6,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
   DB  196,65,52,89,238                    ; vmulps        %ymm14,%ymm9,%ymm13
@@ -5901,7 +5929,7 @@ _sk_hue_avx LABEL PROC
   DB  196,65,36,95,208                    ; vmaxps        %ymm8,%ymm11,%ymm10
   DB  196,195,109,74,209,240              ; vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,229,85,0,0          ; vbroadcastss  0x55e5(%rip),%ymm8        # 64cc <_sk_callback_avx+0x18e>
+  DB  196,98,125,24,5,125,86,0,0          ; vbroadcastss  0x567d(%rip),%ymm8        # 6564 <_sk_callback_avx+0x18d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -5958,12 +5986,12 @@ _sk_saturation_avx LABEL PROC
   DB  196,65,28,89,219                    ; vmulps        %ymm11,%ymm12,%ymm11
   DB  196,65,36,94,222                    ; vdivps        %ymm14,%ymm11,%ymm11
   DB  196,67,37,74,224,240                ; vblendvps     %ymm15,%ymm8,%ymm11,%ymm12
-  DB  196,98,125,24,53,237,84,0,0         ; vbroadcastss  0x54ed(%rip),%ymm14        # 64d0 <_sk_callback_avx+0x192>
+  DB  196,98,125,24,53,133,85,0,0         ; vbroadcastss  0x5585(%rip),%ymm14        # 6568 <_sk_callback_avx+0x191>
   DB  196,65,92,89,222                    ; vmulps        %ymm14,%ymm4,%ymm11
-  DB  196,98,125,24,61,227,84,0,0         ; vbroadcastss  0x54e3(%rip),%ymm15        # 64d4 <_sk_callback_avx+0x196>
+  DB  196,98,125,24,61,123,85,0,0         ; vbroadcastss  0x557b(%rip),%ymm15        # 656c <_sk_callback_avx+0x195>
   DB  196,65,84,89,239                    ; vmulps        %ymm15,%ymm5,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
-  DB  196,226,125,24,5,212,84,0,0         ; vbroadcastss  0x54d4(%rip),%ymm0        # 64d8 <_sk_callback_avx+0x19a>
+  DB  196,226,125,24,5,108,85,0,0         ; vbroadcastss  0x556c(%rip),%ymm0        # 6570 <_sk_callback_avx+0x199>
   DB  197,76,89,232                       ; vmulps        %ymm0,%ymm6,%ymm13
   DB  196,65,36,88,221                    ; vaddps        %ymm13,%ymm11,%ymm11
   DB  196,65,52,89,238                    ; vmulps        %ymm14,%ymm9,%ymm13
@@ -6024,7 +6052,7 @@ _sk_saturation_avx LABEL PROC
   DB  196,65,36,95,208                    ; vmaxps        %ymm8,%ymm11,%ymm10
   DB  196,195,109,74,209,240              ; vblendvps     %ymm15,%ymm9,%ymm2,%ymm2
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,173,83,0,0          ; vbroadcastss  0x53ad(%rip),%ymm8        # 64dc <_sk_callback_avx+0x19e>
+  DB  196,98,125,24,5,69,84,0,0           ; vbroadcastss  0x5445(%rip),%ymm8        # 6574 <_sk_callback_avx+0x19d>
   DB  197,60,92,207                       ; vsubps        %ymm7,%ymm8,%ymm9
   DB  197,180,89,201                      ; vmulps        %ymm1,%ymm9,%ymm1
   DB  197,60,92,195                       ; vsubps        %ymm3,%ymm8,%ymm8
@@ -6053,12 +6081,12 @@ _sk_color_avx LABEL PROC
   DB  197,252,17,68,36,32                 ; vmovups       %ymm0,0x20(%rsp)
   DB  197,124,89,199                      ; vmulps        %ymm7,%ymm0,%ymm8
   DB  197,116,89,207                      ; vmulps        %ymm7,%ymm1,%ymm9
-  DB  196,98,125,24,45,61,83,0,0          ; vbroadcastss  0x533d(%rip),%ymm13        # 64e0 <_sk_callback_avx+0x1a2>
+  DB  196,98,125,24,45,213,83,0,0         ; vbroadcastss  0x53d5(%rip),%ymm13        # 6578 <_sk_callback_avx+0x1a1>
   DB  196,65,92,89,213                    ; vmulps        %ymm13,%ymm4,%ymm10
-  DB  196,98,125,24,53,51,83,0,0          ; vbroadcastss  0x5333(%rip),%ymm14        # 64e4 <_sk_callback_avx+0x1a6>
+  DB  196,98,125,24,53,203,83,0,0         ; vbroadcastss  0x53cb(%rip),%ymm14        # 657c <_sk_callback_avx+0x1a5>
   DB  196,65,84,89,222                    ; vmulps        %ymm14,%ymm5,%ymm11
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,61,36,83,0,0          ; vbroadcastss  0x5324(%rip),%ymm15        # 64e8 <_sk_callback_avx+0x1aa>
+  DB  196,98,125,24,61,188,83,0,0         ; vbroadcastss  0x53bc(%rip),%ymm15        # 6580 <_sk_callback_avx+0x1a9>
   DB  196,65,76,89,223                    ; vmulps        %ymm15,%ymm6,%ymm11
   DB  196,193,44,88,195                   ; vaddps        %ymm11,%ymm10,%ymm0
   DB  196,65,60,89,221                    ; vmulps        %ymm13,%ymm8,%ymm11
@@ -6121,7 +6149,7 @@ _sk_color_avx LABEL PROC
   DB  196,65,44,95,207                    ; vmaxps        %ymm15,%ymm10,%ymm9
   DB  196,195,37,74,192,0                 ; vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   DB  196,65,124,95,199                   ; vmaxps        %ymm15,%ymm0,%ymm8
-  DB  196,226,125,24,5,235,81,0,0         ; vbroadcastss  0x51eb(%rip),%ymm0        # 64ec <_sk_callback_avx+0x1ae>
+  DB  196,226,125,24,5,131,82,0,0         ; vbroadcastss  0x5283(%rip),%ymm0        # 6584 <_sk_callback_avx+0x1ad>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,84,36,32                 ; vmulps        0x20(%rsp),%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -6151,12 +6179,12 @@ _sk_luminosity_avx LABEL PROC
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  197,100,89,196                      ; vmulps        %ymm4,%ymm3,%ymm8
   DB  197,100,89,205                      ; vmulps        %ymm5,%ymm3,%ymm9
-  DB  196,98,125,24,45,119,81,0,0         ; vbroadcastss  0x5177(%rip),%ymm13        # 64f0 <_sk_callback_avx+0x1b2>
+  DB  196,98,125,24,45,15,82,0,0          ; vbroadcastss  0x520f(%rip),%ymm13        # 6588 <_sk_callback_avx+0x1b1>
   DB  196,65,108,89,213                   ; vmulps        %ymm13,%ymm2,%ymm10
-  DB  196,98,125,24,53,109,81,0,0         ; vbroadcastss  0x516d(%rip),%ymm14        # 64f4 <_sk_callback_avx+0x1b6>
+  DB  196,98,125,24,53,5,82,0,0           ; vbroadcastss  0x5205(%rip),%ymm14        # 658c <_sk_callback_avx+0x1b5>
   DB  196,65,116,89,222                   ; vmulps        %ymm14,%ymm1,%ymm11
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,61,94,81,0,0          ; vbroadcastss  0x515e(%rip),%ymm15        # 64f8 <_sk_callback_avx+0x1ba>
+  DB  196,98,125,24,61,246,81,0,0         ; vbroadcastss  0x51f6(%rip),%ymm15        # 6590 <_sk_callback_avx+0x1b9>
   DB  196,65,28,89,223                    ; vmulps        %ymm15,%ymm12,%ymm11
   DB  196,193,44,88,195                   ; vaddps        %ymm11,%ymm10,%ymm0
   DB  196,65,60,89,221                    ; vmulps        %ymm13,%ymm8,%ymm11
@@ -6219,7 +6247,7 @@ _sk_luminosity_avx LABEL PROC
   DB  196,65,44,95,207                    ; vmaxps        %ymm15,%ymm10,%ymm9
   DB  196,195,37,74,192,0                 ; vblendvps     %ymm0,%ymm8,%ymm11,%ymm0
   DB  196,65,124,95,199                   ; vmaxps        %ymm15,%ymm0,%ymm8
-  DB  196,226,125,24,5,37,80,0,0          ; vbroadcastss  0x5025(%rip),%ymm0        # 64fc <_sk_callback_avx+0x1be>
+  DB  196,226,125,24,5,189,80,0,0         ; vbroadcastss  0x50bd(%rip),%ymm0        # 6594 <_sk_callback_avx+0x1bd>
   DB  197,124,92,215                      ; vsubps        %ymm7,%ymm0,%ymm10
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  197,124,92,219                      ; vsubps        %ymm3,%ymm0,%ymm11
@@ -6252,7 +6280,7 @@ _sk_clamp_0_avx LABEL PROC
 
 PUBLIC _sk_clamp_1_avx
 _sk_clamp_1_avx LABEL PROC
-  DB  196,98,125,24,5,181,79,0,0          ; vbroadcastss  0x4fb5(%rip),%ymm8        # 6500 <_sk_callback_avx+0x1c2>
+  DB  196,98,125,24,5,77,80,0,0           ; vbroadcastss  0x504d(%rip),%ymm8        # 6598 <_sk_callback_avx+0x1c1>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
@@ -6262,7 +6290,7 @@ _sk_clamp_1_avx LABEL PROC
 
 PUBLIC _sk_clamp_a_avx
 _sk_clamp_a_avx LABEL PROC
-  DB  196,98,125,24,5,152,79,0,0          ; vbroadcastss  0x4f98(%rip),%ymm8        # 6504 <_sk_callback_avx+0x1c6>
+  DB  196,98,125,24,5,48,80,0,0           ; vbroadcastss  0x5030(%rip),%ymm8        # 659c <_sk_callback_avx+0x1c5>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  197,252,93,195                      ; vminps        %ymm3,%ymm0,%ymm0
   DB  197,244,93,203                      ; vminps        %ymm3,%ymm1,%ymm1
@@ -6334,7 +6362,7 @@ PUBLIC _sk_unpremul_avx
 _sk_unpremul_avx LABEL PROC
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,65,100,194,200,0                ; vcmpeqps      %ymm8,%ymm3,%ymm9
-  DB  196,98,125,24,21,224,78,0,0         ; vbroadcastss  0x4ee0(%rip),%ymm10        # 6508 <_sk_callback_avx+0x1ca>
+  DB  196,98,125,24,21,120,79,0,0         ; vbroadcastss  0x4f78(%rip),%ymm10        # 65a0 <_sk_callback_avx+0x1c9>
   DB  197,44,94,211                       ; vdivps        %ymm3,%ymm10,%ymm10
   DB  196,67,45,74,192,144                ; vblendvps     %ymm9,%ymm8,%ymm10,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
@@ -6345,17 +6373,17 @@ _sk_unpremul_avx LABEL PROC
 
 PUBLIC _sk_from_srgb_avx
 _sk_from_srgb_avx LABEL PROC
-  DB  196,98,125,24,5,193,78,0,0          ; vbroadcastss  0x4ec1(%rip),%ymm8        # 650c <_sk_callback_avx+0x1ce>
+  DB  196,98,125,24,5,89,79,0,0           ; vbroadcastss  0x4f59(%rip),%ymm8        # 65a4 <_sk_callback_avx+0x1cd>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  197,124,89,208                      ; vmulps        %ymm0,%ymm0,%ymm10
-  DB  196,98,125,24,29,179,78,0,0         ; vbroadcastss  0x4eb3(%rip),%ymm11        # 6510 <_sk_callback_avx+0x1d2>
+  DB  196,98,125,24,29,75,79,0,0          ; vbroadcastss  0x4f4b(%rip),%ymm11        # 65a8 <_sk_callback_avx+0x1d1>
   DB  196,65,124,89,227                   ; vmulps        %ymm11,%ymm0,%ymm12
-  DB  196,98,125,24,45,169,78,0,0         ; vbroadcastss  0x4ea9(%rip),%ymm13        # 6514 <_sk_callback_avx+0x1d6>
+  DB  196,98,125,24,45,65,79,0,0          ; vbroadcastss  0x4f41(%rip),%ymm13        # 65ac <_sk_callback_avx+0x1d5>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,44,89,212                    ; vmulps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,37,154,78,0,0         ; vbroadcastss  0x4e9a(%rip),%ymm12        # 6518 <_sk_callback_avx+0x1da>
+  DB  196,98,125,24,37,50,79,0,0          ; vbroadcastss  0x4f32(%rip),%ymm12        # 65b0 <_sk_callback_avx+0x1d9>
   DB  196,65,44,88,212                    ; vaddps        %ymm12,%ymm10,%ymm10
-  DB  196,98,125,24,53,144,78,0,0         ; vbroadcastss  0x4e90(%rip),%ymm14        # 651c <_sk_callback_avx+0x1de>
+  DB  196,98,125,24,53,40,79,0,0          ; vbroadcastss  0x4f28(%rip),%ymm14        # 65b4 <_sk_callback_avx+0x1dd>
   DB  196,193,124,194,198,1               ; vcmpltps      %ymm14,%ymm0,%ymm0
   DB  196,195,45,74,193,0                 ; vblendvps     %ymm0,%ymm9,%ymm10,%ymm0
   DB  196,65,116,89,200                   ; vmulps        %ymm8,%ymm1,%ymm9
@@ -6380,20 +6408,20 @@ _sk_from_srgb_avx LABEL PROC
 PUBLIC _sk_to_srgb_avx
 _sk_to_srgb_avx LABEL PROC
   DB  197,124,82,200                      ; vrsqrtps      %ymm0,%ymm9
-  DB  196,98,125,24,5,37,78,0,0           ; vbroadcastss  0x4e25(%rip),%ymm8        # 6520 <_sk_callback_avx+0x1e2>
+  DB  196,98,125,24,5,189,78,0,0          ; vbroadcastss  0x4ebd(%rip),%ymm8        # 65b8 <_sk_callback_avx+0x1e1>
   DB  196,65,124,89,208                   ; vmulps        %ymm8,%ymm0,%ymm10
-  DB  196,98,125,24,29,27,78,0,0          ; vbroadcastss  0x4e1b(%rip),%ymm11        # 6524 <_sk_callback_avx+0x1e6>
+  DB  196,98,125,24,29,179,78,0,0         ; vbroadcastss  0x4eb3(%rip),%ymm11        # 65bc <_sk_callback_avx+0x1e5>
   DB  196,65,52,89,227                    ; vmulps        %ymm11,%ymm9,%ymm12
-  DB  196,98,125,24,45,17,78,0,0          ; vbroadcastss  0x4e11(%rip),%ymm13        # 6528 <_sk_callback_avx+0x1ea>
+  DB  196,98,125,24,45,169,78,0,0         ; vbroadcastss  0x4ea9(%rip),%ymm13        # 65c0 <_sk_callback_avx+0x1e9>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,52,89,228                    ; vmulps        %ymm12,%ymm9,%ymm12
-  DB  196,98,125,24,53,2,78,0,0           ; vbroadcastss  0x4e02(%rip),%ymm14        # 652c <_sk_callback_avx+0x1ee>
+  DB  196,98,125,24,53,154,78,0,0         ; vbroadcastss  0x4e9a(%rip),%ymm14        # 65c4 <_sk_callback_avx+0x1ed>
   DB  196,65,28,88,230                    ; vaddps        %ymm14,%ymm12,%ymm12
-  DB  196,98,125,24,61,248,77,0,0         ; vbroadcastss  0x4df8(%rip),%ymm15        # 6530 <_sk_callback_avx+0x1f2>
+  DB  196,98,125,24,61,144,78,0,0         ; vbroadcastss  0x4e90(%rip),%ymm15        # 65c8 <_sk_callback_avx+0x1f1>
   DB  196,65,52,88,207                    ; vaddps        %ymm15,%ymm9,%ymm9
   DB  196,65,124,83,201                   ; vrcpps        %ymm9,%ymm9
   DB  196,65,52,89,204                    ; vmulps        %ymm12,%ymm9,%ymm9
-  DB  196,98,125,24,37,228,77,0,0         ; vbroadcastss  0x4de4(%rip),%ymm12        # 6534 <_sk_callback_avx+0x1f6>
+  DB  196,98,125,24,37,124,78,0,0         ; vbroadcastss  0x4e7c(%rip),%ymm12        # 65cc <_sk_callback_avx+0x1f5>
   DB  196,193,124,194,196,1               ; vcmpltps      %ymm12,%ymm0,%ymm0
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  197,124,82,201                      ; vrsqrtps      %ymm1,%ymm9
@@ -6428,7 +6456,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  197,124,93,201                      ; vminps        %ymm1,%ymm0,%ymm9
   DB  197,52,93,202                       ; vminps        %ymm2,%ymm9,%ymm9
   DB  196,65,60,92,209                    ; vsubps        %ymm9,%ymm8,%ymm10
-  DB  196,98,125,24,29,74,77,0,0          ; vbroadcastss  0x4d4a(%rip),%ymm11        # 6538 <_sk_callback_avx+0x1fa>
+  DB  196,98,125,24,29,226,77,0,0         ; vbroadcastss  0x4de2(%rip),%ymm11        # 65d0 <_sk_callback_avx+0x1f9>
   DB  196,65,36,94,218                    ; vdivps        %ymm10,%ymm11,%ymm11
   DB  197,116,92,226                      ; vsubps        %ymm2,%ymm1,%ymm12
   DB  196,65,28,89,227                    ; vmulps        %ymm11,%ymm12,%ymm12
@@ -6438,19 +6466,19 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  197,252,92,201                      ; vsubps        %ymm1,%ymm0,%ymm1
   DB  196,193,116,89,203                  ; vmulps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,35,77,0,0          ; vbroadcastss  0x4d23(%rip),%ymm11        # 6544 <_sk_callback_avx+0x206>
+  DB  196,98,125,24,29,187,77,0,0         ; vbroadcastss  0x4dbb(%rip),%ymm11        # 65dc <_sk_callback_avx+0x205>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,17,77,0,0          ; vbroadcastss  0x4d11(%rip),%ymm11        # 6540 <_sk_callback_avx+0x202>
+  DB  196,98,125,24,29,169,77,0,0         ; vbroadcastss  0x4da9(%rip),%ymm11        # 65d8 <_sk_callback_avx+0x201>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,227,117,74,202,224              ; vblendvps     %ymm14,%ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,249,76,0,0        ; vbroadcastss  0x4cf9(%rip),%ymm2        # 653c <_sk_callback_avx+0x1fe>
+  DB  196,226,125,24,21,145,77,0,0        ; vbroadcastss  0x4d91(%rip),%ymm2        # 65d4 <_sk_callback_avx+0x1fd>
   DB  196,65,12,87,246                    ; vxorps        %ymm14,%ymm14,%ymm14
   DB  196,227,13,74,210,208               ; vblendvps     %ymm13,%ymm2,%ymm14,%ymm2
   DB  197,188,194,192,0                   ; vcmpeqps      %ymm0,%ymm8,%ymm0
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
   DB  196,227,117,74,194,0                ; vblendvps     %ymm0,%ymm2,%ymm1,%ymm0
   DB  196,193,60,88,201                   ; vaddps        %ymm9,%ymm8,%ymm1
-  DB  196,98,125,24,37,224,76,0,0         ; vbroadcastss  0x4ce0(%rip),%ymm12        # 654c <_sk_callback_avx+0x20e>
+  DB  196,98,125,24,37,120,77,0,0         ; vbroadcastss  0x4d78(%rip),%ymm12        # 65e4 <_sk_callback_avx+0x20d>
   DB  196,193,116,89,212                  ; vmulps        %ymm12,%ymm1,%ymm2
   DB  197,28,194,226,1                    ; vcmpltps      %ymm2,%ymm12,%ymm12
   DB  196,65,36,92,216                    ; vsubps        %ymm8,%ymm11,%ymm11
@@ -6460,7 +6488,7 @@ _sk_rgb_to_hsl_avx LABEL PROC
   DB  197,172,94,201                      ; vdivps        %ymm1,%ymm10,%ymm1
   DB  196,195,125,74,198,128              ; vblendvps     %ymm8,%ymm14,%ymm0,%ymm0
   DB  196,195,117,74,206,128              ; vblendvps     %ymm8,%ymm14,%ymm1,%ymm1
-  DB  196,98,125,24,5,163,76,0,0          ; vbroadcastss  0x4ca3(%rip),%ymm8        # 6548 <_sk_callback_avx+0x20a>
+  DB  196,98,125,24,5,59,77,0,0           ; vbroadcastss  0x4d3b(%rip),%ymm8        # 65e0 <_sk_callback_avx+0x209>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -6475,7 +6503,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,252,17,28,36                    ; vmovups       %ymm3,(%rsp)
   DB  197,252,40,225                      ; vmovaps       %ymm1,%ymm4
   DB  197,252,40,216                      ; vmovaps       %ymm0,%ymm3
-  DB  196,98,125,24,5,106,76,0,0          ; vbroadcastss  0x4c6a(%rip),%ymm8        # 6550 <_sk_callback_avx+0x212>
+  DB  196,98,125,24,5,2,77,0,0            ; vbroadcastss  0x4d02(%rip),%ymm8        # 65e8 <_sk_callback_avx+0x211>
   DB  197,60,194,202,2                    ; vcmpleps      %ymm2,%ymm8,%ymm9
   DB  197,92,89,210                       ; vmulps        %ymm2,%ymm4,%ymm10
   DB  196,65,92,92,218                    ; vsubps        %ymm10,%ymm4,%ymm11
@@ -6483,23 +6511,23 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,52,88,210                       ; vaddps        %ymm2,%ymm9,%ymm10
   DB  197,108,88,202                      ; vaddps        %ymm2,%ymm2,%ymm9
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,29,68,76,0,0          ; vbroadcastss  0x4c44(%rip),%ymm11        # 6554 <_sk_callback_avx+0x216>
+  DB  196,98,125,24,29,220,76,0,0         ; vbroadcastss  0x4cdc(%rip),%ymm11        # 65ec <_sk_callback_avx+0x215>
   DB  196,65,100,88,219                   ; vaddps        %ymm11,%ymm3,%ymm11
   DB  196,67,125,8,227,1                  ; vroundps      $0x1,%ymm11,%ymm12
   DB  196,65,36,92,252                    ; vsubps        %ymm12,%ymm11,%ymm15
   DB  196,65,44,92,217                    ; vsubps        %ymm9,%ymm10,%ymm11
-  DB  196,98,125,24,37,46,76,0,0          ; vbroadcastss  0x4c2e(%rip),%ymm12        # 655c <_sk_callback_avx+0x21e>
+  DB  196,98,125,24,37,198,76,0,0         ; vbroadcastss  0x4cc6(%rip),%ymm12        # 65f4 <_sk_callback_avx+0x21d>
   DB  196,193,4,89,196                    ; vmulps        %ymm12,%ymm15,%ymm0
-  DB  196,98,125,24,45,36,76,0,0          ; vbroadcastss  0x4c24(%rip),%ymm13        # 6560 <_sk_callback_avx+0x222>
+  DB  196,98,125,24,45,188,76,0,0         ; vbroadcastss  0x4cbc(%rip),%ymm13        # 65f8 <_sk_callback_avx+0x221>
   DB  197,20,92,240                       ; vsubps        %ymm0,%ymm13,%ymm14
   DB  196,65,36,89,246                    ; vmulps        %ymm14,%ymm11,%ymm14
   DB  196,65,52,88,246                    ; vaddps        %ymm14,%ymm9,%ymm14
-  DB  196,226,125,24,13,5,76,0,0          ; vbroadcastss  0x4c05(%rip),%ymm1        # 6558 <_sk_callback_avx+0x21a>
+  DB  196,226,125,24,13,157,76,0,0        ; vbroadcastss  0x4c9d(%rip),%ymm1        # 65f0 <_sk_callback_avx+0x219>
   DB  196,193,116,194,255,2               ; vcmpleps      %ymm15,%ymm1,%ymm7
   DB  196,195,13,74,249,112               ; vblendvps     %ymm7,%ymm9,%ymm14,%ymm7
   DB  196,65,60,194,247,2                 ; vcmpleps      %ymm15,%ymm8,%ymm14
   DB  196,227,45,74,255,224               ; vblendvps     %ymm14,%ymm7,%ymm10,%ymm7
-  DB  196,98,125,24,53,240,75,0,0         ; vbroadcastss  0x4bf0(%rip),%ymm14        # 6564 <_sk_callback_avx+0x226>
+  DB  196,98,125,24,53,136,76,0,0         ; vbroadcastss  0x4c88(%rip),%ymm14        # 65fc <_sk_callback_avx+0x225>
   DB  196,65,12,194,255,2                 ; vcmpleps      %ymm15,%ymm14,%ymm15
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,180,88,192                      ; vaddps        %ymm0,%ymm9,%ymm0
@@ -6518,7 +6546,7 @@ _sk_hsl_to_rgb_avx LABEL PROC
   DB  197,164,89,247                      ; vmulps        %ymm7,%ymm11,%ymm6
   DB  197,180,88,246                      ; vaddps        %ymm6,%ymm9,%ymm6
   DB  196,227,77,74,237,0                 ; vblendvps     %ymm0,%ymm5,%ymm6,%ymm5
-  DB  196,226,125,24,5,146,75,0,0         ; vbroadcastss  0x4b92(%rip),%ymm0        # 6568 <_sk_callback_avx+0x22a>
+  DB  196,226,125,24,5,42,76,0,0          ; vbroadcastss  0x4c2a(%rip),%ymm0        # 6600 <_sk_callback_avx+0x229>
   DB  197,228,88,192                      ; vaddps        %ymm0,%ymm3,%ymm0
   DB  196,227,125,8,216,1                 ; vroundps      $0x1,%ymm0,%ymm3
   DB  197,252,92,195                      ; vsubps        %ymm3,%ymm0,%ymm0
@@ -6573,7 +6601,7 @@ _sk_scale_u8_avx LABEL PROC
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,181,74,0,0         ; vbroadcastss  0x4ab5(%rip),%ymm9        # 656c <_sk_callback_avx+0x22e>
+  DB  196,98,125,24,13,77,75,0,0          ; vbroadcastss  0x4b4d(%rip),%ymm9        # 6604 <_sk_callback_avx+0x22d>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
@@ -6628,7 +6656,7 @@ _sk_lerp_u8_avx LABEL PROC
   DB  196,66,121,49,192                   ; vpmovzxbd     %xmm8,%xmm8
   DB  196,67,53,24,192,1                  ; vinsertf128   $0x1,%xmm8,%ymm9,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,13,1,74,0,0           ; vbroadcastss  0x4a01(%rip),%ymm9        # 6570 <_sk_callback_avx+0x232>
+  DB  196,98,125,24,13,153,74,0,0         ; vbroadcastss  0x4a99(%rip),%ymm9        # 6608 <_sk_callback_avx+0x231>
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
@@ -6669,20 +6697,20 @@ _sk_lerp_565_avx LABEL PROC
   DB  196,65,57,105,201                   ; vpunpckhwd    %xmm9,%xmm8,%xmm9
   DB  196,66,121,51,192                   ; vpmovzxwd     %xmm8,%xmm8
   DB  196,67,61,24,193,1                  ; vinsertf128   $0x1,%xmm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,107,73,0,0         ; vbroadcastss  0x496b(%rip),%ymm9        # 6574 <_sk_callback_avx+0x236>
+  DB  196,98,125,24,13,3,74,0,0           ; vbroadcastss  0x4a03(%rip),%ymm9        # 660c <_sk_callback_avx+0x235>
   DB  196,65,60,84,201                    ; vandps        %ymm9,%ymm8,%ymm9
   DB  196,65,124,91,201                   ; vcvtdq2ps     %ymm9,%ymm9
-  DB  196,98,125,24,21,92,73,0,0          ; vbroadcastss  0x495c(%rip),%ymm10        # 6578 <_sk_callback_avx+0x23a>
+  DB  196,98,125,24,21,244,73,0,0         ; vbroadcastss  0x49f4(%rip),%ymm10        # 6610 <_sk_callback_avx+0x239>
   DB  196,65,52,89,202                    ; vmulps        %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,82,73,0,0          ; vbroadcastss  0x4952(%rip),%ymm10        # 657c <_sk_callback_avx+0x23e>
+  DB  196,98,125,24,21,234,73,0,0         ; vbroadcastss  0x49ea(%rip),%ymm10        # 6614 <_sk_callback_avx+0x23d>
   DB  196,65,60,84,210                    ; vandps        %ymm10,%ymm8,%ymm10
   DB  196,65,124,91,210                   ; vcvtdq2ps     %ymm10,%ymm10
-  DB  196,98,125,24,29,67,73,0,0          ; vbroadcastss  0x4943(%rip),%ymm11        # 6580 <_sk_callback_avx+0x242>
+  DB  196,98,125,24,29,219,73,0,0         ; vbroadcastss  0x49db(%rip),%ymm11        # 6618 <_sk_callback_avx+0x241>
   DB  196,65,44,89,211                    ; vmulps        %ymm11,%ymm10,%ymm10
-  DB  196,98,125,24,29,57,73,0,0          ; vbroadcastss  0x4939(%rip),%ymm11        # 6584 <_sk_callback_avx+0x246>
+  DB  196,98,125,24,29,209,73,0,0         ; vbroadcastss  0x49d1(%rip),%ymm11        # 661c <_sk_callback_avx+0x245>
   DB  196,65,60,84,195                    ; vandps        %ymm11,%ymm8,%ymm8
   DB  196,65,124,91,192                   ; vcvtdq2ps     %ymm8,%ymm8
-  DB  196,98,125,24,29,42,73,0,0          ; vbroadcastss  0x492a(%rip),%ymm11        # 6588 <_sk_callback_avx+0x24a>
+  DB  196,98,125,24,29,194,73,0,0         ; vbroadcastss  0x49c2(%rip),%ymm11        # 6620 <_sk_callback_avx+0x249>
   DB  196,65,60,89,195                    ; vmulps        %ymm11,%ymm8,%ymm8
   DB  197,252,92,196                      ; vsubps        %ymm4,%ymm0,%ymm0
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
@@ -6761,7 +6789,7 @@ _sk_load_tables_avx LABEL PROC
   DB  65,85                               ; push          %r13
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
-  DB  197,124,40,13,250,74,0,0            ; vmovaps       0x4afa(%rip),%ymm9        # 6860 <_sk_callback_avx+0x522>
+  DB  197,124,40,13,154,75,0,0            ; vmovaps       0x4b9a(%rip),%ymm9        # 6900 <_sk_callback_avx+0x529>
   DB  196,193,60,84,193                   ; vandps        %ymm9,%ymm8,%ymm0
   DB  196,193,249,126,193                 ; vmovq         %xmm0,%r9
   DB  69,137,203                          ; mov           %r9d,%r11d
@@ -6853,7 +6881,7 @@ _sk_load_tables_avx LABEL PROC
   DB  196,193,97,114,210,24               ; vpsrld        $0x18,%xmm10,%xmm3
   DB  196,227,61,24,219,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,55,70,0,0           ; vbroadcastss  0x4637(%rip),%ymm8        # 658c <_sk_callback_avx+0x24e>
+  DB  196,98,125,24,5,207,70,0,0          ; vbroadcastss  0x46cf(%rip),%ymm8        # 6624 <_sk_callback_avx+0x24d>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -6943,7 +6971,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  197,177,108,208                     ; vpunpcklqdq   %xmm0,%xmm9,%xmm2
   DB  197,177,109,200                     ; vpunpckhqdq   %xmm0,%xmm9,%xmm1
   DB  196,65,57,108,212                   ; vpunpcklqdq   %xmm12,%xmm8,%xmm10
-  DB  197,121,111,29,58,72,0,0            ; vmovdqa       0x483a(%rip),%xmm11        # 68e0 <_sk_callback_avx+0x5a2>
+  DB  197,121,111,29,218,72,0,0           ; vmovdqa       0x48da(%rip),%xmm11        # 6980 <_sk_callback_avx+0x5a9>
   DB  196,193,105,219,195                 ; vpand         %xmm11,%xmm2,%xmm0
   DB  196,65,49,239,201                   ; vpxor         %xmm9,%xmm9,%xmm9
   DB  196,193,121,105,209                 ; vpunpckhwd    %xmm9,%xmm0,%xmm2
@@ -7042,7 +7070,7 @@ _sk_load_tables_u16_be_avx LABEL PROC
   DB  196,226,121,51,219                  ; vpmovzxwd     %xmm3,%xmm3
   DB  196,195,101,24,216,1                ; vinsertf128   $0x1,%xmm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,232,66,0,0          ; vbroadcastss  0x42e8(%rip),%ymm8        # 6590 <_sk_callback_avx+0x252>
+  DB  196,98,125,24,5,128,67,0,0          ; vbroadcastss  0x4380(%rip),%ymm8        # 6628 <_sk_callback_avx+0x251>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -7112,7 +7140,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  197,185,108,202                     ; vpunpcklqdq   %xmm2,%xmm8,%xmm1
   DB  197,185,109,210                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm2
   DB  197,121,108,195                     ; vpunpcklqdq   %xmm3,%xmm0,%xmm8
-  DB  197,121,111,13,51,69,0,0            ; vmovdqa       0x4533(%rip),%xmm9        # 68f0 <_sk_callback_avx+0x5b2>
+  DB  197,121,111,13,211,69,0,0           ; vmovdqa       0x45d3(%rip),%xmm9        # 6990 <_sk_callback_avx+0x5b9>
   DB  196,193,113,219,193                 ; vpand         %xmm9,%xmm1,%xmm0
   DB  196,65,41,239,210                   ; vpxor         %xmm10,%xmm10,%xmm10
   DB  196,193,121,105,202                 ; vpunpckhwd    %xmm10,%xmm0,%xmm1
@@ -7204,7 +7232,7 @@ _sk_load_tables_rgb_u16_be_avx LABEL PROC
   DB  196,227,105,33,211,48               ; vinsertps     $0x30,%xmm3,%xmm2,%xmm2
   DB  196,195,109,24,208,1                ; vinsertf128   $0x1,%xmm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,250,63,0,0        ; vbroadcastss  0x3ffa(%rip),%ymm3        # 6594 <_sk_callback_avx+0x256>
+  DB  196,226,125,24,29,146,64,0,0        ; vbroadcastss  0x4092(%rip),%ymm3        # 662c <_sk_callback_avx+0x255>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,93                               ; pop           %r13
@@ -7255,7 +7283,7 @@ _sk_byte_tables_avx LABEL PROC
   DB  65,84                               ; push          %r12
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,46,63,0,0           ; vbroadcastss  0x3f2e(%rip),%ymm8        # 6598 <_sk_callback_avx+0x25a>
+  DB  196,98,125,24,5,198,63,0,0          ; vbroadcastss  0x3fc6(%rip),%ymm8        # 6630 <_sk_callback_avx+0x259>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,195,249,22,192,1                ; vpextrq       $0x1,%xmm0,%r8
@@ -7292,7 +7320,7 @@ _sk_byte_tables_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,124,62,0,0         ; vbroadcastss  0x3e7c(%rip),%ymm9        # 659c <_sk_callback_avx+0x25e>
+  DB  196,98,125,24,13,20,63,0,0          ; vbroadcastss  0x3f14(%rip),%ymm9        # 6634 <_sk_callback_avx+0x25d>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -7452,7 +7480,7 @@ _sk_byte_tables_rgb_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,53,24,192,1                 ; vinsertf128   $0x1,%xmm0,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,162,59,0,0         ; vbroadcastss  0x3ba2(%rip),%ymm9        # 65a0 <_sk_callback_avx+0x262>
+  DB  196,98,125,24,13,58,60,0,0          ; vbroadcastss  0x3c3a(%rip),%ymm9        # 6638 <_sk_callback_avx+0x261>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  197,188,89,201                      ; vmulps        %ymm1,%ymm8,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
@@ -7739,36 +7767,36 @@ _sk_parametric_r_avx LABEL PROC
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,216                      ; vcvtdq2ps     %ymm0,%ymm11
-  DB  196,98,125,24,37,0,55,0,0           ; vbroadcastss  0x3700(%rip),%ymm12        # 65a4 <_sk_callback_avx+0x266>
+  DB  196,98,125,24,37,152,55,0,0         ; vbroadcastss  0x3798(%rip),%ymm12        # 663c <_sk_callback_avx+0x265>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,246,54,0,0         ; vbroadcastss  0x36f6(%rip),%ymm12        # 65a8 <_sk_callback_avx+0x26a>
+  DB  196,98,125,24,37,142,55,0,0         ; vbroadcastss  0x378e(%rip),%ymm12        # 6640 <_sk_callback_avx+0x269>
   DB  196,193,124,84,196                  ; vandps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,236,54,0,0         ; vbroadcastss  0x36ec(%rip),%ymm12        # 65ac <_sk_callback_avx+0x26e>
+  DB  196,98,125,24,37,132,55,0,0         ; vbroadcastss  0x3784(%rip),%ymm12        # 6644 <_sk_callback_avx+0x26d>
   DB  196,193,124,86,196                  ; vorps         %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,226,54,0,0         ; vbroadcastss  0x36e2(%rip),%ymm12        # 65b0 <_sk_callback_avx+0x272>
+  DB  196,98,125,24,37,122,55,0,0         ; vbroadcastss  0x377a(%rip),%ymm12        # 6648 <_sk_callback_avx+0x271>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,216,54,0,0         ; vbroadcastss  0x36d8(%rip),%ymm12        # 65b4 <_sk_callback_avx+0x276>
+  DB  196,98,125,24,37,112,55,0,0         ; vbroadcastss  0x3770(%rip),%ymm12        # 664c <_sk_callback_avx+0x275>
   DB  196,65,124,89,228                   ; vmulps        %ymm12,%ymm0,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,201,54,0,0         ; vbroadcastss  0x36c9(%rip),%ymm12        # 65b8 <_sk_callback_avx+0x27a>
+  DB  196,98,125,24,37,97,55,0,0          ; vbroadcastss  0x3761(%rip),%ymm12        # 6650 <_sk_callback_avx+0x279>
   DB  196,193,124,88,196                  ; vaddps        %ymm12,%ymm0,%ymm0
-  DB  196,98,125,24,37,191,54,0,0         ; vbroadcastss  0x36bf(%rip),%ymm12        # 65bc <_sk_callback_avx+0x27e>
+  DB  196,98,125,24,37,87,55,0,0          ; vbroadcastss  0x3757(%rip),%ymm12        # 6654 <_sk_callback_avx+0x27d>
   DB  197,156,94,192                      ; vdivps        %ymm0,%ymm12,%ymm0
   DB  197,164,92,192                      ; vsubps        %ymm0,%ymm11,%ymm0
   DB  197,172,89,192                      ; vmulps        %ymm0,%ymm10,%ymm0
   DB  196,99,125,8,208,1                  ; vroundps      $0x1,%ymm0,%ymm10
   DB  196,65,124,92,210                   ; vsubps        %ymm10,%ymm0,%ymm10
-  DB  196,98,125,24,29,163,54,0,0         ; vbroadcastss  0x36a3(%rip),%ymm11        # 65c0 <_sk_callback_avx+0x282>
+  DB  196,98,125,24,29,59,55,0,0          ; vbroadcastss  0x373b(%rip),%ymm11        # 6658 <_sk_callback_avx+0x281>
   DB  196,193,124,88,195                  ; vaddps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,153,54,0,0         ; vbroadcastss  0x3699(%rip),%ymm11        # 65c4 <_sk_callback_avx+0x286>
+  DB  196,98,125,24,29,49,55,0,0          ; vbroadcastss  0x3731(%rip),%ymm11        # 665c <_sk_callback_avx+0x285>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,124,92,195                  ; vsubps        %ymm11,%ymm0,%ymm0
-  DB  196,98,125,24,29,138,54,0,0         ; vbroadcastss  0x368a(%rip),%ymm11        # 65c8 <_sk_callback_avx+0x28a>
+  DB  196,98,125,24,29,34,55,0,0          ; vbroadcastss  0x3722(%rip),%ymm11        # 6660 <_sk_callback_avx+0x289>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,128,54,0,0         ; vbroadcastss  0x3680(%rip),%ymm11        # 65cc <_sk_callback_avx+0x28e>
+  DB  196,98,125,24,29,24,55,0,0          ; vbroadcastss  0x3718(%rip),%ymm11        # 6664 <_sk_callback_avx+0x28d>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,124,88,194                  ; vaddps        %ymm10,%ymm0,%ymm0
-  DB  196,98,125,24,21,113,54,0,0         ; vbroadcastss  0x3671(%rip),%ymm10        # 65d0 <_sk_callback_avx+0x292>
+  DB  196,98,125,24,21,9,55,0,0           ; vbroadcastss  0x3709(%rip),%ymm10        # 6668 <_sk_callback_avx+0x291>
   DB  196,193,124,89,194                  ; vmulps        %ymm10,%ymm0,%ymm0
   DB  197,253,91,192                      ; vcvtps2dq     %ymm0,%ymm0
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7776,7 +7804,7 @@ _sk_parametric_r_avx LABEL PROC
   DB  196,195,125,74,193,128              ; vblendvps     %ymm8,%ymm9,%ymm0,%ymm0
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,124,95,192                  ; vmaxps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,72,54,0,0           ; vbroadcastss  0x3648(%rip),%ymm8        # 65d4 <_sk_callback_avx+0x296>
+  DB  196,98,125,24,5,224,54,0,0          ; vbroadcastss  0x36e0(%rip),%ymm8        # 666c <_sk_callback_avx+0x295>
   DB  196,193,124,93,192                  ; vminps        %ymm8,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7796,36 +7824,36 @@ _sk_parametric_g_avx LABEL PROC
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,217                      ; vcvtdq2ps     %ymm1,%ymm11
-  DB  196,98,125,24,37,249,53,0,0         ; vbroadcastss  0x35f9(%rip),%ymm12        # 65d8 <_sk_callback_avx+0x29a>
+  DB  196,98,125,24,37,145,54,0,0         ; vbroadcastss  0x3691(%rip),%ymm12        # 6670 <_sk_callback_avx+0x299>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,239,53,0,0         ; vbroadcastss  0x35ef(%rip),%ymm12        # 65dc <_sk_callback_avx+0x29e>
+  DB  196,98,125,24,37,135,54,0,0         ; vbroadcastss  0x3687(%rip),%ymm12        # 6674 <_sk_callback_avx+0x29d>
   DB  196,193,116,84,204                  ; vandps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,229,53,0,0         ; vbroadcastss  0x35e5(%rip),%ymm12        # 65e0 <_sk_callback_avx+0x2a2>
+  DB  196,98,125,24,37,125,54,0,0         ; vbroadcastss  0x367d(%rip),%ymm12        # 6678 <_sk_callback_avx+0x2a1>
   DB  196,193,116,86,204                  ; vorps         %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,219,53,0,0         ; vbroadcastss  0x35db(%rip),%ymm12        # 65e4 <_sk_callback_avx+0x2a6>
+  DB  196,98,125,24,37,115,54,0,0         ; vbroadcastss  0x3673(%rip),%ymm12        # 667c <_sk_callback_avx+0x2a5>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,209,53,0,0         ; vbroadcastss  0x35d1(%rip),%ymm12        # 65e8 <_sk_callback_avx+0x2aa>
+  DB  196,98,125,24,37,105,54,0,0         ; vbroadcastss  0x3669(%rip),%ymm12        # 6680 <_sk_callback_avx+0x2a9>
   DB  196,65,116,89,228                   ; vmulps        %ymm12,%ymm1,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,194,53,0,0         ; vbroadcastss  0x35c2(%rip),%ymm12        # 65ec <_sk_callback_avx+0x2ae>
+  DB  196,98,125,24,37,90,54,0,0          ; vbroadcastss  0x365a(%rip),%ymm12        # 6684 <_sk_callback_avx+0x2ad>
   DB  196,193,116,88,204                  ; vaddps        %ymm12,%ymm1,%ymm1
-  DB  196,98,125,24,37,184,53,0,0         ; vbroadcastss  0x35b8(%rip),%ymm12        # 65f0 <_sk_callback_avx+0x2b2>
+  DB  196,98,125,24,37,80,54,0,0          ; vbroadcastss  0x3650(%rip),%ymm12        # 6688 <_sk_callback_avx+0x2b1>
   DB  197,156,94,201                      ; vdivps        %ymm1,%ymm12,%ymm1
   DB  197,164,92,201                      ; vsubps        %ymm1,%ymm11,%ymm1
   DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
   DB  196,99,125,8,209,1                  ; vroundps      $0x1,%ymm1,%ymm10
   DB  196,65,116,92,210                   ; vsubps        %ymm10,%ymm1,%ymm10
-  DB  196,98,125,24,29,156,53,0,0         ; vbroadcastss  0x359c(%rip),%ymm11        # 65f4 <_sk_callback_avx+0x2b6>
+  DB  196,98,125,24,29,52,54,0,0          ; vbroadcastss  0x3634(%rip),%ymm11        # 668c <_sk_callback_avx+0x2b5>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,146,53,0,0         ; vbroadcastss  0x3592(%rip),%ymm11        # 65f8 <_sk_callback_avx+0x2ba>
+  DB  196,98,125,24,29,42,54,0,0          ; vbroadcastss  0x362a(%rip),%ymm11        # 6690 <_sk_callback_avx+0x2b9>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,116,92,203                  ; vsubps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,29,131,53,0,0         ; vbroadcastss  0x3583(%rip),%ymm11        # 65fc <_sk_callback_avx+0x2be>
+  DB  196,98,125,24,29,27,54,0,0          ; vbroadcastss  0x361b(%rip),%ymm11        # 6694 <_sk_callback_avx+0x2bd>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,121,53,0,0         ; vbroadcastss  0x3579(%rip),%ymm11        # 6600 <_sk_callback_avx+0x2c2>
+  DB  196,98,125,24,29,17,54,0,0          ; vbroadcastss  0x3611(%rip),%ymm11        # 6698 <_sk_callback_avx+0x2c1>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,116,88,202                  ; vaddps        %ymm10,%ymm1,%ymm1
-  DB  196,98,125,24,21,106,53,0,0         ; vbroadcastss  0x356a(%rip),%ymm10        # 6604 <_sk_callback_avx+0x2c6>
+  DB  196,98,125,24,21,2,54,0,0           ; vbroadcastss  0x3602(%rip),%ymm10        # 669c <_sk_callback_avx+0x2c5>
   DB  196,193,116,89,202                  ; vmulps        %ymm10,%ymm1,%ymm1
   DB  197,253,91,201                      ; vcvtps2dq     %ymm1,%ymm1
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7833,7 +7861,7 @@ _sk_parametric_g_avx LABEL PROC
   DB  196,195,117,74,201,128              ; vblendvps     %ymm8,%ymm9,%ymm1,%ymm1
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,116,95,200                  ; vmaxps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,5,65,53,0,0           ; vbroadcastss  0x3541(%rip),%ymm8        # 6608 <_sk_callback_avx+0x2ca>
+  DB  196,98,125,24,5,217,53,0,0          ; vbroadcastss  0x35d9(%rip),%ymm8        # 66a0 <_sk_callback_avx+0x2c9>
   DB  196,193,116,93,200                  ; vminps        %ymm8,%ymm1,%ymm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7853,36 +7881,36 @@ _sk_parametric_b_avx LABEL PROC
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,218                      ; vcvtdq2ps     %ymm2,%ymm11
-  DB  196,98,125,24,37,242,52,0,0         ; vbroadcastss  0x34f2(%rip),%ymm12        # 660c <_sk_callback_avx+0x2ce>
+  DB  196,98,125,24,37,138,53,0,0         ; vbroadcastss  0x358a(%rip),%ymm12        # 66a4 <_sk_callback_avx+0x2cd>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,232,52,0,0         ; vbroadcastss  0x34e8(%rip),%ymm12        # 6610 <_sk_callback_avx+0x2d2>
+  DB  196,98,125,24,37,128,53,0,0         ; vbroadcastss  0x3580(%rip),%ymm12        # 66a8 <_sk_callback_avx+0x2d1>
   DB  196,193,108,84,212                  ; vandps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,222,52,0,0         ; vbroadcastss  0x34de(%rip),%ymm12        # 6614 <_sk_callback_avx+0x2d6>
+  DB  196,98,125,24,37,118,53,0,0         ; vbroadcastss  0x3576(%rip),%ymm12        # 66ac <_sk_callback_avx+0x2d5>
   DB  196,193,108,86,212                  ; vorps         %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,212,52,0,0         ; vbroadcastss  0x34d4(%rip),%ymm12        # 6618 <_sk_callback_avx+0x2da>
+  DB  196,98,125,24,37,108,53,0,0         ; vbroadcastss  0x356c(%rip),%ymm12        # 66b0 <_sk_callback_avx+0x2d9>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,202,52,0,0         ; vbroadcastss  0x34ca(%rip),%ymm12        # 661c <_sk_callback_avx+0x2de>
+  DB  196,98,125,24,37,98,53,0,0          ; vbroadcastss  0x3562(%rip),%ymm12        # 66b4 <_sk_callback_avx+0x2dd>
   DB  196,65,108,89,228                   ; vmulps        %ymm12,%ymm2,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,187,52,0,0         ; vbroadcastss  0x34bb(%rip),%ymm12        # 6620 <_sk_callback_avx+0x2e2>
+  DB  196,98,125,24,37,83,53,0,0          ; vbroadcastss  0x3553(%rip),%ymm12        # 66b8 <_sk_callback_avx+0x2e1>
   DB  196,193,108,88,212                  ; vaddps        %ymm12,%ymm2,%ymm2
-  DB  196,98,125,24,37,177,52,0,0         ; vbroadcastss  0x34b1(%rip),%ymm12        # 6624 <_sk_callback_avx+0x2e6>
+  DB  196,98,125,24,37,73,53,0,0          ; vbroadcastss  0x3549(%rip),%ymm12        # 66bc <_sk_callback_avx+0x2e5>
   DB  197,156,94,210                      ; vdivps        %ymm2,%ymm12,%ymm2
   DB  197,164,92,210                      ; vsubps        %ymm2,%ymm11,%ymm2
   DB  197,172,89,210                      ; vmulps        %ymm2,%ymm10,%ymm2
   DB  196,99,125,8,210,1                  ; vroundps      $0x1,%ymm2,%ymm10
   DB  196,65,108,92,210                   ; vsubps        %ymm10,%ymm2,%ymm10
-  DB  196,98,125,24,29,149,52,0,0         ; vbroadcastss  0x3495(%rip),%ymm11        # 6628 <_sk_callback_avx+0x2ea>
+  DB  196,98,125,24,29,45,53,0,0          ; vbroadcastss  0x352d(%rip),%ymm11        # 66c0 <_sk_callback_avx+0x2e9>
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,139,52,0,0         ; vbroadcastss  0x348b(%rip),%ymm11        # 662c <_sk_callback_avx+0x2ee>
+  DB  196,98,125,24,29,35,53,0,0          ; vbroadcastss  0x3523(%rip),%ymm11        # 66c4 <_sk_callback_avx+0x2ed>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,108,92,211                  ; vsubps        %ymm11,%ymm2,%ymm2
-  DB  196,98,125,24,29,124,52,0,0         ; vbroadcastss  0x347c(%rip),%ymm11        # 6630 <_sk_callback_avx+0x2f2>
+  DB  196,98,125,24,29,20,53,0,0          ; vbroadcastss  0x3514(%rip),%ymm11        # 66c8 <_sk_callback_avx+0x2f1>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,114,52,0,0         ; vbroadcastss  0x3472(%rip),%ymm11        # 6634 <_sk_callback_avx+0x2f6>
+  DB  196,98,125,24,29,10,53,0,0          ; vbroadcastss  0x350a(%rip),%ymm11        # 66cc <_sk_callback_avx+0x2f5>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,108,88,210                  ; vaddps        %ymm10,%ymm2,%ymm2
-  DB  196,98,125,24,21,99,52,0,0          ; vbroadcastss  0x3463(%rip),%ymm10        # 6638 <_sk_callback_avx+0x2fa>
+  DB  196,98,125,24,21,251,52,0,0         ; vbroadcastss  0x34fb(%rip),%ymm10        # 66d0 <_sk_callback_avx+0x2f9>
   DB  196,193,108,89,210                  ; vmulps        %ymm10,%ymm2,%ymm2
   DB  197,253,91,210                      ; vcvtps2dq     %ymm2,%ymm2
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7890,7 +7918,7 @@ _sk_parametric_b_avx LABEL PROC
   DB  196,195,109,74,209,128              ; vblendvps     %ymm8,%ymm9,%ymm2,%ymm2
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,108,95,208                  ; vmaxps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,58,52,0,0           ; vbroadcastss  0x343a(%rip),%ymm8        # 663c <_sk_callback_avx+0x2fe>
+  DB  196,98,125,24,5,210,52,0,0          ; vbroadcastss  0x34d2(%rip),%ymm8        # 66d4 <_sk_callback_avx+0x2fd>
   DB  196,193,108,93,208                  ; vminps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -7910,36 +7938,36 @@ _sk_parametric_a_avx LABEL PROC
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
   DB  196,98,125,24,16                    ; vbroadcastss  (%rax),%ymm10
   DB  197,124,91,219                      ; vcvtdq2ps     %ymm3,%ymm11
-  DB  196,98,125,24,37,235,51,0,0         ; vbroadcastss  0x33eb(%rip),%ymm12        # 6640 <_sk_callback_avx+0x302>
+  DB  196,98,125,24,37,131,52,0,0         ; vbroadcastss  0x3483(%rip),%ymm12        # 66d8 <_sk_callback_avx+0x301>
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,225,51,0,0         ; vbroadcastss  0x33e1(%rip),%ymm12        # 6644 <_sk_callback_avx+0x306>
+  DB  196,98,125,24,37,121,52,0,0         ; vbroadcastss  0x3479(%rip),%ymm12        # 66dc <_sk_callback_avx+0x305>
   DB  196,193,100,84,220                  ; vandps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,215,51,0,0         ; vbroadcastss  0x33d7(%rip),%ymm12        # 6648 <_sk_callback_avx+0x30a>
+  DB  196,98,125,24,37,111,52,0,0         ; vbroadcastss  0x346f(%rip),%ymm12        # 66e0 <_sk_callback_avx+0x309>
   DB  196,193,100,86,220                  ; vorps         %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,205,51,0,0         ; vbroadcastss  0x33cd(%rip),%ymm12        # 664c <_sk_callback_avx+0x30e>
+  DB  196,98,125,24,37,101,52,0,0         ; vbroadcastss  0x3465(%rip),%ymm12        # 66e4 <_sk_callback_avx+0x30d>
   DB  196,65,36,88,220                    ; vaddps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,195,51,0,0         ; vbroadcastss  0x33c3(%rip),%ymm12        # 6650 <_sk_callback_avx+0x312>
+  DB  196,98,125,24,37,91,52,0,0          ; vbroadcastss  0x345b(%rip),%ymm12        # 66e8 <_sk_callback_avx+0x311>
   DB  196,65,100,89,228                   ; vmulps        %ymm12,%ymm3,%ymm12
   DB  196,65,36,92,220                    ; vsubps        %ymm12,%ymm11,%ymm11
-  DB  196,98,125,24,37,180,51,0,0         ; vbroadcastss  0x33b4(%rip),%ymm12        # 6654 <_sk_callback_avx+0x316>
+  DB  196,98,125,24,37,76,52,0,0          ; vbroadcastss  0x344c(%rip),%ymm12        # 66ec <_sk_callback_avx+0x315>
   DB  196,193,100,88,220                  ; vaddps        %ymm12,%ymm3,%ymm3
-  DB  196,98,125,24,37,170,51,0,0         ; vbroadcastss  0x33aa(%rip),%ymm12        # 6658 <_sk_callback_avx+0x31a>
+  DB  196,98,125,24,37,66,52,0,0          ; vbroadcastss  0x3442(%rip),%ymm12        # 66f0 <_sk_callback_avx+0x319>
   DB  197,156,94,219                      ; vdivps        %ymm3,%ymm12,%ymm3
   DB  197,164,92,219                      ; vsubps        %ymm3,%ymm11,%ymm3
   DB  197,172,89,219                      ; vmulps        %ymm3,%ymm10,%ymm3
   DB  196,99,125,8,211,1                  ; vroundps      $0x1,%ymm3,%ymm10
   DB  196,65,100,92,210                   ; vsubps        %ymm10,%ymm3,%ymm10
-  DB  196,98,125,24,29,142,51,0,0         ; vbroadcastss  0x338e(%rip),%ymm11        # 665c <_sk_callback_avx+0x31e>
+  DB  196,98,125,24,29,38,52,0,0          ; vbroadcastss  0x3426(%rip),%ymm11        # 66f4 <_sk_callback_avx+0x31d>
   DB  196,193,100,88,219                  ; vaddps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,132,51,0,0         ; vbroadcastss  0x3384(%rip),%ymm11        # 6660 <_sk_callback_avx+0x322>
+  DB  196,98,125,24,29,28,52,0,0          ; vbroadcastss  0x341c(%rip),%ymm11        # 66f8 <_sk_callback_avx+0x321>
   DB  196,65,44,89,219                    ; vmulps        %ymm11,%ymm10,%ymm11
   DB  196,193,100,92,219                  ; vsubps        %ymm11,%ymm3,%ymm3
-  DB  196,98,125,24,29,117,51,0,0         ; vbroadcastss  0x3375(%rip),%ymm11        # 6664 <_sk_callback_avx+0x326>
+  DB  196,98,125,24,29,13,52,0,0          ; vbroadcastss  0x340d(%rip),%ymm11        # 66fc <_sk_callback_avx+0x325>
   DB  196,65,36,92,210                    ; vsubps        %ymm10,%ymm11,%ymm10
-  DB  196,98,125,24,29,107,51,0,0         ; vbroadcastss  0x336b(%rip),%ymm11        # 6668 <_sk_callback_avx+0x32a>
+  DB  196,98,125,24,29,3,52,0,0           ; vbroadcastss  0x3403(%rip),%ymm11        # 6700 <_sk_callback_avx+0x329>
   DB  196,65,36,94,210                    ; vdivps        %ymm10,%ymm11,%ymm10
   DB  196,193,100,88,218                  ; vaddps        %ymm10,%ymm3,%ymm3
-  DB  196,98,125,24,21,92,51,0,0          ; vbroadcastss  0x335c(%rip),%ymm10        # 666c <_sk_callback_avx+0x32e>
+  DB  196,98,125,24,21,244,51,0,0         ; vbroadcastss  0x33f4(%rip),%ymm10        # 6704 <_sk_callback_avx+0x32d>
   DB  196,193,100,89,218                  ; vmulps        %ymm10,%ymm3,%ymm3
   DB  197,253,91,219                      ; vcvtps2dq     %ymm3,%ymm3
   DB  196,98,125,24,80,20                 ; vbroadcastss  0x14(%rax),%ymm10
@@ -7947,38 +7975,38 @@ _sk_parametric_a_avx LABEL PROC
   DB  196,195,101,74,217,128              ; vblendvps     %ymm8,%ymm9,%ymm3,%ymm3
   DB  196,65,60,87,192                    ; vxorps        %ymm8,%ymm8,%ymm8
   DB  196,193,100,95,216                  ; vmaxps        %ymm8,%ymm3,%ymm3
-  DB  196,98,125,24,5,51,51,0,0           ; vbroadcastss  0x3333(%rip),%ymm8        # 6670 <_sk_callback_avx+0x332>
+  DB  196,98,125,24,5,203,51,0,0          ; vbroadcastss  0x33cb(%rip),%ymm8        # 6708 <_sk_callback_avx+0x331>
   DB  196,193,100,93,216                  ; vminps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_lab_to_xyz_avx
 _sk_lab_to_xyz_avx LABEL PROC
-  DB  196,98,125,24,5,37,51,0,0           ; vbroadcastss  0x3325(%rip),%ymm8        # 6674 <_sk_callback_avx+0x336>
+  DB  196,98,125,24,5,189,51,0,0          ; vbroadcastss  0x33bd(%rip),%ymm8        # 670c <_sk_callback_avx+0x335>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,27,51,0,0           ; vbroadcastss  0x331b(%rip),%ymm8        # 6678 <_sk_callback_avx+0x33a>
+  DB  196,98,125,24,5,179,51,0,0          ; vbroadcastss  0x33b3(%rip),%ymm8        # 6710 <_sk_callback_avx+0x339>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
-  DB  196,98,125,24,13,17,51,0,0          ; vbroadcastss  0x3311(%rip),%ymm9        # 667c <_sk_callback_avx+0x33e>
+  DB  196,98,125,24,13,169,51,0,0         ; vbroadcastss  0x33a9(%rip),%ymm9        # 6714 <_sk_callback_avx+0x33d>
   DB  196,193,116,88,201                  ; vaddps        %ymm9,%ymm1,%ymm1
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  196,193,108,88,209                  ; vaddps        %ymm9,%ymm2,%ymm2
-  DB  196,98,125,24,5,253,50,0,0          ; vbroadcastss  0x32fd(%rip),%ymm8        # 6680 <_sk_callback_avx+0x342>
+  DB  196,98,125,24,5,149,51,0,0          ; vbroadcastss  0x3395(%rip),%ymm8        # 6718 <_sk_callback_avx+0x341>
   DB  196,193,124,88,192                  ; vaddps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,243,50,0,0          ; vbroadcastss  0x32f3(%rip),%ymm8        # 6684 <_sk_callback_avx+0x346>
+  DB  196,98,125,24,5,139,51,0,0          ; vbroadcastss  0x338b(%rip),%ymm8        # 671c <_sk_callback_avx+0x345>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,5,233,50,0,0          ; vbroadcastss  0x32e9(%rip),%ymm8        # 6688 <_sk_callback_avx+0x34a>
+  DB  196,98,125,24,5,129,51,0,0          ; vbroadcastss  0x3381(%rip),%ymm8        # 6720 <_sk_callback_avx+0x349>
   DB  196,193,116,89,200                  ; vmulps        %ymm8,%ymm1,%ymm1
   DB  197,252,88,201                      ; vaddps        %ymm1,%ymm0,%ymm1
-  DB  196,98,125,24,5,219,50,0,0          ; vbroadcastss  0x32db(%rip),%ymm8        # 668c <_sk_callback_avx+0x34e>
+  DB  196,98,125,24,5,115,51,0,0          ; vbroadcastss  0x3373(%rip),%ymm8        # 6724 <_sk_callback_avx+0x34d>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  197,252,92,210                      ; vsubps        %ymm2,%ymm0,%ymm2
   DB  197,116,89,193                      ; vmulps        %ymm1,%ymm1,%ymm8
   DB  196,65,116,89,192                   ; vmulps        %ymm8,%ymm1,%ymm8
-  DB  196,98,125,24,13,196,50,0,0         ; vbroadcastss  0x32c4(%rip),%ymm9        # 6690 <_sk_callback_avx+0x352>
+  DB  196,98,125,24,13,92,51,0,0          ; vbroadcastss  0x335c(%rip),%ymm9        # 6728 <_sk_callback_avx+0x351>
   DB  196,65,52,194,208,1                 ; vcmpltps      %ymm8,%ymm9,%ymm10
-  DB  196,98,125,24,29,185,50,0,0         ; vbroadcastss  0x32b9(%rip),%ymm11        # 6694 <_sk_callback_avx+0x356>
+  DB  196,98,125,24,29,81,51,0,0          ; vbroadcastss  0x3351(%rip),%ymm11        # 672c <_sk_callback_avx+0x355>
   DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
-  DB  196,98,125,24,37,175,50,0,0         ; vbroadcastss  0x32af(%rip),%ymm12        # 6698 <_sk_callback_avx+0x35a>
+  DB  196,98,125,24,37,71,51,0,0          ; vbroadcastss  0x3347(%rip),%ymm12        # 6730 <_sk_callback_avx+0x359>
   DB  196,193,116,89,204                  ; vmulps        %ymm12,%ymm1,%ymm1
   DB  196,67,117,74,192,160               ; vblendvps     %ymm10,%ymm8,%ymm1,%ymm8
   DB  197,252,89,200                      ; vmulps        %ymm0,%ymm0,%ymm1
@@ -7993,9 +8021,9 @@ _sk_lab_to_xyz_avx LABEL PROC
   DB  196,193,108,88,211                  ; vaddps        %ymm11,%ymm2,%ymm2
   DB  196,193,108,89,212                  ; vmulps        %ymm12,%ymm2,%ymm2
   DB  196,227,109,74,208,144              ; vblendvps     %ymm9,%ymm0,%ymm2,%ymm2
-  DB  196,226,125,24,5,101,50,0,0         ; vbroadcastss  0x3265(%rip),%ymm0        # 669c <_sk_callback_avx+0x35e>
+  DB  196,226,125,24,5,253,50,0,0         ; vbroadcastss  0x32fd(%rip),%ymm0        # 6734 <_sk_callback_avx+0x35d>
   DB  197,188,89,192                      ; vmulps        %ymm0,%ymm8,%ymm0
-  DB  196,98,125,24,5,92,50,0,0           ; vbroadcastss  0x325c(%rip),%ymm8        # 66a0 <_sk_callback_avx+0x362>
+  DB  196,98,125,24,5,244,50,0,0          ; vbroadcastss  0x32f4(%rip),%ymm8        # 6738 <_sk_callback_avx+0x361>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8014,7 +8042,7 @@ _sk_load_a8_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,32,50,0,0         ; vbroadcastss  0x3220(%rip),%ymm1        # 66a4 <_sk_callback_avx+0x366>
+  DB  196,226,125,24,13,184,50,0,0        ; vbroadcastss  0x32b8(%rip),%ymm1        # 673c <_sk_callback_avx+0x365>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -8081,7 +8109,7 @@ _sk_gather_a8_avx LABEL PROC
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,21,49,0,0         ; vbroadcastss  0x3115(%rip),%ymm1        # 66a8 <_sk_callback_avx+0x36a>
+  DB  196,226,125,24,13,173,49,0,0        ; vbroadcastss  0x31ad(%rip),%ymm1        # 6740 <_sk_callback_avx+0x369>
   DB  197,252,89,217                      ; vmulps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  197,252,87,192                      ; vxorps        %ymm0,%ymm0,%ymm0
@@ -8097,7 +8125,7 @@ PUBLIC _sk_store_a8_avx
 _sk_store_a8_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,240,48,0,0          ; vbroadcastss  0x30f0(%rip),%ymm8        # 66ac <_sk_callback_avx+0x36e>
+  DB  196,98,125,24,5,136,49,0,0          ; vbroadcastss  0x3188(%rip),%ymm8        # 6744 <_sk_callback_avx+0x36d>
   DB  196,65,100,89,192                   ; vmulps        %ymm8,%ymm3,%ymm8
   DB  196,65,125,91,192                   ; vcvtps2dq     %ymm8,%ymm8
   DB  196,67,125,25,193,1                 ; vextractf128  $0x1,%ymm8,%xmm9
@@ -8165,10 +8193,10 @@ _sk_load_g8_avx LABEL PROC
   DB  196,226,121,49,192                  ; vpmovzxbd     %xmm0,%xmm0
   DB  196,227,117,24,192,1                ; vinsertf128   $0x1,%xmm0,%ymm1,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,21,48,0,0         ; vbroadcastss  0x3015(%rip),%ymm1        # 66b0 <_sk_callback_avx+0x372>
+  DB  196,226,125,24,13,173,48,0,0        ; vbroadcastss  0x30ad(%rip),%ymm1        # 6748 <_sk_callback_avx+0x371>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,10,48,0,0         ; vbroadcastss  0x300a(%rip),%ymm3        # 66b4 <_sk_callback_avx+0x376>
+  DB  196,226,125,24,29,162,48,0,0        ; vbroadcastss  0x30a2(%rip),%ymm3        # 674c <_sk_callback_avx+0x375>
   DB  76,137,193                          ; mov           %r8,%rcx
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
@@ -8232,10 +8260,10 @@ _sk_gather_g8_avx LABEL PROC
   DB  196,226,121,49,201                  ; vpmovzxbd     %xmm1,%xmm1
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,9,47,0,0          ; vbroadcastss  0x2f09(%rip),%ymm1        # 66b8 <_sk_callback_avx+0x37a>
+  DB  196,226,125,24,13,161,47,0,0        ; vbroadcastss  0x2fa1(%rip),%ymm1        # 6750 <_sk_callback_avx+0x379>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,254,46,0,0        ; vbroadcastss  0x2efe(%rip),%ymm3        # 66bc <_sk_callback_avx+0x37e>
+  DB  196,226,125,24,29,150,47,0,0        ; vbroadcastss  0x2f96(%rip),%ymm3        # 6754 <_sk_callback_avx+0x37d>
   DB  197,252,40,200                      ; vmovaps       %ymm0,%ymm1
   DB  197,252,40,208                      ; vmovaps       %ymm0,%ymm2
   DB  91                                  ; pop           %rbx
@@ -8313,10 +8341,10 @@ _sk_gather_i8_avx LABEL PROC
   DB  196,163,121,34,4,163,2              ; vpinsrd       $0x2,(%rbx,%r12,4),%xmm0,%xmm0
   DB  196,163,121,34,28,19,3              ; vpinsrd       $0x3,(%rbx,%r10,1),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,118,47,0,0            ; vmovaps       0x2f76(%rip),%ymm10        # 6880 <_sk_callback_avx+0x542>
+  DB  197,124,40,21,22,48,0,0             ; vmovaps       0x3016(%rip),%ymm10        # 6920 <_sk_callback_avx+0x549>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,164,45,0,0         ; vbroadcastss  0x2da4(%rip),%ymm9        # 66c0 <_sk_callback_avx+0x382>
+  DB  196,98,125,24,13,60,46,0,0          ; vbroadcastss  0x2e3c(%rip),%ymm9        # 6758 <_sk_callback_avx+0x381>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -8354,23 +8382,23 @@ _sk_load_565_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,14,45,0,0          ; vbroadcastss  0x2d0e(%rip),%ymm0        # 66c4 <_sk_callback_avx+0x386>
+  DB  196,226,125,24,5,166,45,0,0         ; vbroadcastss  0x2da6(%rip),%ymm0        # 675c <_sk_callback_avx+0x385>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,1,45,0,0          ; vbroadcastss  0x2d01(%rip),%ymm1        # 66c8 <_sk_callback_avx+0x38a>
+  DB  196,226,125,24,13,153,45,0,0        ; vbroadcastss  0x2d99(%rip),%ymm1        # 6760 <_sk_callback_avx+0x389>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,248,44,0,0        ; vbroadcastss  0x2cf8(%rip),%ymm1        # 66cc <_sk_callback_avx+0x38e>
+  DB  196,226,125,24,13,144,45,0,0        ; vbroadcastss  0x2d90(%rip),%ymm1        # 6764 <_sk_callback_avx+0x38d>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,235,44,0,0        ; vbroadcastss  0x2ceb(%rip),%ymm3        # 66d0 <_sk_callback_avx+0x392>
+  DB  196,226,125,24,29,131,45,0,0        ; vbroadcastss  0x2d83(%rip),%ymm3        # 6768 <_sk_callback_avx+0x391>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,226,44,0,0        ; vbroadcastss  0x2ce2(%rip),%ymm3        # 66d4 <_sk_callback_avx+0x396>
+  DB  196,226,125,24,29,122,45,0,0        ; vbroadcastss  0x2d7a(%rip),%ymm3        # 676c <_sk_callback_avx+0x395>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,213,44,0,0        ; vbroadcastss  0x2cd5(%rip),%ymm3        # 66d8 <_sk_callback_avx+0x39a>
+  DB  196,226,125,24,29,109,45,0,0        ; vbroadcastss  0x2d6d(%rip),%ymm3        # 6770 <_sk_callback_avx+0x399>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,202,44,0,0        ; vbroadcastss  0x2cca(%rip),%ymm3        # 66dc <_sk_callback_avx+0x39e>
+  DB  196,226,125,24,29,98,45,0,0         ; vbroadcastss  0x2d62(%rip),%ymm3        # 6774 <_sk_callback_avx+0x39d>
   DB  255,224                             ; jmpq          *%rax
   DB  65,137,200                          ; mov           %ecx,%r8d
   DB  65,128,224,7                        ; and           $0x7,%r8b
@@ -8467,23 +8495,23 @@ _sk_gather_565_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,209,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm2
-  DB  196,226,125,24,5,106,43,0,0         ; vbroadcastss  0x2b6a(%rip),%ymm0        # 66e0 <_sk_callback_avx+0x3a2>
+  DB  196,226,125,24,5,2,44,0,0           ; vbroadcastss  0x2c02(%rip),%ymm0        # 6778 <_sk_callback_avx+0x3a1>
   DB  197,236,84,192                      ; vandps        %ymm0,%ymm2,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,93,43,0,0         ; vbroadcastss  0x2b5d(%rip),%ymm1        # 66e4 <_sk_callback_avx+0x3a6>
+  DB  196,226,125,24,13,245,43,0,0        ; vbroadcastss  0x2bf5(%rip),%ymm1        # 677c <_sk_callback_avx+0x3a5>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,84,43,0,0         ; vbroadcastss  0x2b54(%rip),%ymm1        # 66e8 <_sk_callback_avx+0x3aa>
+  DB  196,226,125,24,13,236,43,0,0        ; vbroadcastss  0x2bec(%rip),%ymm1        # 6780 <_sk_callback_avx+0x3a9>
   DB  197,236,84,201                      ; vandps        %ymm1,%ymm2,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,29,71,43,0,0         ; vbroadcastss  0x2b47(%rip),%ymm3        # 66ec <_sk_callback_avx+0x3ae>
+  DB  196,226,125,24,29,223,43,0,0        ; vbroadcastss  0x2bdf(%rip),%ymm3        # 6784 <_sk_callback_avx+0x3ad>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
-  DB  196,226,125,24,29,62,43,0,0         ; vbroadcastss  0x2b3e(%rip),%ymm3        # 66f0 <_sk_callback_avx+0x3b2>
+  DB  196,226,125,24,29,214,43,0,0        ; vbroadcastss  0x2bd6(%rip),%ymm3        # 6788 <_sk_callback_avx+0x3b1>
   DB  197,236,84,211                      ; vandps        %ymm3,%ymm2,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,226,125,24,29,49,43,0,0         ; vbroadcastss  0x2b31(%rip),%ymm3        # 66f4 <_sk_callback_avx+0x3b6>
+  DB  196,226,125,24,29,201,43,0,0        ; vbroadcastss  0x2bc9(%rip),%ymm3        # 678c <_sk_callback_avx+0x3b5>
   DB  197,236,89,211                      ; vmulps        %ymm3,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,38,43,0,0         ; vbroadcastss  0x2b26(%rip),%ymm3        # 66f8 <_sk_callback_avx+0x3ba>
+  DB  196,226,125,24,29,190,43,0,0        ; vbroadcastss  0x2bbe(%rip),%ymm3        # 6790 <_sk_callback_avx+0x3b9>
   DB  91                                  ; pop           %rbx
   DB  65,92                               ; pop           %r12
   DB  65,94                               ; pop           %r14
@@ -8495,14 +8523,14 @@ PUBLIC _sk_store_565_avx
 _sk_store_565_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,18,43,0,0           ; vbroadcastss  0x2b12(%rip),%ymm8        # 66fc <_sk_callback_avx+0x3be>
+  DB  196,98,125,24,5,170,43,0,0          ; vbroadcastss  0x2baa(%rip),%ymm8        # 6794 <_sk_callback_avx+0x3bd>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,11               ; vpslld        $0xb,%xmm9,%xmm10
   DB  196,67,125,25,201,1                 ; vextractf128  $0x1,%ymm9,%xmm9
   DB  196,193,49,114,241,11               ; vpslld        $0xb,%xmm9,%xmm9
   DB  196,67,45,24,201,1                  ; vinsertf128   $0x1,%xmm9,%ymm10,%ymm9
-  DB  196,98,125,24,21,235,42,0,0         ; vbroadcastss  0x2aeb(%rip),%ymm10        # 6700 <_sk_callback_avx+0x3c2>
+  DB  196,98,125,24,21,131,43,0,0         ; vbroadcastss  0x2b83(%rip),%ymm10        # 6798 <_sk_callback_avx+0x3c1>
   DB  196,65,116,89,210                   ; vmulps        %ymm10,%ymm1,%ymm10
   DB  196,65,125,91,210                   ; vcvtps2dq     %ymm10,%ymm10
   DB  196,193,33,114,242,5                ; vpslld        $0x5,%xmm10,%xmm11
@@ -8574,25 +8602,25 @@ _sk_load_4444_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,244,41,0,0         ; vbroadcastss  0x29f4(%rip),%ymm0        # 6704 <_sk_callback_avx+0x3c6>
+  DB  196,226,125,24,5,140,42,0,0         ; vbroadcastss  0x2a8c(%rip),%ymm0        # 679c <_sk_callback_avx+0x3c5>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,231,41,0,0        ; vbroadcastss  0x29e7(%rip),%ymm1        # 6708 <_sk_callback_avx+0x3ca>
+  DB  196,226,125,24,13,127,42,0,0        ; vbroadcastss  0x2a7f(%rip),%ymm1        # 67a0 <_sk_callback_avx+0x3c9>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,222,41,0,0        ; vbroadcastss  0x29de(%rip),%ymm1        # 670c <_sk_callback_avx+0x3ce>
+  DB  196,226,125,24,13,118,42,0,0        ; vbroadcastss  0x2a76(%rip),%ymm1        # 67a4 <_sk_callback_avx+0x3cd>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,209,41,0,0        ; vbroadcastss  0x29d1(%rip),%ymm2        # 6710 <_sk_callback_avx+0x3d2>
+  DB  196,226,125,24,21,105,42,0,0        ; vbroadcastss  0x2a69(%rip),%ymm2        # 67a8 <_sk_callback_avx+0x3d1>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,200,41,0,0        ; vbroadcastss  0x29c8(%rip),%ymm2        # 6714 <_sk_callback_avx+0x3d6>
+  DB  196,226,125,24,21,96,42,0,0         ; vbroadcastss  0x2a60(%rip),%ymm2        # 67ac <_sk_callback_avx+0x3d5>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,187,41,0,0          ; vbroadcastss  0x29bb(%rip),%ymm8        # 6718 <_sk_callback_avx+0x3da>
+  DB  196,98,125,24,5,83,42,0,0           ; vbroadcastss  0x2a53(%rip),%ymm8        # 67b0 <_sk_callback_avx+0x3d9>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,177,41,0,0          ; vbroadcastss  0x29b1(%rip),%ymm8        # 671c <_sk_callback_avx+0x3de>
+  DB  196,98,125,24,5,73,42,0,0           ; vbroadcastss  0x2a49(%rip),%ymm8        # 67b4 <_sk_callback_avx+0x3dd>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,163,41,0,0          ; vbroadcastss  0x29a3(%rip),%ymm8        # 6720 <_sk_callback_avx+0x3e2>
+  DB  196,98,125,24,5,59,42,0,0           ; vbroadcastss  0x2a3b(%rip),%ymm8        # 67b8 <_sk_callback_avx+0x3e1>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -8692,25 +8720,25 @@ _sk_gather_4444_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,217,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm3
-  DB  196,226,125,24,5,58,40,0,0          ; vbroadcastss  0x283a(%rip),%ymm0        # 6724 <_sk_callback_avx+0x3e6>
+  DB  196,226,125,24,5,210,40,0,0         ; vbroadcastss  0x28d2(%rip),%ymm0        # 67bc <_sk_callback_avx+0x3e5>
   DB  197,228,84,192                      ; vandps        %ymm0,%ymm3,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,226,125,24,13,45,40,0,0         ; vbroadcastss  0x282d(%rip),%ymm1        # 6728 <_sk_callback_avx+0x3ea>
+  DB  196,226,125,24,13,197,40,0,0        ; vbroadcastss  0x28c5(%rip),%ymm1        # 67c0 <_sk_callback_avx+0x3e9>
   DB  197,252,89,193                      ; vmulps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,36,40,0,0         ; vbroadcastss  0x2824(%rip),%ymm1        # 672c <_sk_callback_avx+0x3ee>
+  DB  196,226,125,24,13,188,40,0,0        ; vbroadcastss  0x28bc(%rip),%ymm1        # 67c4 <_sk_callback_avx+0x3ed>
   DB  197,228,84,201                      ; vandps        %ymm1,%ymm3,%ymm1
   DB  197,252,91,201                      ; vcvtdq2ps     %ymm1,%ymm1
-  DB  196,226,125,24,21,23,40,0,0         ; vbroadcastss  0x2817(%rip),%ymm2        # 6730 <_sk_callback_avx+0x3f2>
+  DB  196,226,125,24,21,175,40,0,0        ; vbroadcastss  0x28af(%rip),%ymm2        # 67c8 <_sk_callback_avx+0x3f1>
   DB  197,244,89,202                      ; vmulps        %ymm2,%ymm1,%ymm1
-  DB  196,226,125,24,21,14,40,0,0         ; vbroadcastss  0x280e(%rip),%ymm2        # 6734 <_sk_callback_avx+0x3f6>
+  DB  196,226,125,24,21,166,40,0,0        ; vbroadcastss  0x28a6(%rip),%ymm2        # 67cc <_sk_callback_avx+0x3f5>
   DB  197,228,84,210                      ; vandps        %ymm2,%ymm3,%ymm2
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
-  DB  196,98,125,24,5,1,40,0,0            ; vbroadcastss  0x2801(%rip),%ymm8        # 6738 <_sk_callback_avx+0x3fa>
+  DB  196,98,125,24,5,153,40,0,0          ; vbroadcastss  0x2899(%rip),%ymm8        # 67d0 <_sk_callback_avx+0x3f9>
   DB  196,193,108,89,208                  ; vmulps        %ymm8,%ymm2,%ymm2
-  DB  196,98,125,24,5,247,39,0,0          ; vbroadcastss  0x27f7(%rip),%ymm8        # 673c <_sk_callback_avx+0x3fe>
+  DB  196,98,125,24,5,143,40,0,0          ; vbroadcastss  0x288f(%rip),%ymm8        # 67d4 <_sk_callback_avx+0x3fd>
   DB  196,193,100,84,216                  ; vandps        %ymm8,%ymm3,%ymm3
   DB  197,252,91,219                      ; vcvtdq2ps     %ymm3,%ymm3
-  DB  196,98,125,24,5,233,39,0,0          ; vbroadcastss  0x27e9(%rip),%ymm8        # 6740 <_sk_callback_avx+0x402>
+  DB  196,98,125,24,5,129,40,0,0          ; vbroadcastss  0x2881(%rip),%ymm8        # 67d8 <_sk_callback_avx+0x401>
   DB  196,193,100,89,216                  ; vmulps        %ymm8,%ymm3,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  91                                  ; pop           %rbx
@@ -8724,7 +8752,7 @@ PUBLIC _sk_store_4444_avx
 _sk_store_4444_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,206,39,0,0          ; vbroadcastss  0x27ce(%rip),%ymm8        # 6744 <_sk_callback_avx+0x406>
+  DB  196,98,125,24,5,102,40,0,0          ; vbroadcastss  0x2866(%rip),%ymm8        # 67dc <_sk_callback_avx+0x405>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,193,41,114,241,12               ; vpslld        $0xc,%xmm9,%xmm10
@@ -8803,10 +8831,10 @@ _sk_load_8888_avx LABEL PROC
   DB  72,133,201                          ; test          %rcx,%rcx
   DB  15,133,135,0,0,0                    ; jne           411d <_sk_load_8888_avx+0x95>
   DB  196,65,124,16,12,186                ; vmovups       (%r10,%rdi,4),%ymm9
-  DB  197,124,40,21,252,39,0,0            ; vmovaps       0x27fc(%rip),%ymm10        # 68a0 <_sk_callback_avx+0x562>
+  DB  197,124,40,21,156,40,0,0            ; vmovaps       0x289c(%rip),%ymm10        # 6940 <_sk_callback_avx+0x569>
   DB  196,193,52,84,194                   ; vandps        %ymm10,%ymm9,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,5,146,38,0,0          ; vbroadcastss  0x2692(%rip),%ymm8        # 6748 <_sk_callback_avx+0x40a>
+  DB  196,98,125,24,5,42,39,0,0           ; vbroadcastss  0x272a(%rip),%ymm8        # 67e0 <_sk_callback_avx+0x409>
   DB  196,193,124,89,192                  ; vmulps        %ymm8,%ymm0,%ymm0
   DB  196,193,113,114,209,8               ; vpsrld        $0x8,%xmm9,%xmm1
   DB  196,99,125,25,203,1                 ; vextractf128  $0x1,%ymm9,%xmm3
@@ -8919,10 +8947,10 @@ _sk_gather_8888_avx LABEL PROC
   DB  196,131,121,34,4,152,2              ; vpinsrd       $0x2,(%r8,%r11,4),%xmm0,%xmm0
   DB  196,131,121,34,28,144,3             ; vpinsrd       $0x3,(%r8,%r10,4),%xmm0,%xmm3
   DB  196,227,61,24,195,1                 ; vinsertf128   $0x1,%xmm3,%ymm8,%ymm0
-  DB  197,124,40,21,38,38,0,0             ; vmovaps       0x2626(%rip),%ymm10        # 68c0 <_sk_callback_avx+0x582>
+  DB  197,124,40,21,198,38,0,0            ; vmovaps       0x26c6(%rip),%ymm10        # 6960 <_sk_callback_avx+0x589>
   DB  196,193,124,84,194                  ; vandps        %ymm10,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,13,160,36,0,0         ; vbroadcastss  0x24a0(%rip),%ymm9        # 674c <_sk_callback_avx+0x40e>
+  DB  196,98,125,24,13,56,37,0,0          ; vbroadcastss  0x2538(%rip),%ymm9        # 67e4 <_sk_callback_avx+0x40d>
   DB  196,193,124,89,193                  ; vmulps        %ymm9,%ymm0,%ymm0
   DB  196,193,113,114,208,8               ; vpsrld        $0x8,%xmm8,%xmm1
   DB  197,233,114,211,8                   ; vpsrld        $0x8,%xmm3,%xmm2
@@ -8952,7 +8980,7 @@ PUBLIC _sk_store_8888_avx
 _sk_store_8888_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,16                           ; mov           (%rax),%r10
-  DB  196,98,125,24,5,46,36,0,0           ; vbroadcastss  0x242e(%rip),%ymm8        # 6750 <_sk_callback_avx+0x412>
+  DB  196,98,125,24,5,198,36,0,0          ; vbroadcastss  0x24c6(%rip),%ymm8        # 67e8 <_sk_callback_avx+0x411>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,65,116,89,208                   ; vmulps        %ymm8,%ymm1,%ymm10
@@ -9055,13 +9083,13 @@ _sk_load_f16_avx LABEL PROC
   DB  197,249,105,201                     ; vpunpckhwd    %xmm1,%xmm0,%xmm1
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
-  DB  196,98,125,24,37,147,34,0,0         ; vbroadcastss  0x2293(%rip),%ymm12        # 6754 <_sk_callback_avx+0x416>
+  DB  196,98,125,24,37,43,35,0,0          ; vbroadcastss  0x232b(%rip),%ymm12        # 67ec <_sk_callback_avx+0x415>
   DB  196,193,124,84,204                  ; vandps        %ymm12,%ymm0,%ymm1
   DB  197,252,87,193                      ; vxorps        %ymm1,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,127,34,0,0         ; vbroadcastss  0x227f(%rip),%xmm11        # 6758 <_sk_callback_avx+0x41a>
+  DB  196,98,121,24,29,23,35,0,0          ; vbroadcastss  0x2317(%rip),%xmm11        # 67f0 <_sk_callback_avx+0x419>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,117,34,0,0         ; vbroadcastss  0x2275(%rip),%xmm13        # 675c <_sk_callback_avx+0x41e>
+  DB  196,98,121,24,45,13,35,0,0          ; vbroadcastss  0x230d(%rip),%xmm13        # 67f4 <_sk_callback_avx+0x41d>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -9075,7 +9103,7 @@ _sk_load_f16_avx LABEL PROC
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,193                      ; vorps         %ymm1,%ymm0,%ymm0
   DB  196,227,125,25,193,1                ; vextractf128  $0x1,%ymm0,%xmm1
-  DB  196,226,121,24,29,43,34,0,0         ; vbroadcastss  0x222b(%rip),%xmm3        # 6760 <_sk_callback_avx+0x422>
+  DB  196,226,121,24,29,195,34,0,0        ; vbroadcastss  0x22c3(%rip),%xmm3        # 67f8 <_sk_callback_avx+0x421>
   DB  197,241,254,203                     ; vpaddd        %xmm3,%xmm1,%xmm1
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
@@ -9252,13 +9280,13 @@ _sk_gather_f16_avx LABEL PROC
   DB  197,249,105,210                     ; vpunpckhwd    %xmm2,%xmm0,%xmm2
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
-  DB  196,98,125,24,37,235,30,0,0         ; vbroadcastss  0x1eeb(%rip),%ymm12        # 6764 <_sk_callback_avx+0x426>
+  DB  196,98,125,24,37,131,31,0,0         ; vbroadcastss  0x1f83(%rip),%ymm12        # 67fc <_sk_callback_avx+0x425>
   DB  196,193,124,84,212                  ; vandps        %ymm12,%ymm0,%ymm2
   DB  197,252,87,194                      ; vxorps        %ymm2,%ymm0,%ymm0
   DB  196,195,125,25,198,1                ; vextractf128  $0x1,%ymm0,%xmm14
-  DB  196,98,121,24,29,215,30,0,0         ; vbroadcastss  0x1ed7(%rip),%xmm11        # 6768 <_sk_callback_avx+0x42a>
+  DB  196,98,121,24,29,111,31,0,0         ; vbroadcastss  0x1f6f(%rip),%xmm11        # 6800 <_sk_callback_avx+0x429>
   DB  196,193,8,87,219                    ; vxorps        %xmm11,%xmm14,%xmm3
-  DB  196,98,121,24,45,205,30,0,0         ; vbroadcastss  0x1ecd(%rip),%xmm13        # 676c <_sk_callback_avx+0x42e>
+  DB  196,98,121,24,45,101,31,0,0         ; vbroadcastss  0x1f65(%rip),%xmm13        # 6804 <_sk_callback_avx+0x42d>
   DB  197,145,102,219                     ; vpcmpgtd      %xmm3,%xmm13,%xmm3
   DB  196,65,120,87,211                   ; vxorps        %xmm11,%xmm0,%xmm10
   DB  196,65,17,102,210                   ; vpcmpgtd      %xmm10,%xmm13,%xmm10
@@ -9272,7 +9300,7 @@ _sk_gather_f16_avx LABEL PROC
   DB  196,227,125,24,195,1                ; vinsertf128   $0x1,%xmm3,%ymm0,%ymm0
   DB  197,252,86,194                      ; vorps         %ymm2,%ymm0,%ymm0
   DB  196,227,125,25,194,1                ; vextractf128  $0x1,%ymm0,%xmm2
-  DB  196,226,121,24,29,131,30,0,0        ; vbroadcastss  0x1e83(%rip),%xmm3        # 6770 <_sk_callback_avx+0x432>
+  DB  196,226,121,24,29,27,31,0,0         ; vbroadcastss  0x1f1b(%rip),%xmm3        # 6808 <_sk_callback_avx+0x431>
   DB  197,233,254,211                     ; vpaddd        %xmm3,%xmm2,%xmm2
   DB  197,249,254,195                     ; vpaddd        %xmm3,%xmm0,%xmm0
   DB  196,227,125,24,194,1                ; vinsertf128   $0x1,%xmm2,%ymm0,%ymm0
@@ -9374,12 +9402,12 @@ _sk_store_f16_avx LABEL PROC
   DB  197,252,17,180,36,128,0,0,0         ; vmovups       %ymm6,0x80(%rsp)
   DB  197,252,17,108,36,96                ; vmovups       %ymm5,0x60(%rsp)
   DB  197,252,17,100,36,64                ; vmovups       %ymm4,0x40(%rsp)
-  DB  196,98,125,24,13,144,28,0,0         ; vbroadcastss  0x1c90(%rip),%ymm9        # 6774 <_sk_callback_avx+0x436>
+  DB  196,98,125,24,13,40,29,0,0          ; vbroadcastss  0x1d28(%rip),%ymm9        # 680c <_sk_callback_avx+0x435>
   DB  196,65,124,84,209                   ; vandps        %ymm9,%ymm0,%ymm10
   DB  197,252,17,4,36                     ; vmovups       %ymm0,(%rsp)
   DB  196,65,124,87,218                   ; vxorps        %ymm10,%ymm0,%ymm11
   DB  196,67,125,25,220,1                 ; vextractf128  $0x1,%ymm11,%xmm12
-  DB  196,98,121,24,5,118,28,0,0          ; vbroadcastss  0x1c76(%rip),%xmm8        # 6778 <_sk_callback_avx+0x43a>
+  DB  196,98,121,24,5,14,29,0,0           ; vbroadcastss  0x1d0e(%rip),%xmm8        # 6810 <_sk_callback_avx+0x439>
   DB  196,65,57,102,236                   ; vpcmpgtd      %xmm12,%xmm8,%xmm13
   DB  196,65,57,102,243                   ; vpcmpgtd      %xmm11,%xmm8,%xmm14
   DB  196,67,13,24,237,1                  ; vinsertf128   $0x1,%xmm13,%ymm14,%ymm13
@@ -9389,7 +9417,7 @@ _sk_store_f16_avx LABEL PROC
   DB  196,67,13,24,242,1                  ; vinsertf128   $0x1,%xmm10,%ymm14,%ymm14
   DB  196,193,33,114,211,13               ; vpsrld        $0xd,%xmm11,%xmm11
   DB  196,193,25,114,212,13               ; vpsrld        $0xd,%xmm12,%xmm12
-  DB  196,98,125,24,21,61,28,0,0          ; vbroadcastss  0x1c3d(%rip),%ymm10        # 677c <_sk_callback_avx+0x43e>
+  DB  196,98,125,24,21,213,28,0,0         ; vbroadcastss  0x1cd5(%rip),%ymm10        # 6814 <_sk_callback_avx+0x43d>
   DB  196,65,12,86,242                    ; vorps         %ymm10,%ymm14,%ymm14
   DB  196,67,125,25,247,1                 ; vextractf128  $0x1,%ymm14,%xmm15
   DB  196,65,1,254,228                    ; vpaddd        %xmm12,%xmm15,%xmm12
@@ -9532,7 +9560,7 @@ _sk_load_u16_be_avx LABEL PROC
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,140,25,0,0         ; vbroadcastss  0x198c(%rip),%ymm11        # 6780 <_sk_callback_avx+0x442>
+  DB  196,98,125,24,29,36,26,0,0          ; vbroadcastss  0x1a24(%rip),%ymm11        # 6818 <_sk_callback_avx+0x441>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,177,109,202                     ; vpunpckhqdq   %xmm2,%xmm9,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -9623,7 +9651,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  196,226,121,51,192                  ; vpmovzxwd     %xmm0,%xmm0
   DB  196,227,125,24,193,1                ; vinsertf128   $0x1,%xmm1,%ymm0,%ymm0
   DB  197,252,91,192                      ; vcvtdq2ps     %ymm0,%ymm0
-  DB  196,98,125,24,29,236,23,0,0         ; vbroadcastss  0x17ec(%rip),%ymm11        # 6784 <_sk_callback_avx+0x446>
+  DB  196,98,125,24,29,132,24,0,0         ; vbroadcastss  0x1884(%rip),%ymm11        # 681c <_sk_callback_avx+0x445>
   DB  196,193,124,89,195                  ; vmulps        %ymm11,%ymm0,%ymm0
   DB  197,185,109,202                     ; vpunpckhqdq   %xmm2,%xmm8,%xmm1
   DB  197,233,113,241,8                   ; vpsllw        $0x8,%xmm1,%xmm2
@@ -9644,7 +9672,7 @@ _sk_load_rgb_u16_be_avx LABEL PROC
   DB  197,252,91,210                      ; vcvtdq2ps     %ymm2,%ymm2
   DB  196,193,108,89,211                  ; vmulps        %ymm11,%ymm2,%ymm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,29,137,23,0,0        ; vbroadcastss  0x1789(%rip),%ymm3        # 6788 <_sk_callback_avx+0x44a>
+  DB  196,226,125,24,29,33,24,0,0         ; vbroadcastss  0x1821(%rip),%ymm3        # 6820 <_sk_callback_avx+0x449>
   DB  255,224                             ; jmpq          *%rax
   DB  196,193,121,110,4,64                ; vmovd         (%r8,%rax,2),%xmm0
   DB  196,193,121,196,68,64,4,2           ; vpinsrw       $0x2,0x4(%r8,%rax,2),%xmm0,%xmm0
@@ -9685,7 +9713,7 @@ _sk_store_u16_be_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  72,141,4,189,0,0,0,0                ; lea           0x0(,%rdi,4),%rax
-  DB  196,98,125,24,5,198,22,0,0          ; vbroadcastss  0x16c6(%rip),%ymm8        # 678c <_sk_callback_avx+0x44e>
+  DB  196,98,125,24,5,94,23,0,0           ; vbroadcastss  0x175e(%rip),%ymm8        # 6824 <_sk_callback_avx+0x44d>
   DB  196,65,124,89,200                   ; vmulps        %ymm8,%ymm0,%ymm9
   DB  196,65,125,91,201                   ; vcvtps2dq     %ymm9,%ymm9
   DB  196,67,125,25,202,1                 ; vextractf128  $0x1,%ymm9,%xmm10
@@ -9933,12 +9961,12 @@ _sk_mirror_y_avx LABEL PROC
 
 PUBLIC _sk_luminance_to_alpha_avx
 _sk_luminance_to_alpha_avx LABEL PROC
-  DB  196,226,125,24,29,235,18,0,0        ; vbroadcastss  0x12eb(%rip),%ymm3        # 6790 <_sk_callback_avx+0x452>
+  DB  196,226,125,24,29,131,19,0,0        ; vbroadcastss  0x1383(%rip),%ymm3        # 6828 <_sk_callback_avx+0x451>
   DB  197,252,89,195                      ; vmulps        %ymm3,%ymm0,%ymm0
-  DB  196,226,125,24,29,226,18,0,0        ; vbroadcastss  0x12e2(%rip),%ymm3        # 6794 <_sk_callback_avx+0x456>
+  DB  196,226,125,24,29,122,19,0,0        ; vbroadcastss  0x137a(%rip),%ymm3        # 682c <_sk_callback_avx+0x455>
   DB  197,244,89,203                      ; vmulps        %ymm3,%ymm1,%ymm1
   DB  197,252,88,193                      ; vaddps        %ymm1,%ymm0,%ymm0
-  DB  196,226,125,24,13,213,18,0,0        ; vbroadcastss  0x12d5(%rip),%ymm1        # 6798 <_sk_callback_avx+0x45a>
+  DB  196,226,125,24,13,109,19,0,0        ; vbroadcastss  0x136d(%rip),%ymm1        # 6830 <_sk_callback_avx+0x459>
   DB  197,236,89,201                      ; vmulps        %ymm1,%ymm2,%ymm1
   DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10067,6 +10095,42 @@ _sk_matrix_4x5_avx LABEL PROC
   DB  197,124,41,210                      ; vmovaps       %ymm10,%ymm2
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_matrix_4x3_avx
+_sk_matrix_4x3_avx LABEL PROC
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  196,226,125,24,16                   ; vbroadcastss  (%rax),%ymm2
+  DB  196,226,125,24,88,16                ; vbroadcastss  0x10(%rax),%ymm3
+  DB  196,98,125,24,64,32                 ; vbroadcastss  0x20(%rax),%ymm8
+  DB  197,228,89,217                      ; vmulps        %ymm1,%ymm3,%ymm3
+  DB  196,193,100,88,216                  ; vaddps        %ymm8,%ymm3,%ymm3
+  DB  197,236,89,208                      ; vmulps        %ymm0,%ymm2,%ymm2
+  DB  197,108,88,195                      ; vaddps        %ymm3,%ymm2,%ymm8
+  DB  196,226,125,24,80,4                 ; vbroadcastss  0x4(%rax),%ymm2
+  DB  196,226,125,24,88,20                ; vbroadcastss  0x14(%rax),%ymm3
+  DB  196,98,125,24,72,36                 ; vbroadcastss  0x24(%rax),%ymm9
+  DB  197,228,89,217                      ; vmulps        %ymm1,%ymm3,%ymm3
+  DB  196,193,100,88,217                  ; vaddps        %ymm9,%ymm3,%ymm3
+  DB  197,236,89,208                      ; vmulps        %ymm0,%ymm2,%ymm2
+  DB  197,108,88,203                      ; vaddps        %ymm3,%ymm2,%ymm9
+  DB  196,226,125,24,80,8                 ; vbroadcastss  0x8(%rax),%ymm2
+  DB  196,226,125,24,88,24                ; vbroadcastss  0x18(%rax),%ymm3
+  DB  196,98,125,24,80,40                 ; vbroadcastss  0x28(%rax),%ymm10
+  DB  197,228,89,217                      ; vmulps        %ymm1,%ymm3,%ymm3
+  DB  196,193,100,88,218                  ; vaddps        %ymm10,%ymm3,%ymm3
+  DB  197,236,89,208                      ; vmulps        %ymm0,%ymm2,%ymm2
+  DB  197,236,88,211                      ; vaddps        %ymm3,%ymm2,%ymm2
+  DB  196,226,125,24,88,12                ; vbroadcastss  0xc(%rax),%ymm3
+  DB  196,98,125,24,80,28                 ; vbroadcastss  0x1c(%rax),%ymm10
+  DB  196,98,125,24,88,44                 ; vbroadcastss  0x2c(%rax),%ymm11
+  DB  197,172,89,201                      ; vmulps        %ymm1,%ymm10,%ymm1
+  DB  196,193,116,88,203                  ; vaddps        %ymm11,%ymm1,%ymm1
+  DB  197,228,89,192                      ; vmulps        %ymm0,%ymm3,%ymm0
+  DB  197,252,88,217                      ; vaddps        %ymm1,%ymm0,%ymm3
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  197,124,41,192                      ; vmovaps       %ymm8,%ymm0
+  DB  197,124,41,201                      ; vmovaps       %ymm9,%ymm1
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_matrix_perspective_avx
 _sk_matrix_perspective_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10109,9 +10173,9 @@ _sk_evenly_spaced_gradient_avx LABEL PROC
   DB  72,139,24                           ; mov           (%rax),%rbx
   DB  72,139,104,8                        ; mov           0x8(%rax),%rbp
   DB  72,255,203                          ; dec           %rbx
-  DB  120,7                               ; js            5780 <_sk_evenly_spaced_gradient_avx+0x1f>
+  DB  120,7                               ; js            5819 <_sk_evenly_spaced_gradient_avx+0x1f>
   DB  196,225,242,42,203                  ; vcvtsi2ss     %rbx,%xmm1,%xmm1
-  DB  235,21                              ; jmp           5795 <_sk_evenly_spaced_gradient_avx+0x34>
+  DB  235,21                              ; jmp           582e <_sk_evenly_spaced_gradient_avx+0x34>
   DB  73,137,216                          ; mov           %rbx,%r8
   DB  73,209,232                          ; shr           %r8
   DB  131,227,1                           ; and           $0x1,%ebx
@@ -10276,12 +10340,12 @@ _sk_gradient_avx LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,80                              ; jb            5b23 <_sk_gradient_avx+0x69>
+  DB  114,80                              ; jb            5bbc <_sk_gradient_avx+0x69>
   DB  72,139,88,72                        ; mov           0x48(%rax),%rbx
   DB  73,255,200                          ; dec           %r8
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  196,65,52,87,201                    ; vxorps        %ymm9,%ymm9,%ymm9
-  DB  196,98,125,24,21,176,12,0,0         ; vbroadcastss  0xcb0(%rip),%ymm10        # 679c <_sk_callback_avx+0x45e>
+  DB  196,98,125,24,21,175,12,0,0         ; vbroadcastss  0xcaf(%rip),%ymm10        # 6834 <_sk_callback_avx+0x45d>
   DB  197,244,87,201                      ; vxorps        %ymm1,%ymm1,%ymm1
   DB  196,98,125,24,3                     ; vbroadcastss  (%rbx),%ymm8
   DB  197,60,194,192,2                    ; vcmpleps      %ymm0,%ymm8,%ymm8
@@ -10293,7 +10357,7 @@ _sk_gradient_avx LABEL PROC
   DB  196,227,117,24,202,1                ; vinsertf128   $0x1,%xmm2,%ymm1,%ymm1
   DB  72,131,195,4                        ; add           $0x4,%rbx
   DB  73,255,200                          ; dec           %r8
-  DB  117,205                             ; jne           5af0 <_sk_gradient_avx+0x36>
+  DB  117,205                             ; jne           5b89 <_sk_gradient_avx+0x36>
   DB  196,195,249,22,200,1                ; vpextrq       $0x1,%xmm1,%r8
   DB  69,137,193                          ; mov           %r8d,%r9d
   DB  73,193,232,32                       ; shr           $0x20,%r8
@@ -10471,27 +10535,27 @@ _sk_xy_to_unit_angle_avx LABEL PROC
   DB  196,65,52,95,226                    ; vmaxps        %ymm10,%ymm9,%ymm12
   DB  196,65,36,94,220                    ; vdivps        %ymm12,%ymm11,%ymm11
   DB  196,65,36,89,227                    ; vmulps        %ymm11,%ymm11,%ymm12
-  DB  196,98,125,24,45,212,8,0,0          ; vbroadcastss  0x8d4(%rip),%ymm13        # 67a0 <_sk_callback_avx+0x462>
+  DB  196,98,125,24,45,211,8,0,0          ; vbroadcastss  0x8d3(%rip),%ymm13        # 6838 <_sk_callback_avx+0x461>
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,202,8,0,0          ; vbroadcastss  0x8ca(%rip),%ymm14        # 67a4 <_sk_callback_avx+0x466>
+  DB  196,98,125,24,53,201,8,0,0          ; vbroadcastss  0x8c9(%rip),%ymm14        # 683c <_sk_callback_avx+0x465>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,237                    ; vmulps        %ymm13,%ymm12,%ymm13
-  DB  196,98,125,24,53,187,8,0,0          ; vbroadcastss  0x8bb(%rip),%ymm14        # 67a8 <_sk_callback_avx+0x46a>
+  DB  196,98,125,24,53,186,8,0,0          ; vbroadcastss  0x8ba(%rip),%ymm14        # 6840 <_sk_callback_avx+0x469>
   DB  196,65,20,88,238                    ; vaddps        %ymm14,%ymm13,%ymm13
   DB  196,65,28,89,229                    ; vmulps        %ymm13,%ymm12,%ymm12
-  DB  196,98,125,24,45,172,8,0,0          ; vbroadcastss  0x8ac(%rip),%ymm13        # 67ac <_sk_callback_avx+0x46e>
+  DB  196,98,125,24,45,171,8,0,0          ; vbroadcastss  0x8ab(%rip),%ymm13        # 6844 <_sk_callback_avx+0x46d>
   DB  196,65,28,88,229                    ; vaddps        %ymm13,%ymm12,%ymm12
   DB  196,65,36,89,220                    ; vmulps        %ymm12,%ymm11,%ymm11
   DB  196,65,52,194,202,1                 ; vcmpltps      %ymm10,%ymm9,%ymm9
-  DB  196,98,125,24,21,151,8,0,0          ; vbroadcastss  0x897(%rip),%ymm10        # 67b0 <_sk_callback_avx+0x472>
+  DB  196,98,125,24,21,150,8,0,0          ; vbroadcastss  0x896(%rip),%ymm10        # 6848 <_sk_callback_avx+0x471>
   DB  196,65,44,92,211                    ; vsubps        %ymm11,%ymm10,%ymm10
   DB  196,67,37,74,202,144                ; vblendvps     %ymm9,%ymm10,%ymm11,%ymm9
   DB  196,193,124,194,192,1               ; vcmpltps      %ymm8,%ymm0,%ymm0
-  DB  196,98,125,24,21,129,8,0,0          ; vbroadcastss  0x881(%rip),%ymm10        # 67b4 <_sk_callback_avx+0x476>
+  DB  196,98,125,24,21,128,8,0,0          ; vbroadcastss  0x880(%rip),%ymm10        # 684c <_sk_callback_avx+0x475>
   DB  196,65,44,92,209                    ; vsubps        %ymm9,%ymm10,%ymm10
   DB  196,195,53,74,194,0                 ; vblendvps     %ymm0,%ymm10,%ymm9,%ymm0
   DB  196,65,116,194,200,1                ; vcmpltps      %ymm8,%ymm1,%ymm9
-  DB  196,98,125,24,21,107,8,0,0          ; vbroadcastss  0x86b(%rip),%ymm10        # 67b8 <_sk_callback_avx+0x47a>
+  DB  196,98,125,24,21,106,8,0,0          ; vbroadcastss  0x86a(%rip),%ymm10        # 6850 <_sk_callback_avx+0x479>
   DB  197,44,92,208                       ; vsubps        %ymm0,%ymm10,%ymm10
   DB  196,195,125,74,194,144              ; vblendvps     %ymm9,%ymm10,%ymm0,%ymm0
   DB  196,65,124,194,200,3                ; vcmpunordps   %ymm8,%ymm0,%ymm9
@@ -10511,7 +10575,7 @@ _sk_xy_to_radius_avx LABEL PROC
 PUBLIC _sk_save_xy_avx
 _sk_save_xy_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,53,8,0,0            ; vbroadcastss  0x835(%rip),%ymm8        # 67bc <_sk_callback_avx+0x47e>
+  DB  196,98,125,24,5,52,8,0,0            ; vbroadcastss  0x834(%rip),%ymm8        # 6854 <_sk_callback_avx+0x47d>
   DB  196,65,124,88,200                   ; vaddps        %ymm8,%ymm0,%ymm9
   DB  196,67,125,8,209,1                  ; vroundps      $0x1,%ymm9,%ymm10
   DB  196,65,52,92,202                    ; vsubps        %ymm10,%ymm9,%ymm9
@@ -10544,9 +10608,9 @@ _sk_accumulate_avx LABEL PROC
 PUBLIC _sk_bilinear_nx_avx
 _sk_bilinear_nx_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,193,7,0,0          ; vbroadcastss  0x7c1(%rip),%ymm0        # 67c0 <_sk_callback_avx+0x482>
+  DB  196,226,125,24,5,192,7,0,0          ; vbroadcastss  0x7c0(%rip),%ymm0        # 6858 <_sk_callback_avx+0x481>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,184,7,0,0           ; vbroadcastss  0x7b8(%rip),%ymm8        # 67c4 <_sk_callback_avx+0x486>
+  DB  196,98,125,24,5,183,7,0,0           ; vbroadcastss  0x7b7(%rip),%ymm8        # 685c <_sk_callback_avx+0x485>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10555,7 +10619,7 @@ _sk_bilinear_nx_avx LABEL PROC
 PUBLIC _sk_bilinear_px_avx
 _sk_bilinear_px_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,160,7,0,0          ; vbroadcastss  0x7a0(%rip),%ymm0        # 67c8 <_sk_callback_avx+0x48a>
+  DB  196,226,125,24,5,159,7,0,0          ; vbroadcastss  0x79f(%rip),%ymm0        # 6860 <_sk_callback_avx+0x489>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10565,9 +10629,9 @@ _sk_bilinear_px_avx LABEL PROC
 PUBLIC _sk_bilinear_ny_avx
 _sk_bilinear_ny_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,132,7,0,0         ; vbroadcastss  0x784(%rip),%ymm1        # 67cc <_sk_callback_avx+0x48e>
+  DB  196,226,125,24,13,131,7,0,0         ; vbroadcastss  0x783(%rip),%ymm1        # 6864 <_sk_callback_avx+0x48d>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,122,7,0,0           ; vbroadcastss  0x77a(%rip),%ymm8        # 67d0 <_sk_callback_avx+0x492>
+  DB  196,98,125,24,5,121,7,0,0           ; vbroadcastss  0x779(%rip),%ymm8        # 6868 <_sk_callback_avx+0x491>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10576,7 +10640,7 @@ _sk_bilinear_ny_avx LABEL PROC
 PUBLIC _sk_bilinear_py_avx
 _sk_bilinear_py_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,98,7,0,0          ; vbroadcastss  0x762(%rip),%ymm1        # 67d4 <_sk_callback_avx+0x496>
+  DB  196,226,125,24,13,97,7,0,0          ; vbroadcastss  0x761(%rip),%ymm1        # 686c <_sk_callback_avx+0x495>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -10586,14 +10650,14 @@ _sk_bilinear_py_avx LABEL PROC
 PUBLIC _sk_bicubic_n3x_avx
 _sk_bicubic_n3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,69,7,0,0           ; vbroadcastss  0x745(%rip),%ymm0        # 67d8 <_sk_callback_avx+0x49a>
+  DB  196,226,125,24,5,68,7,0,0           ; vbroadcastss  0x744(%rip),%ymm0        # 6870 <_sk_callback_avx+0x499>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,60,7,0,0            ; vbroadcastss  0x73c(%rip),%ymm8        # 67dc <_sk_callback_avx+0x49e>
+  DB  196,98,125,24,5,59,7,0,0            ; vbroadcastss  0x73b(%rip),%ymm8        # 6874 <_sk_callback_avx+0x49d>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,45,7,0,0           ; vbroadcastss  0x72d(%rip),%ymm10        # 67e0 <_sk_callback_avx+0x4a2>
+  DB  196,98,125,24,21,44,7,0,0           ; vbroadcastss  0x72c(%rip),%ymm10        # 6878 <_sk_callback_avx+0x4a1>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,35,7,0,0           ; vbroadcastss  0x723(%rip),%ymm10        # 67e4 <_sk_callback_avx+0x4a6>
+  DB  196,98,125,24,21,34,7,0,0           ; vbroadcastss  0x722(%rip),%ymm10        # 687c <_sk_callback_avx+0x4a5>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10603,19 +10667,19 @@ _sk_bicubic_n3x_avx LABEL PROC
 PUBLIC _sk_bicubic_n1x_avx
 _sk_bicubic_n1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,6,7,0,0            ; vbroadcastss  0x706(%rip),%ymm0        # 67e8 <_sk_callback_avx+0x4aa>
+  DB  196,226,125,24,5,5,7,0,0            ; vbroadcastss  0x705(%rip),%ymm0        # 6880 <_sk_callback_avx+0x4a9>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
-  DB  196,98,125,24,5,253,6,0,0           ; vbroadcastss  0x6fd(%rip),%ymm8        # 67ec <_sk_callback_avx+0x4ae>
+  DB  196,98,125,24,5,252,6,0,0           ; vbroadcastss  0x6fc(%rip),%ymm8        # 6884 <_sk_callback_avx+0x4ad>
   DB  197,60,92,64,64                     ; vsubps        0x40(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,243,6,0,0          ; vbroadcastss  0x6f3(%rip),%ymm9        # 67f0 <_sk_callback_avx+0x4b2>
+  DB  196,98,125,24,13,242,6,0,0          ; vbroadcastss  0x6f2(%rip),%ymm9        # 6888 <_sk_callback_avx+0x4b1>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,233,6,0,0          ; vbroadcastss  0x6e9(%rip),%ymm10        # 67f4 <_sk_callback_avx+0x4b6>
+  DB  196,98,125,24,21,232,6,0,0          ; vbroadcastss  0x6e8(%rip),%ymm10        # 688c <_sk_callback_avx+0x4b5>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,218,6,0,0          ; vbroadcastss  0x6da(%rip),%ymm10        # 67f8 <_sk_callback_avx+0x4ba>
+  DB  196,98,125,24,21,217,6,0,0          ; vbroadcastss  0x6d9(%rip),%ymm10        # 6890 <_sk_callback_avx+0x4b9>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,203,6,0,0          ; vbroadcastss  0x6cb(%rip),%ymm9        # 67fc <_sk_callback_avx+0x4be>
+  DB  196,98,125,24,13,202,6,0,0          ; vbroadcastss  0x6ca(%rip),%ymm9        # 6894 <_sk_callback_avx+0x4bd>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10624,17 +10688,17 @@ _sk_bicubic_n1x_avx LABEL PROC
 PUBLIC _sk_bicubic_p1x_avx
 _sk_bicubic_p1x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,179,6,0,0           ; vbroadcastss  0x6b3(%rip),%ymm8        # 6800 <_sk_callback_avx+0x4c2>
+  DB  196,98,125,24,5,178,6,0,0           ; vbroadcastss  0x6b2(%rip),%ymm8        # 6898 <_sk_callback_avx+0x4c1>
   DB  197,188,88,0                        ; vaddps        (%rax),%ymm8,%ymm0
   DB  197,124,16,72,64                    ; vmovups       0x40(%rax),%ymm9
-  DB  196,98,125,24,21,165,6,0,0          ; vbroadcastss  0x6a5(%rip),%ymm10        # 6804 <_sk_callback_avx+0x4c6>
+  DB  196,98,125,24,21,164,6,0,0          ; vbroadcastss  0x6a4(%rip),%ymm10        # 689c <_sk_callback_avx+0x4c5>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,155,6,0,0          ; vbroadcastss  0x69b(%rip),%ymm11        # 6808 <_sk_callback_avx+0x4ca>
+  DB  196,98,125,24,29,154,6,0,0          ; vbroadcastss  0x69a(%rip),%ymm11        # 68a0 <_sk_callback_avx+0x4c9>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,130,6,0,0          ; vbroadcastss  0x682(%rip),%ymm9        # 680c <_sk_callback_avx+0x4ce>
+  DB  196,98,125,24,13,129,6,0,0          ; vbroadcastss  0x681(%rip),%ymm9        # 68a4 <_sk_callback_avx+0x4cd>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10643,13 +10707,13 @@ _sk_bicubic_p1x_avx LABEL PROC
 PUBLIC _sk_bicubic_p3x_avx
 _sk_bicubic_p3x_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,5,106,6,0,0          ; vbroadcastss  0x66a(%rip),%ymm0        # 6810 <_sk_callback_avx+0x4d2>
+  DB  196,226,125,24,5,105,6,0,0          ; vbroadcastss  0x669(%rip),%ymm0        # 68a8 <_sk_callback_avx+0x4d1>
   DB  197,252,88,0                        ; vaddps        (%rax),%ymm0,%ymm0
   DB  197,124,16,64,64                    ; vmovups       0x40(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,87,6,0,0           ; vbroadcastss  0x657(%rip),%ymm10        # 6814 <_sk_callback_avx+0x4d6>
+  DB  196,98,125,24,21,86,6,0,0           ; vbroadcastss  0x656(%rip),%ymm10        # 68ac <_sk_callback_avx+0x4d5>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,77,6,0,0           ; vbroadcastss  0x64d(%rip),%ymm10        # 6818 <_sk_callback_avx+0x4da>
+  DB  196,98,125,24,21,76,6,0,0           ; vbroadcastss  0x64c(%rip),%ymm10        # 68b0 <_sk_callback_avx+0x4d9>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,128,0,0,0            ; vmovups       %ymm8,0x80(%rax)
@@ -10659,14 +10723,14 @@ _sk_bicubic_p3x_avx LABEL PROC
 PUBLIC _sk_bicubic_n3y_avx
 _sk_bicubic_n3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,48,6,0,0          ; vbroadcastss  0x630(%rip),%ymm1        # 681c <_sk_callback_avx+0x4de>
+  DB  196,226,125,24,13,47,6,0,0          ; vbroadcastss  0x62f(%rip),%ymm1        # 68b4 <_sk_callback_avx+0x4dd>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,38,6,0,0            ; vbroadcastss  0x626(%rip),%ymm8        # 6820 <_sk_callback_avx+0x4e2>
+  DB  196,98,125,24,5,37,6,0,0            ; vbroadcastss  0x625(%rip),%ymm8        # 68b8 <_sk_callback_avx+0x4e1>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,23,6,0,0           ; vbroadcastss  0x617(%rip),%ymm10        # 6824 <_sk_callback_avx+0x4e6>
+  DB  196,98,125,24,21,22,6,0,0           ; vbroadcastss  0x616(%rip),%ymm10        # 68bc <_sk_callback_avx+0x4e5>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,13,6,0,0           ; vbroadcastss  0x60d(%rip),%ymm10        # 6828 <_sk_callback_avx+0x4ea>
+  DB  196,98,125,24,21,12,6,0,0           ; vbroadcastss  0x60c(%rip),%ymm10        # 68c0 <_sk_callback_avx+0x4e9>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -10676,19 +10740,19 @@ _sk_bicubic_n3y_avx LABEL PROC
 PUBLIC _sk_bicubic_n1y_avx
 _sk_bicubic_n1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,240,5,0,0         ; vbroadcastss  0x5f0(%rip),%ymm1        # 682c <_sk_callback_avx+0x4ee>
+  DB  196,226,125,24,13,239,5,0,0         ; vbroadcastss  0x5ef(%rip),%ymm1        # 68c4 <_sk_callback_avx+0x4ed>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
-  DB  196,98,125,24,5,230,5,0,0           ; vbroadcastss  0x5e6(%rip),%ymm8        # 6830 <_sk_callback_avx+0x4f2>
+  DB  196,98,125,24,5,229,5,0,0           ; vbroadcastss  0x5e5(%rip),%ymm8        # 68c8 <_sk_callback_avx+0x4f1>
   DB  197,60,92,64,96                     ; vsubps        0x60(%rax),%ymm8,%ymm8
-  DB  196,98,125,24,13,220,5,0,0          ; vbroadcastss  0x5dc(%rip),%ymm9        # 6834 <_sk_callback_avx+0x4f6>
+  DB  196,98,125,24,13,219,5,0,0          ; vbroadcastss  0x5db(%rip),%ymm9        # 68cc <_sk_callback_avx+0x4f5>
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,210,5,0,0          ; vbroadcastss  0x5d2(%rip),%ymm10        # 6838 <_sk_callback_avx+0x4fa>
+  DB  196,98,125,24,21,209,5,0,0          ; vbroadcastss  0x5d1(%rip),%ymm10        # 68d0 <_sk_callback_avx+0x4f9>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,201                    ; vmulps        %ymm9,%ymm8,%ymm9
-  DB  196,98,125,24,21,195,5,0,0          ; vbroadcastss  0x5c3(%rip),%ymm10        # 683c <_sk_callback_avx+0x4fe>
+  DB  196,98,125,24,21,194,5,0,0          ; vbroadcastss  0x5c2(%rip),%ymm10        # 68d4 <_sk_callback_avx+0x4fd>
   DB  196,65,52,88,202                    ; vaddps        %ymm10,%ymm9,%ymm9
   DB  196,65,60,89,193                    ; vmulps        %ymm9,%ymm8,%ymm8
-  DB  196,98,125,24,13,180,5,0,0          ; vbroadcastss  0x5b4(%rip),%ymm9        # 6840 <_sk_callback_avx+0x502>
+  DB  196,98,125,24,13,179,5,0,0          ; vbroadcastss  0x5b3(%rip),%ymm9        # 68d8 <_sk_callback_avx+0x501>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10697,17 +10761,17 @@ _sk_bicubic_n1y_avx LABEL PROC
 PUBLIC _sk_bicubic_p1y_avx
 _sk_bicubic_p1y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,98,125,24,5,156,5,0,0           ; vbroadcastss  0x59c(%rip),%ymm8        # 6844 <_sk_callback_avx+0x506>
+  DB  196,98,125,24,5,155,5,0,0           ; vbroadcastss  0x59b(%rip),%ymm8        # 68dc <_sk_callback_avx+0x505>
   DB  197,188,88,72,32                    ; vaddps        0x20(%rax),%ymm8,%ymm1
   DB  197,124,16,72,96                    ; vmovups       0x60(%rax),%ymm9
-  DB  196,98,125,24,21,141,5,0,0          ; vbroadcastss  0x58d(%rip),%ymm10        # 6848 <_sk_callback_avx+0x50a>
+  DB  196,98,125,24,21,140,5,0,0          ; vbroadcastss  0x58c(%rip),%ymm10        # 68e0 <_sk_callback_avx+0x509>
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
-  DB  196,98,125,24,29,131,5,0,0          ; vbroadcastss  0x583(%rip),%ymm11        # 684c <_sk_callback_avx+0x50e>
+  DB  196,98,125,24,29,130,5,0,0          ; vbroadcastss  0x582(%rip),%ymm11        # 68e4 <_sk_callback_avx+0x50d>
   DB  196,65,44,88,211                    ; vaddps        %ymm11,%ymm10,%ymm10
   DB  196,65,52,89,210                    ; vmulps        %ymm10,%ymm9,%ymm10
   DB  196,65,44,88,192                    ; vaddps        %ymm8,%ymm10,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
-  DB  196,98,125,24,13,106,5,0,0          ; vbroadcastss  0x56a(%rip),%ymm9        # 6850 <_sk_callback_avx+0x512>
+  DB  196,98,125,24,13,105,5,0,0          ; vbroadcastss  0x569(%rip),%ymm9        # 68e8 <_sk_callback_avx+0x511>
   DB  196,65,60,88,193                    ; vaddps        %ymm9,%ymm8,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -10716,13 +10780,13 @@ _sk_bicubic_p1y_avx LABEL PROC
 PUBLIC _sk_bicubic_p3y_avx
 _sk_bicubic_p3y_avx LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  196,226,125,24,13,82,5,0,0          ; vbroadcastss  0x552(%rip),%ymm1        # 6854 <_sk_callback_avx+0x516>
+  DB  196,226,125,24,13,81,5,0,0          ; vbroadcastss  0x551(%rip),%ymm1        # 68ec <_sk_callback_avx+0x515>
   DB  197,244,88,72,32                    ; vaddps        0x20(%rax),%ymm1,%ymm1
   DB  197,124,16,64,96                    ; vmovups       0x60(%rax),%ymm8
   DB  196,65,60,89,200                    ; vmulps        %ymm8,%ymm8,%ymm9
-  DB  196,98,125,24,21,62,5,0,0           ; vbroadcastss  0x53e(%rip),%ymm10        # 6858 <_sk_callback_avx+0x51a>
+  DB  196,98,125,24,21,61,5,0,0           ; vbroadcastss  0x53d(%rip),%ymm10        # 68f0 <_sk_callback_avx+0x519>
   DB  196,65,60,89,194                    ; vmulps        %ymm10,%ymm8,%ymm8
-  DB  196,98,125,24,21,52,5,0,0           ; vbroadcastss  0x534(%rip),%ymm10        # 685c <_sk_callback_avx+0x51e>
+  DB  196,98,125,24,21,51,5,0,0           ; vbroadcastss  0x533(%rip),%ymm10        # 68f4 <_sk_callback_avx+0x51d>
   DB  196,65,60,88,194                    ; vaddps        %ymm10,%ymm8,%ymm8
   DB  196,65,52,89,192                    ; vmulps        %ymm8,%ymm9,%ymm8
   DB  197,124,17,128,160,0,0,0            ; vmovups       %ymm8,0xa0(%rax)
@@ -10836,25 +10900,25 @@ ALIGN 4
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6509 <.literal4+0xb1>
+  DB  71,225,61                           ; rex.RXB       loope 65a1 <.literal4+0xb1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6519 <.literal4+0xc1>
+  DB  71,225,61                           ; rex.RXB       loope 65b1 <.literal4+0xc1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6529 <.literal4+0xd1>
+  DB  71,225,61                           ; rex.RXB       loope 65c1 <.literal4+0xd1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,154                          ; cmpb          $0x9a,(%rdi)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
   DB  62,61,10,23,63,174                  ; ds            cmp $0xae3f170a,%eax
-  DB  71,225,61                           ; rex.RXB       loope 6539 <.literal4+0xe1>
+  DB  71,225,61                           ; rex.RXB       loope 65d1 <.literal4+0xe1>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -10902,7 +10966,7 @@ ALIGN 4
   DB  190,129,128,128,59                  ; mov           $0x3b808081,%esi
   DB  129,128,128,59,0,248,0,0,8,33       ; addl          $0x21080000,-0x7ffc480(%rax)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        6585 <.literal4+0x12d>
+  DB  224,7                               ; loopne        661d <.literal4+0x12d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -10918,10 +10982,10 @@ ALIGN 4
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
   DB  0,52,255                            ; add           %dh,(%rdi,%rdi,8)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            65ac <.literal4+0x154>
+  DB  127,0                               ; jg            6644 <.literal4+0x154>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6625 <.literal4+0x1cd>
+  DB  119,115                             ; ja            66bd <.literal4+0x1cd>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10935,10 +10999,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            65e0 <.literal4+0x188>
+  DB  127,0                               ; jg            6678 <.literal4+0x188>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            6659 <.literal4+0x201>
+  DB  119,115                             ; ja            66f1 <.literal4+0x201>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10952,10 +11016,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            6614 <.literal4+0x1bc>
+  DB  127,0                               ; jg            66ac <.literal4+0x1bc>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            668d <.literal4+0x235>
+  DB  119,115                             ; ja            6725 <.literal4+0x235>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10969,10 +11033,10 @@ ALIGN 4
   DB  0,128,63,0,0,0                      ; add           %al,0x3f(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            6648 <.literal4+0x1f0>
+  DB  127,0                               ; jg            66e0 <.literal4+0x1f0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            66c1 <.literal4+0x269>
+  DB  119,115                             ; ja            6759 <.literal4+0x269>
   DB  248                                 ; clc
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,249,68,180                   ; mov           $0xb444f93f,%edi
@@ -10985,7 +11049,7 @@ ALIGN 4
   DB  0,75,0                              ; add           %cl,0x0(%rbx)
   DB  0,128,63,0,0,200                    ; add           %al,-0x37ffffc1(%rax)
   DB  66,0,0                              ; rex.X         add %al,(%rax)
-  DB  127,67                              ; jg            66bf <.literal4+0x267>
+  DB  127,67                              ; jg            6757 <.literal4+0x267>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -10997,10 +11061,10 @@ ALIGN 4
   DB  190,80,128,3,62                     ; mov           $0x3e038050,%esi
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           66df <.literal4+0x287>
+  DB  118,63                              ; jbe           6777 <.literal4+0x287>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            66f3 <.literal4+0x29b>
+  DB  127,67                              ; jg            678b <.literal4+0x29b>
   DB  129,128,128,59,0,0,128,63,129,128   ; addl          $0x80813f80,0x3b80(%rax)
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,128,63,129,128,128                ; add           %al,-0x7f7f7ec1(%rax)
@@ -11009,7 +11073,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        66d5 <.literal4+0x27d>
+  DB  224,7                               ; loopne        676d <.literal4+0x27d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -11021,7 +11085,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        66f1 <.literal4+0x299>
+  DB  224,7                               ; loopne        6789 <.literal4+0x299>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -11032,7 +11096,7 @@ ALIGN 4
   DB  0,0                                 ; add           %al,(%rax)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            6746 <.literal4+0x2ee>
+  DB  124,66                              ; jl            67de <.literal4+0x2ee>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,55,0,15                 ; mov           %ecx,0xf003788(%rax)
@@ -11050,9 +11114,9 @@ ALIGN 4
   DB  137,136,136,59,15,0                 ; mov           %ecx,0xf3b88(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  137,136,136,61,0,0                  ; mov           %ecx,0x3d88(%rax)
-  DB  112,65                              ; jo            6789 <.literal4+0x331>
+  DB  112,65                              ; jo            6821 <.literal4+0x331>
   DB  129,128,128,59,129,128,128,59,0,0   ; addl          $0x3b80,-0x7f7ec480(%rax)
-  DB  127,67                              ; jg            6797 <.literal4+0x33f>
+  DB  127,67                              ; jg            682f <.literal4+0x33f>
   DB  0,128,0,0,0,0                       ; add           %al,0x0(%rax)
   DB  0,128,0,4,0,128                     ; add           %al,-0x7ffffc00(%rax)
   DB  0,0                                 ; add           %al,(%rax)
@@ -11068,7 +11132,7 @@ ALIGN 4
   DB  0,128,55,0,0,128                    ; add           %al,-0x7fffffc9(%rax)
   DB  63                                  ; (bad)
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            67d7 <.literal4+0x37f>
+  DB  127,71                              ; jg            686f <.literal4+0x37f>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,89                               ; ds            pop %rcx
@@ -11319,7 +11383,7 @@ _sk_seed_shader_sse41 LABEL PROC
   DB  102,15,110,199                      ; movd          %edi,%xmm0
   DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
   DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
-  DB  15,40,21,161,70,0,0                 ; movaps        0x46a1(%rip),%xmm2        # 47b0 <_sk_callback_sse41+0xb1>
+  DB  15,40,21,97,71,0,0                  ; movaps        0x4761(%rip),%xmm2        # 4870 <_sk_callback_sse41+0xb7>
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  15,16,2                             ; movups        (%rdx),%xmm0
   DB  15,88,193                           ; addps         %xmm1,%xmm0
@@ -11328,7 +11392,7 @@ _sk_seed_shader_sse41 LABEL PROC
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,21,144,70,0,0                 ; movaps        0x4690(%rip),%xmm2        # 47c0 <_sk_callback_sse41+0xc1>
+  DB  15,40,21,80,71,0,0                  ; movaps        0x4750(%rip),%xmm2        # 4880 <_sk_callback_sse41+0xc7>
   DB  15,87,219                           ; xorps         %xmm3,%xmm3
   DB  15,87,228                           ; xorps         %xmm4,%xmm4
   DB  15,87,237                           ; xorps         %xmm5,%xmm5
@@ -11349,14 +11413,14 @@ _sk_dither_sse41 LABEL PROC
   DB  102,68,15,110,1                     ; movd          (%rcx),%xmm8
   DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
   DB  102,69,15,239,193                   ; pxor          %xmm9,%xmm8
-  DB  102,68,15,111,21,85,70,0,0          ; movdqa        0x4655(%rip),%xmm10        # 47d0 <_sk_callback_sse41+0xd1>
+  DB  102,68,15,111,21,21,71,0,0          ; movdqa        0x4715(%rip),%xmm10        # 4890 <_sk_callback_sse41+0xd7>
   DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
   DB  102,69,15,219,218                   ; pand          %xmm10,%xmm11
   DB  102,65,15,114,243,5                 ; pslld         $0x5,%xmm11
   DB  102,69,15,219,209                   ; pand          %xmm9,%xmm10
   DB  102,65,15,114,242,4                 ; pslld         $0x4,%xmm10
-  DB  102,68,15,111,37,65,70,0,0          ; movdqa        0x4641(%rip),%xmm12        # 47e0 <_sk_callback_sse41+0xe1>
-  DB  102,68,15,111,45,72,70,0,0          ; movdqa        0x4648(%rip),%xmm13        # 47f0 <_sk_callback_sse41+0xf1>
+  DB  102,68,15,111,37,1,71,0,0           ; movdqa        0x4701(%rip),%xmm12        # 48a0 <_sk_callback_sse41+0xe7>
+  DB  102,68,15,111,45,8,71,0,0           ; movdqa        0x4708(%rip),%xmm13        # 48b0 <_sk_callback_sse41+0xf7>
   DB  102,69,15,111,240                   ; movdqa        %xmm8,%xmm14
   DB  102,69,15,219,245                   ; pand          %xmm13,%xmm14
   DB  102,65,15,114,246,2                 ; pslld         $0x2,%xmm14
@@ -11372,8 +11436,8 @@ _sk_dither_sse41 LABEL PROC
   DB  102,69,15,235,245                   ; por           %xmm13,%xmm14
   DB  102,69,15,235,240                   ; por           %xmm8,%xmm14
   DB  69,15,91,198                        ; cvtdq2ps      %xmm14,%xmm8
-  DB  68,15,89,5,3,70,0,0                 ; mulps         0x4603(%rip),%xmm8        # 4800 <_sk_callback_sse41+0x101>
-  DB  68,15,88,5,11,70,0,0                ; addps         0x460b(%rip),%xmm8        # 4810 <_sk_callback_sse41+0x111>
+  DB  68,15,89,5,195,70,0,0               ; mulps         0x46c3(%rip),%xmm8        # 48c0 <_sk_callback_sse41+0x107>
+  DB  68,15,88,5,203,70,0,0               ; addps         0x46cb(%rip),%xmm8        # 48d0 <_sk_callback_sse41+0x117>
   DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
@@ -11440,7 +11504,7 @@ _sk_clear_sse41 LABEL PROC
 PUBLIC _sk_srcatop_sse41
 _sk_srcatop_sse41 LABEL PROC
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  68,15,40,5,100,69,0,0               ; movaps        0x4564(%rip),%xmm8        # 4820 <_sk_callback_sse41+0x121>
+  DB  68,15,40,5,36,70,0,0                ; movaps        0x4624(%rip),%xmm8        # 48e0 <_sk_callback_sse41+0x127>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -11463,7 +11527,7 @@ PUBLIC _sk_dstatop_sse41
 _sk_dstatop_sse41 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
   DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
-  DB  68,15,40,13,39,69,0,0               ; movaps        0x4527(%rip),%xmm9        # 4830 <_sk_callback_sse41+0x131>
+  DB  68,15,40,13,231,69,0,0              ; movaps        0x45e7(%rip),%xmm9        # 48f0 <_sk_callback_sse41+0x137>
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
@@ -11504,7 +11568,7 @@ _sk_dstin_sse41 LABEL PROC
 
 PUBLIC _sk_srcout_sse41
 _sk_srcout_sse41 LABEL PROC
-  DB  68,15,40,5,203,68,0,0               ; movaps        0x44cb(%rip),%xmm8        # 4840 <_sk_callback_sse41+0x141>
+  DB  68,15,40,5,139,69,0,0               ; movaps        0x458b(%rip),%xmm8        # 4900 <_sk_callback_sse41+0x147>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
@@ -11515,7 +11579,7 @@ _sk_srcout_sse41 LABEL PROC
 
 PUBLIC _sk_dstout_sse41
 _sk_dstout_sse41 LABEL PROC
-  DB  68,15,40,5,187,68,0,0               ; movaps        0x44bb(%rip),%xmm8        # 4850 <_sk_callback_sse41+0x151>
+  DB  68,15,40,5,123,69,0,0               ; movaps        0x457b(%rip),%xmm8        # 4910 <_sk_callback_sse41+0x157>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  15,89,196                           ; mulps         %xmm4,%xmm0
@@ -11530,7 +11594,7 @@ _sk_dstout_sse41 LABEL PROC
 
 PUBLIC _sk_srcover_sse41
 _sk_srcover_sse41 LABEL PROC
-  DB  68,15,40,5,158,68,0,0               ; movaps        0x449e(%rip),%xmm8        # 4860 <_sk_callback_sse41+0x161>
+  DB  68,15,40,5,94,69,0,0                ; movaps        0x455e(%rip),%xmm8        # 4920 <_sk_callback_sse41+0x167>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -11548,7 +11612,7 @@ _sk_srcover_sse41 LABEL PROC
 
 PUBLIC _sk_dstover_sse41
 _sk_dstover_sse41 LABEL PROC
-  DB  68,15,40,5,114,68,0,0               ; movaps        0x4472(%rip),%xmm8        # 4870 <_sk_callback_sse41+0x171>
+  DB  68,15,40,5,50,69,0,0                ; movaps        0x4532(%rip),%xmm8        # 4930 <_sk_callback_sse41+0x177>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -11572,7 +11636,7 @@ _sk_modulate_sse41 LABEL PROC
 
 PUBLIC _sk_multiply_sse41
 _sk_multiply_sse41 LABEL PROC
-  DB  68,15,40,5,70,68,0,0                ; movaps        0x4446(%rip),%xmm8        # 4880 <_sk_callback_sse41+0x181>
+  DB  68,15,40,5,6,69,0,0                 ; movaps        0x4506(%rip),%xmm8        # 4940 <_sk_callback_sse41+0x187>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
@@ -11642,7 +11706,7 @@ _sk_screen_sse41 LABEL PROC
 PUBLIC _sk_xor__sse41
 _sk_xor__sse41 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
-  DB  15,40,29,119,67,0,0                 ; movaps        0x4377(%rip),%xmm3        # 4890 <_sk_callback_sse41+0x191>
+  DB  15,40,29,55,68,0,0                  ; movaps        0x4437(%rip),%xmm3        # 4950 <_sk_callback_sse41+0x197>
   DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
@@ -11688,7 +11752,7 @@ _sk_darken_sse41 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,95,209                        ; maxps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,226,66,0,0                 ; movaps        0x42e2(%rip),%xmm2        # 48a0 <_sk_callback_sse41+0x1a1>
+  DB  15,40,21,162,67,0,0                 ; movaps        0x43a2(%rip),%xmm2        # 4960 <_sk_callback_sse41+0x1a7>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -11720,7 +11784,7 @@ _sk_lighten_sse41 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,135,66,0,0                 ; movaps        0x4287(%rip),%xmm2        # 48b0 <_sk_callback_sse41+0x1b1>
+  DB  15,40,21,71,67,0,0                  ; movaps        0x4347(%rip),%xmm2        # 4970 <_sk_callback_sse41+0x1b7>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -11755,7 +11819,7 @@ _sk_difference_sse41 LABEL PROC
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,33,66,0,0                  ; movaps        0x4221(%rip),%xmm2        # 48c0 <_sk_callback_sse41+0x1c1>
+  DB  15,40,21,225,66,0,0                 ; movaps        0x42e1(%rip),%xmm2        # 4980 <_sk_callback_sse41+0x1c7>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -11780,7 +11844,7 @@ _sk_exclusion_sse41 LABEL PROC
   DB  15,89,214                           ; mulps         %xmm6,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,202                        ; subps         %xmm2,%xmm9
-  DB  15,40,13,226,65,0,0                 ; movaps        0x41e2(%rip),%xmm1        # 48d0 <_sk_callback_sse41+0x1d1>
+  DB  15,40,13,162,66,0,0                 ; movaps        0x42a2(%rip),%xmm1        # 4990 <_sk_callback_sse41+0x1d7>
   DB  15,92,203                           ; subps         %xmm3,%xmm1
   DB  15,89,207                           ; mulps         %xmm7,%xmm1
   DB  15,88,217                           ; addps         %xmm1,%xmm3
@@ -11792,7 +11856,7 @@ _sk_exclusion_sse41 LABEL PROC
 PUBLIC _sk_colorburn_sse41
 _sk_colorburn_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,40,21,209,65,0,0              ; movaps        0x41d1(%rip),%xmm10        # 48e0 <_sk_callback_sse41+0x1e1>
+  DB  68,15,40,21,145,66,0,0              ; movaps        0x4291(%rip),%xmm10        # 49a0 <_sk_callback_sse41+0x1e7>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
@@ -11872,7 +11936,7 @@ _sk_colorburn_sse41 LABEL PROC
 PUBLIC _sk_colordodge_sse41
 _sk_colordodge_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,40,21,175,64,0,0              ; movaps        0x40af(%rip),%xmm10        # 48f0 <_sk_callback_sse41+0x1f1>
+  DB  68,15,40,21,111,65,0,0              ; movaps        0x416f(%rip),%xmm10        # 49b0 <_sk_callback_sse41+0x1f7>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
@@ -11953,7 +12017,7 @@ _sk_hardlight_sse41 LABEL PROC
   DB  15,40,244                           ; movaps        %xmm4,%xmm6
   DB  15,40,227                           ; movaps        %xmm3,%xmm4
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
-  DB  68,15,40,21,133,63,0,0              ; movaps        0x3f85(%rip),%xmm10        # 4900 <_sk_callback_sse41+0x201>
+  DB  68,15,40,21,69,64,0,0               ; movaps        0x4045(%rip),%xmm10        # 49c0 <_sk_callback_sse41+0x207>
   DB  65,15,40,234                        ; movaps        %xmm10,%xmm5
   DB  15,92,239                           ; subps         %xmm7,%xmm5
   DB  15,40,197                           ; movaps        %xmm5,%xmm0
@@ -12035,7 +12099,7 @@ PUBLIC _sk_overlay_sse41
 _sk_overlay_sse41 LABEL PROC
   DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
-  DB  68,15,40,21,103,62,0,0              ; movaps        0x3e67(%rip),%xmm10        # 4910 <_sk_callback_sse41+0x211>
+  DB  68,15,40,21,39,63,0,0               ; movaps        0x3f27(%rip),%xmm10        # 49d0 <_sk_callback_sse41+0x217>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
@@ -12119,7 +12183,7 @@ _sk_softlight_sse41 LABEL PROC
   DB  15,40,198                           ; movaps        %xmm6,%xmm0
   DB  15,94,199                           ; divps         %xmm7,%xmm0
   DB  65,15,84,193                        ; andps         %xmm9,%xmm0
-  DB  15,40,13,58,61,0,0                  ; movaps        0x3d3a(%rip),%xmm1        # 4920 <_sk_callback_sse41+0x221>
+  DB  15,40,13,250,61,0,0                 ; movaps        0x3dfa(%rip),%xmm1        # 49e0 <_sk_callback_sse41+0x227>
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
   DB  68,15,92,208                        ; subps         %xmm0,%xmm10
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
@@ -12132,10 +12196,10 @@ _sk_softlight_sse41 LABEL PROC
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  15,89,210                           ; mulps         %xmm2,%xmm2
   DB  15,88,208                           ; addps         %xmm0,%xmm2
-  DB  68,15,40,45,24,61,0,0               ; movaps        0x3d18(%rip),%xmm13        # 4930 <_sk_callback_sse41+0x231>
+  DB  68,15,40,45,216,61,0,0              ; movaps        0x3dd8(%rip),%xmm13        # 49f0 <_sk_callback_sse41+0x237>
   DB  69,15,88,245                        ; addps         %xmm13,%xmm14
   DB  68,15,89,242                        ; mulps         %xmm2,%xmm14
-  DB  68,15,40,37,24,61,0,0               ; movaps        0x3d18(%rip),%xmm12        # 4940 <_sk_callback_sse41+0x241>
+  DB  68,15,40,37,216,61,0,0              ; movaps        0x3dd8(%rip),%xmm12        # 4a00 <_sk_callback_sse41+0x247>
   DB  69,15,89,252                        ; mulps         %xmm12,%xmm15
   DB  69,15,88,254                        ; addps         %xmm14,%xmm15
   DB  15,40,198                           ; movaps        %xmm6,%xmm0
@@ -12321,12 +12385,12 @@ _sk_hue_sse41 LABEL PROC
   DB  68,15,84,208                        ; andps         %xmm0,%xmm10
   DB  15,84,200                           ; andps         %xmm0,%xmm1
   DB  68,15,84,232                        ; andps         %xmm0,%xmm13
-  DB  15,40,5,126,58,0,0                  ; movaps        0x3a7e(%rip),%xmm0        # 4950 <_sk_callback_sse41+0x251>
+  DB  15,40,5,62,59,0,0                   ; movaps        0x3b3e(%rip),%xmm0        # 4a10 <_sk_callback_sse41+0x257>
   DB  68,15,89,224                        ; mulps         %xmm0,%xmm12
-  DB  15,40,21,131,58,0,0                 ; movaps        0x3a83(%rip),%xmm2        # 4960 <_sk_callback_sse41+0x261>
+  DB  15,40,21,67,59,0,0                  ; movaps        0x3b43(%rip),%xmm2        # 4a20 <_sk_callback_sse41+0x267>
   DB  15,89,250                           ; mulps         %xmm2,%xmm7
   DB  65,15,88,252                        ; addps         %xmm12,%xmm7
-  DB  68,15,40,53,132,58,0,0              ; movaps        0x3a84(%rip),%xmm14        # 4970 <_sk_callback_sse41+0x271>
+  DB  68,15,40,53,68,59,0,0               ; movaps        0x3b44(%rip),%xmm14        # 4a30 <_sk_callback_sse41+0x277>
   DB  68,15,40,252                        ; movaps        %xmm4,%xmm15
   DB  69,15,89,254                        ; mulps         %xmm14,%xmm15
   DB  68,15,88,255                        ; addps         %xmm7,%xmm15
@@ -12409,7 +12473,7 @@ _sk_hue_sse41 LABEL PROC
   DB  65,15,88,214                        ; addps         %xmm14,%xmm2
   DB  15,40,196                           ; movaps        %xmm4,%xmm0
   DB  102,15,56,20,202                    ; blendvps      %xmm0,%xmm2,%xmm1
-  DB  68,15,40,13,73,57,0,0               ; movaps        0x3949(%rip),%xmm9        # 4980 <_sk_callback_sse41+0x281>
+  DB  68,15,40,13,9,58,0,0                ; movaps        0x3a09(%rip),%xmm9        # 4a40 <_sk_callback_sse41+0x287>
   DB  65,15,40,225                        ; movaps        %xmm9,%xmm4
   DB  15,92,229                           ; subps         %xmm5,%xmm4
   DB  15,40,68,36,48                      ; movaps        0x30(%rsp),%xmm0
@@ -12503,14 +12567,14 @@ _sk_saturation_sse41 LABEL PROC
   DB  68,15,84,215                        ; andps         %xmm7,%xmm10
   DB  68,15,84,223                        ; andps         %xmm7,%xmm11
   DB  68,15,84,199                        ; andps         %xmm7,%xmm8
-  DB  15,40,21,252,55,0,0                 ; movaps        0x37fc(%rip),%xmm2        # 4990 <_sk_callback_sse41+0x291>
+  DB  15,40,21,188,56,0,0                 ; movaps        0x38bc(%rip),%xmm2        # 4a50 <_sk_callback_sse41+0x297>
   DB  15,40,221                           ; movaps        %xmm5,%xmm3
   DB  15,89,218                           ; mulps         %xmm2,%xmm3
-  DB  15,40,13,255,55,0,0                 ; movaps        0x37ff(%rip),%xmm1        # 49a0 <_sk_callback_sse41+0x2a1>
+  DB  15,40,13,191,56,0,0                 ; movaps        0x38bf(%rip),%xmm1        # 4a60 <_sk_callback_sse41+0x2a7>
   DB  15,40,254                           ; movaps        %xmm6,%xmm7
   DB  15,89,249                           ; mulps         %xmm1,%xmm7
   DB  15,88,251                           ; addps         %xmm3,%xmm7
-  DB  68,15,40,45,254,55,0,0              ; movaps        0x37fe(%rip),%xmm13        # 49b0 <_sk_callback_sse41+0x2b1>
+  DB  68,15,40,45,190,56,0,0              ; movaps        0x38be(%rip),%xmm13        # 4a70 <_sk_callback_sse41+0x2b7>
   DB  69,15,89,245                        ; mulps         %xmm13,%xmm14
   DB  68,15,88,247                        ; addps         %xmm7,%xmm14
   DB  65,15,40,218                        ; movaps        %xmm10,%xmm3
@@ -12591,7 +12655,7 @@ _sk_saturation_sse41 LABEL PROC
   DB  65,15,88,253                        ; addps         %xmm13,%xmm7
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  102,68,15,56,20,223                 ; blendvps      %xmm0,%xmm7,%xmm11
-  DB  68,15,40,13,196,54,0,0              ; movaps        0x36c4(%rip),%xmm9        # 49c0 <_sk_callback_sse41+0x2c1>
+  DB  68,15,40,13,132,55,0,0              ; movaps        0x3784(%rip),%xmm9        # 4a80 <_sk_callback_sse41+0x2c7>
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  68,15,92,204                        ; subps         %xmm4,%xmm9
   DB  15,40,60,36                         ; movaps        (%rsp),%xmm7
@@ -12646,14 +12710,14 @@ _sk_color_sse41 LABEL PROC
   DB  15,40,231                           ; movaps        %xmm7,%xmm4
   DB  68,15,89,244                        ; mulps         %xmm4,%xmm14
   DB  15,89,204                           ; mulps         %xmm4,%xmm1
-  DB  68,15,40,13,9,54,0,0                ; movaps        0x3609(%rip),%xmm9        # 49d0 <_sk_callback_sse41+0x2d1>
+  DB  68,15,40,13,201,54,0,0              ; movaps        0x36c9(%rip),%xmm9        # 4a90 <_sk_callback_sse41+0x2d7>
   DB  65,15,40,250                        ; movaps        %xmm10,%xmm7
   DB  65,15,89,249                        ; mulps         %xmm9,%xmm7
-  DB  68,15,40,21,9,54,0,0                ; movaps        0x3609(%rip),%xmm10        # 49e0 <_sk_callback_sse41+0x2e1>
+  DB  68,15,40,21,201,54,0,0              ; movaps        0x36c9(%rip),%xmm10        # 4aa0 <_sk_callback_sse41+0x2e7>
   DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
   DB  65,15,89,218                        ; mulps         %xmm10,%xmm3
   DB  15,88,223                           ; addps         %xmm7,%xmm3
-  DB  68,15,40,29,6,54,0,0                ; movaps        0x3606(%rip),%xmm11        # 49f0 <_sk_callback_sse41+0x2f1>
+  DB  68,15,40,29,198,54,0,0              ; movaps        0x36c6(%rip),%xmm11        # 4ab0 <_sk_callback_sse41+0x2f7>
   DB  69,15,40,236                        ; movaps        %xmm12,%xmm13
   DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
   DB  68,15,88,235                        ; addps         %xmm3,%xmm13
@@ -12738,7 +12802,7 @@ _sk_color_sse41 LABEL PROC
   DB  65,15,88,251                        ; addps         %xmm11,%xmm7
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  102,15,56,20,207                    ; blendvps      %xmm0,%xmm7,%xmm1
-  DB  68,15,40,13,194,52,0,0              ; movaps        0x34c2(%rip),%xmm9        # 4a00 <_sk_callback_sse41+0x301>
+  DB  68,15,40,13,130,53,0,0              ; movaps        0x3582(%rip),%xmm9        # 4ac0 <_sk_callback_sse41+0x307>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  68,15,89,192                        ; mulps         %xmm0,%xmm8
@@ -12790,13 +12854,13 @@ _sk_luminosity_sse41 LABEL PROC
   DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
   DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
   DB  68,15,89,205                        ; mulps         %xmm5,%xmm9
-  DB  68,15,40,5,20,52,0,0                ; movaps        0x3414(%rip),%xmm8        # 4a10 <_sk_callback_sse41+0x311>
+  DB  68,15,40,5,212,52,0,0               ; movaps        0x34d4(%rip),%xmm8        # 4ad0 <_sk_callback_sse41+0x317>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
-  DB  68,15,40,21,24,52,0,0               ; movaps        0x3418(%rip),%xmm10        # 4a20 <_sk_callback_sse41+0x321>
+  DB  68,15,40,21,216,52,0,0              ; movaps        0x34d8(%rip),%xmm10        # 4ae0 <_sk_callback_sse41+0x327>
   DB  15,40,233                           ; movaps        %xmm1,%xmm5
   DB  65,15,89,234                        ; mulps         %xmm10,%xmm5
   DB  15,88,232                           ; addps         %xmm0,%xmm5
-  DB  68,15,40,37,22,52,0,0               ; movaps        0x3416(%rip),%xmm12        # 4a30 <_sk_callback_sse41+0x331>
+  DB  68,15,40,37,214,52,0,0              ; movaps        0x34d6(%rip),%xmm12        # 4af0 <_sk_callback_sse41+0x337>
   DB  68,15,40,242                        ; movaps        %xmm2,%xmm14
   DB  69,15,89,244                        ; mulps         %xmm12,%xmm14
   DB  68,15,88,245                        ; addps         %xmm5,%xmm14
@@ -12881,7 +12945,7 @@ _sk_luminosity_sse41 LABEL PROC
   DB  65,15,88,244                        ; addps         %xmm12,%xmm6
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
   DB  102,68,15,56,20,206                 ; blendvps      %xmm0,%xmm6,%xmm9
-  DB  15,40,5,204,50,0,0                  ; movaps        0x32cc(%rip),%xmm0        # 4a40 <_sk_callback_sse41+0x341>
+  DB  15,40,5,140,51,0,0                  ; movaps        0x338c(%rip),%xmm0        # 4b00 <_sk_callback_sse41+0x347>
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  15,92,215                           ; subps         %xmm7,%xmm2
   DB  15,89,226                           ; mulps         %xmm2,%xmm4
@@ -12927,7 +12991,7 @@ _sk_clamp_0_sse41 LABEL PROC
 
 PUBLIC _sk_clamp_1_sse41
 _sk_clamp_1_sse41 LABEL PROC
-  DB  68,15,40,5,76,50,0,0                ; movaps        0x324c(%rip),%xmm8        # 4a50 <_sk_callback_sse41+0x351>
+  DB  68,15,40,5,12,51,0,0                ; movaps        0x330c(%rip),%xmm8        # 4b10 <_sk_callback_sse41+0x357>
   DB  65,15,93,192                        ; minps         %xmm8,%xmm0
   DB  65,15,93,200                        ; minps         %xmm8,%xmm1
   DB  65,15,93,208                        ; minps         %xmm8,%xmm2
@@ -12937,7 +13001,7 @@ _sk_clamp_1_sse41 LABEL PROC
 
 PUBLIC _sk_clamp_a_sse41
 _sk_clamp_a_sse41 LABEL PROC
-  DB  15,93,29,65,50,0,0                  ; minps         0x3241(%rip),%xmm3        # 4a60 <_sk_callback_sse41+0x361>
+  DB  15,93,29,1,51,0,0                   ; minps         0x3301(%rip),%xmm3        # 4b20 <_sk_callback_sse41+0x367>
   DB  15,93,195                           ; minps         %xmm3,%xmm0
   DB  15,93,203                           ; minps         %xmm3,%xmm1
   DB  15,93,211                           ; minps         %xmm3,%xmm2
@@ -13010,7 +13074,7 @@ _sk_premul_sse41 LABEL PROC
 PUBLIC _sk_unpremul_sse41
 _sk_unpremul_sse41 LABEL PROC
   DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
-  DB  68,15,40,13,172,49,0,0              ; movaps        0x31ac(%rip),%xmm9        # 4a70 <_sk_callback_sse41+0x371>
+  DB  68,15,40,13,108,50,0,0              ; movaps        0x326c(%rip),%xmm9        # 4b30 <_sk_callback_sse41+0x377>
   DB  68,15,94,203                        ; divps         %xmm3,%xmm9
   DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
   DB  69,15,84,193                        ; andps         %xmm9,%xmm8
@@ -13022,20 +13086,20 @@ _sk_unpremul_sse41 LABEL PROC
 
 PUBLIC _sk_from_srgb_sse41
 _sk_from_srgb_sse41 LABEL PROC
-  DB  68,15,40,29,151,49,0,0              ; movaps        0x3197(%rip),%xmm11        # 4a80 <_sk_callback_sse41+0x381>
+  DB  68,15,40,29,87,50,0,0               ; movaps        0x3257(%rip),%xmm11        # 4b40 <_sk_callback_sse41+0x387>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
   DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
   DB  69,15,89,210                        ; mulps         %xmm10,%xmm10
-  DB  68,15,40,37,143,49,0,0              ; movaps        0x318f(%rip),%xmm12        # 4a90 <_sk_callback_sse41+0x391>
+  DB  68,15,40,37,79,50,0,0               ; movaps        0x324f(%rip),%xmm12        # 4b50 <_sk_callback_sse41+0x397>
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
   DB  69,15,89,196                        ; mulps         %xmm12,%xmm8
-  DB  68,15,40,45,143,49,0,0              ; movaps        0x318f(%rip),%xmm13        # 4aa0 <_sk_callback_sse41+0x3a1>
+  DB  68,15,40,45,79,50,0,0               ; movaps        0x324f(%rip),%xmm13        # 4b60 <_sk_callback_sse41+0x3a7>
   DB  69,15,88,197                        ; addps         %xmm13,%xmm8
   DB  69,15,89,194                        ; mulps         %xmm10,%xmm8
-  DB  68,15,40,53,143,49,0,0              ; movaps        0x318f(%rip),%xmm14        # 4ab0 <_sk_callback_sse41+0x3b1>
+  DB  68,15,40,53,79,50,0,0               ; movaps        0x324f(%rip),%xmm14        # 4b70 <_sk_callback_sse41+0x3b7>
   DB  69,15,88,198                        ; addps         %xmm14,%xmm8
-  DB  68,15,40,61,147,49,0,0              ; movaps        0x3193(%rip),%xmm15        # 4ac0 <_sk_callback_sse41+0x3c1>
+  DB  68,15,40,61,83,50,0,0               ; movaps        0x3253(%rip),%xmm15        # 4b80 <_sk_callback_sse41+0x3c7>
   DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
   DB  102,69,15,56,20,193                 ; blendvps      %xmm0,%xmm9,%xmm8
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
@@ -13077,22 +13141,22 @@ _sk_to_srgb_sse41 LABEL PROC
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
   DB  15,40,209                           ; movaps        %xmm1,%xmm2
   DB  68,15,82,192                        ; rsqrtps       %xmm0,%xmm8
-  DB  68,15,40,29,8,49,0,0                ; movaps        0x3108(%rip),%xmm11        # 4ad0 <_sk_callback_sse41+0x3d1>
+  DB  68,15,40,29,200,49,0,0              ; movaps        0x31c8(%rip),%xmm11        # 4b90 <_sk_callback_sse41+0x3d7>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
-  DB  68,15,40,37,8,49,0,0                ; movaps        0x3108(%rip),%xmm12        # 4ae0 <_sk_callback_sse41+0x3e1>
+  DB  68,15,40,37,200,49,0,0              ; movaps        0x31c8(%rip),%xmm12        # 4ba0 <_sk_callback_sse41+0x3e7>
   DB  69,15,40,248                        ; movaps        %xmm8,%xmm15
   DB  69,15,89,252                        ; mulps         %xmm12,%xmm15
-  DB  68,15,40,21,8,49,0,0                ; movaps        0x3108(%rip),%xmm10        # 4af0 <_sk_callback_sse41+0x3f1>
+  DB  68,15,40,21,200,49,0,0              ; movaps        0x31c8(%rip),%xmm10        # 4bb0 <_sk_callback_sse41+0x3f7>
   DB  69,15,88,250                        ; addps         %xmm10,%xmm15
   DB  69,15,89,248                        ; mulps         %xmm8,%xmm15
-  DB  68,15,40,45,8,49,0,0                ; movaps        0x3108(%rip),%xmm13        # 4b00 <_sk_callback_sse41+0x401>
+  DB  68,15,40,45,200,49,0,0              ; movaps        0x31c8(%rip),%xmm13        # 4bc0 <_sk_callback_sse41+0x407>
   DB  69,15,88,253                        ; addps         %xmm13,%xmm15
-  DB  68,15,40,53,12,49,0,0               ; movaps        0x310c(%rip),%xmm14        # 4b10 <_sk_callback_sse41+0x411>
+  DB  68,15,40,53,204,49,0,0              ; movaps        0x31cc(%rip),%xmm14        # 4bd0 <_sk_callback_sse41+0x417>
   DB  69,15,88,198                        ; addps         %xmm14,%xmm8
   DB  69,15,83,192                        ; rcpps         %xmm8,%xmm8
   DB  69,15,89,199                        ; mulps         %xmm15,%xmm8
-  DB  68,15,40,61,8,49,0,0                ; movaps        0x3108(%rip),%xmm15        # 4b20 <_sk_callback_sse41+0x421>
+  DB  68,15,40,61,200,49,0,0              ; movaps        0x31c8(%rip),%xmm15        # 4be0 <_sk_callback_sse41+0x427>
   DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
   DB  102,69,15,56,20,193                 ; blendvps      %xmm0,%xmm9,%xmm8
   DB  68,15,82,202                        ; rsqrtps       %xmm2,%xmm9
@@ -13144,7 +13208,7 @@ _sk_rgb_to_hsl_sse41 LABEL PROC
   DB  68,15,93,226                        ; minps         %xmm2,%xmm12
   DB  65,15,40,203                        ; movaps        %xmm11,%xmm1
   DB  65,15,92,204                        ; subps         %xmm12,%xmm1
-  DB  68,15,40,53,82,48,0,0               ; movaps        0x3052(%rip),%xmm14        # 4b30 <_sk_callback_sse41+0x431>
+  DB  68,15,40,53,18,49,0,0               ; movaps        0x3112(%rip),%xmm14        # 4bf0 <_sk_callback_sse41+0x437>
   DB  68,15,94,241                        ; divps         %xmm1,%xmm14
   DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
   DB  69,15,194,208,0                     ; cmpeqps       %xmm8,%xmm10
@@ -13153,27 +13217,27 @@ _sk_rgb_to_hsl_sse41 LABEL PROC
   DB  65,15,89,198                        ; mulps         %xmm14,%xmm0
   DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
   DB  68,15,194,250,1                     ; cmpltps       %xmm2,%xmm15
-  DB  68,15,84,61,57,48,0,0               ; andps         0x3039(%rip),%xmm15        # 4b40 <_sk_callback_sse41+0x441>
+  DB  68,15,84,61,249,48,0,0              ; andps         0x30f9(%rip),%xmm15        # 4c00 <_sk_callback_sse41+0x447>
   DB  68,15,88,248                        ; addps         %xmm0,%xmm15
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
   DB  65,15,194,193,0                     ; cmpeqps       %xmm9,%xmm0
   DB  65,15,92,208                        ; subps         %xmm8,%xmm2
   DB  65,15,89,214                        ; mulps         %xmm14,%xmm2
-  DB  68,15,40,45,44,48,0,0               ; movaps        0x302c(%rip),%xmm13        # 4b50 <_sk_callback_sse41+0x451>
+  DB  68,15,40,45,236,48,0,0              ; movaps        0x30ec(%rip),%xmm13        # 4c10 <_sk_callback_sse41+0x457>
   DB  65,15,88,213                        ; addps         %xmm13,%xmm2
   DB  69,15,92,193                        ; subps         %xmm9,%xmm8
   DB  69,15,89,198                        ; mulps         %xmm14,%xmm8
-  DB  68,15,88,5,40,48,0,0                ; addps         0x3028(%rip),%xmm8        # 4b60 <_sk_callback_sse41+0x461>
+  DB  68,15,88,5,232,48,0,0               ; addps         0x30e8(%rip),%xmm8        # 4c20 <_sk_callback_sse41+0x467>
   DB  102,68,15,56,20,194                 ; blendvps      %xmm0,%xmm2,%xmm8
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  102,69,15,56,20,199                 ; blendvps      %xmm0,%xmm15,%xmm8
-  DB  68,15,89,5,32,48,0,0                ; mulps         0x3020(%rip),%xmm8        # 4b70 <_sk_callback_sse41+0x471>
+  DB  68,15,89,5,224,48,0,0               ; mulps         0x30e0(%rip),%xmm8        # 4c30 <_sk_callback_sse41+0x477>
   DB  69,15,40,203                        ; movaps        %xmm11,%xmm9
   DB  69,15,194,204,4                     ; cmpneqps      %xmm12,%xmm9
   DB  69,15,84,193                        ; andps         %xmm9,%xmm8
   DB  69,15,92,235                        ; subps         %xmm11,%xmm13
   DB  69,15,88,220                        ; addps         %xmm12,%xmm11
-  DB  15,40,5,20,48,0,0                   ; movaps        0x3014(%rip),%xmm0        # 4b80 <_sk_callback_sse41+0x481>
+  DB  15,40,5,212,48,0,0                  ; movaps        0x30d4(%rip),%xmm0        # 4c40 <_sk_callback_sse41+0x487>
   DB  65,15,40,211                        ; movaps        %xmm11,%xmm2
   DB  15,89,208                           ; mulps         %xmm0,%xmm2
   DB  15,194,194,1                        ; cmpltps       %xmm2,%xmm0
@@ -13194,7 +13258,7 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  15,41,100,36,32                     ; movaps        %xmm4,0x20(%rsp)
   DB  15,41,92,36,16                      ; movaps        %xmm3,0x10(%rsp)
   DB  68,15,40,208                        ; movaps        %xmm0,%xmm10
-  DB  68,15,40,13,214,47,0,0              ; movaps        0x2fd6(%rip),%xmm9        # 4b90 <_sk_callback_sse41+0x491>
+  DB  68,15,40,13,150,48,0,0              ; movaps        0x3096(%rip),%xmm9        # 4c50 <_sk_callback_sse41+0x497>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,194,194,2                        ; cmpleps       %xmm2,%xmm0
   DB  15,40,217                           ; movaps        %xmm1,%xmm3
@@ -13207,19 +13271,19 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  15,41,20,36                         ; movaps        %xmm2,(%rsp)
   DB  69,15,88,192                        ; addps         %xmm8,%xmm8
   DB  68,15,92,197                        ; subps         %xmm5,%xmm8
-  DB  68,15,40,53,178,47,0,0              ; movaps        0x2fb2(%rip),%xmm14        # 4ba0 <_sk_callback_sse41+0x4a1>
+  DB  68,15,40,53,114,48,0,0              ; movaps        0x3072(%rip),%xmm14        # 4c60 <_sk_callback_sse41+0x4a7>
   DB  69,15,88,242                        ; addps         %xmm10,%xmm14
   DB  102,65,15,58,8,198,1                ; roundps       $0x1,%xmm14,%xmm0
   DB  68,15,92,240                        ; subps         %xmm0,%xmm14
-  DB  68,15,40,29,171,47,0,0              ; movaps        0x2fab(%rip),%xmm11        # 4bb0 <_sk_callback_sse41+0x4b1>
+  DB  68,15,40,29,107,48,0,0              ; movaps        0x306b(%rip),%xmm11        # 4c70 <_sk_callback_sse41+0x4b7>
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
   DB  65,15,194,198,2                     ; cmpleps       %xmm14,%xmm0
   DB  15,40,245                           ; movaps        %xmm5,%xmm6
   DB  65,15,92,240                        ; subps         %xmm8,%xmm6
-  DB  15,40,61,164,47,0,0                 ; movaps        0x2fa4(%rip),%xmm7        # 4bc0 <_sk_callback_sse41+0x4c1>
+  DB  15,40,61,100,48,0,0                 ; movaps        0x3064(%rip),%xmm7        # 4c80 <_sk_callback_sse41+0x4c7>
   DB  69,15,40,238                        ; movaps        %xmm14,%xmm13
   DB  68,15,89,239                        ; mulps         %xmm7,%xmm13
-  DB  15,40,29,165,47,0,0                 ; movaps        0x2fa5(%rip),%xmm3        # 4bd0 <_sk_callback_sse41+0x4d1>
+  DB  15,40,29,101,48,0,0                 ; movaps        0x3065(%rip),%xmm3        # 4c90 <_sk_callback_sse41+0x4d7>
   DB  68,15,40,227                        ; movaps        %xmm3,%xmm12
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  68,15,89,230                        ; mulps         %xmm6,%xmm12
@@ -13229,7 +13293,7 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  65,15,194,198,2                     ; cmpleps       %xmm14,%xmm0
   DB  68,15,40,253                        ; movaps        %xmm5,%xmm15
   DB  102,69,15,56,20,252                 ; blendvps      %xmm0,%xmm12,%xmm15
-  DB  68,15,40,37,132,47,0,0              ; movaps        0x2f84(%rip),%xmm12        # 4be0 <_sk_callback_sse41+0x4e1>
+  DB  68,15,40,37,68,48,0,0               ; movaps        0x3044(%rip),%xmm12        # 4ca0 <_sk_callback_sse41+0x4e7>
   DB  65,15,40,196                        ; movaps        %xmm12,%xmm0
   DB  65,15,194,198,2                     ; cmpleps       %xmm14,%xmm0
   DB  68,15,89,238                        ; mulps         %xmm6,%xmm13
@@ -13263,7 +13327,7 @@ _sk_hsl_to_rgb_sse41 LABEL PROC
   DB  65,15,40,198                        ; movaps        %xmm14,%xmm0
   DB  15,40,20,36                         ; movaps        (%rsp),%xmm2
   DB  102,15,56,20,202                    ; blendvps      %xmm0,%xmm2,%xmm1
-  DB  68,15,88,21,253,46,0,0              ; addps         0x2efd(%rip),%xmm10        # 4bf0 <_sk_callback_sse41+0x4f1>
+  DB  68,15,88,21,189,47,0,0              ; addps         0x2fbd(%rip),%xmm10        # 4cb0 <_sk_callback_sse41+0x4f7>
   DB  102,65,15,58,8,194,1                ; roundps       $0x1,%xmm10,%xmm0
   DB  68,15,92,208                        ; subps         %xmm0,%xmm10
   DB  69,15,194,218,2                     ; cmpleps       %xmm10,%xmm11
@@ -13312,7 +13376,7 @@ _sk_scale_u8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,86,46,0,0                ; mulps         0x2e56(%rip),%xmm8        # 4c00 <_sk_callback_sse41+0x501>
+  DB  68,15,89,5,22,47,0,0                ; mulps         0x2f16(%rip),%xmm8        # 4cc0 <_sk_callback_sse41+0x507>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
@@ -13346,7 +13410,7 @@ _sk_lerp_u8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,68,15,56,49,4,56                ; pmovzxbd      (%rax,%rdi,1),%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,2,46,0,0                 ; mulps         0x2e02(%rip),%xmm8        # 4c10 <_sk_callback_sse41+0x511>
+  DB  68,15,89,5,194,46,0,0               ; mulps         0x2ec2(%rip),%xmm8        # 4cd0 <_sk_callback_sse41+0x517>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -13367,17 +13431,17 @@ _sk_lerp_565_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,68,15,56,51,20,120              ; pmovzxwd      (%rax,%rdi,2),%xmm10
-  DB  102,68,15,111,5,209,45,0,0          ; movdqa        0x2dd1(%rip),%xmm8        # 4c20 <_sk_callback_sse41+0x521>
+  DB  102,68,15,111,5,145,46,0,0          ; movdqa        0x2e91(%rip),%xmm8        # 4ce0 <_sk_callback_sse41+0x527>
   DB  102,69,15,219,194                   ; pand          %xmm10,%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,208,45,0,0               ; mulps         0x2dd0(%rip),%xmm8        # 4c30 <_sk_callback_sse41+0x531>
-  DB  102,68,15,111,13,215,45,0,0         ; movdqa        0x2dd7(%rip),%xmm9        # 4c40 <_sk_callback_sse41+0x541>
+  DB  68,15,89,5,144,46,0,0               ; mulps         0x2e90(%rip),%xmm8        # 4cf0 <_sk_callback_sse41+0x537>
+  DB  102,68,15,111,13,151,46,0,0         ; movdqa        0x2e97(%rip),%xmm9        # 4d00 <_sk_callback_sse41+0x547>
   DB  102,69,15,219,202                   ; pand          %xmm10,%xmm9
   DB  69,15,91,201                        ; cvtdq2ps      %xmm9,%xmm9
-  DB  68,15,89,13,214,45,0,0              ; mulps         0x2dd6(%rip),%xmm9        # 4c50 <_sk_callback_sse41+0x551>
-  DB  102,68,15,219,21,221,45,0,0         ; pand          0x2ddd(%rip),%xmm10        # 4c60 <_sk_callback_sse41+0x561>
+  DB  68,15,89,13,150,46,0,0              ; mulps         0x2e96(%rip),%xmm9        # 4d10 <_sk_callback_sse41+0x557>
+  DB  102,68,15,219,21,157,46,0,0         ; pand          0x2e9d(%rip),%xmm10        # 4d20 <_sk_callback_sse41+0x567>
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
-  DB  68,15,89,21,225,45,0,0              ; mulps         0x2de1(%rip),%xmm10        # 4c70 <_sk_callback_sse41+0x571>
+  DB  68,15,89,21,161,46,0,0              ; mulps         0x2ea1(%rip),%xmm10        # 4d30 <_sk_callback_sse41+0x577>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -13406,7 +13470,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  243,69,15,111,4,184                 ; movdqu        (%r8,%rdi,4),%xmm8
-  DB  102,15,111,5,146,45,0,0             ; movdqa        0x2d92(%rip),%xmm0        # 4c80 <_sk_callback_sse41+0x581>
+  DB  102,15,111,5,82,46,0,0              ; movdqa        0x2e52(%rip),%xmm0        # 4d40 <_sk_callback_sse41+0x587>
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,73,15,58,22,192,1               ; pextrq        $0x1,%xmm0,%r8
   DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
@@ -13421,7 +13485,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  102,15,58,33,193,48                 ; insertps      $0x30,%xmm1,%xmm0
   DB  76,139,64,16                        ; mov           0x10(%rax),%r8
   DB  102,65,15,111,200                   ; movdqa        %xmm8,%xmm1
-  DB  102,15,56,0,13,77,45,0,0            ; pshufb        0x2d4d(%rip),%xmm1        # 4c90 <_sk_callback_sse41+0x591>
+  DB  102,15,56,0,13,13,46,0,0            ; pshufb        0x2e0d(%rip),%xmm1        # 4d50 <_sk_callback_sse41+0x597>
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
   DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
   DB  68,15,182,209                       ; movzbl        %cl,%r10d
@@ -13436,7 +13500,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  102,15,58,33,202,48                 ; insertps      $0x30,%xmm2,%xmm1
   DB  76,139,64,24                        ; mov           0x18(%rax),%r8
   DB  102,65,15,111,208                   ; movdqa        %xmm8,%xmm2
-  DB  102,15,56,0,21,9,45,0,0             ; pshufb        0x2d09(%rip),%xmm2        # 4ca0 <_sk_callback_sse41+0x5a1>
+  DB  102,15,56,0,21,201,45,0,0           ; pshufb        0x2dc9(%rip),%xmm2        # 4d60 <_sk_callback_sse41+0x5a7>
   DB  102,72,15,58,22,209,1               ; pextrq        $0x1,%xmm2,%rcx
   DB  102,72,15,126,208                   ; movq          %xmm2,%rax
   DB  68,15,182,200                       ; movzbl        %al,%r9d
@@ -13451,7 +13515,7 @@ _sk_load_tables_sse41 LABEL PROC
   DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
   DB  102,65,15,114,208,24                ; psrld         $0x18,%xmm8
   DB  65,15,91,216                        ; cvtdq2ps      %xmm8,%xmm3
-  DB  15,89,29,198,44,0,0                 ; mulps         0x2cc6(%rip),%xmm3        # 4cb0 <_sk_callback_sse41+0x5b1>
+  DB  15,89,29,134,45,0,0                 ; mulps         0x2d86(%rip),%xmm3        # 4d70 <_sk_callback_sse41+0x5b7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -13468,7 +13532,7 @@ _sk_load_tables_u16_be_sse41 LABEL PROC
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
   DB  102,68,15,105,200                   ; punpckhwd     %xmm0,%xmm9
-  DB  102,68,15,111,5,153,44,0,0          ; movdqa        0x2c99(%rip),%xmm8        # 4cc0 <_sk_callback_sse41+0x5c1>
+  DB  102,68,15,111,5,89,45,0,0           ; movdqa        0x2d59(%rip),%xmm8        # 4d80 <_sk_callback_sse41+0x5c7>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
@@ -13485,7 +13549,7 @@ _sk_load_tables_u16_be_sse41 LABEL PROC
   DB  243,67,15,16,20,8                   ; movss         (%r8,%r9,1),%xmm2
   DB  102,15,58,33,194,48                 ; insertps      $0x30,%xmm2,%xmm0
   DB  76,139,64,16                        ; mov           0x10(%rax),%r8
-  DB  102,15,56,0,13,76,44,0,0            ; pshufb        0x2c4c(%rip),%xmm1        # 4cd0 <_sk_callback_sse41+0x5d1>
+  DB  102,15,56,0,13,12,45,0,0            ; pshufb        0x2d0c(%rip),%xmm1        # 4d90 <_sk_callback_sse41+0x5d7>
   DB  102,15,56,51,201                    ; pmovzxwd      %xmm1,%xmm1
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
   DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
@@ -13521,7 +13585,7 @@ _sk_load_tables_u16_be_sse41 LABEL PROC
   DB  102,65,15,235,216                   ; por           %xmm8,%xmm3
   DB  102,15,56,51,219                    ; pmovzxwd      %xmm3,%xmm3
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,154,43,0,0                 ; mulps         0x2b9a(%rip),%xmm3        # 4ce0 <_sk_callback_sse41+0x5e1>
+  DB  15,89,29,90,44,0,0                  ; mulps         0x2c5a(%rip),%xmm3        # 4da0 <_sk_callback_sse41+0x5e7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -13541,7 +13605,7 @@ _sk_load_tables_rgb_u16_be_sse41 LABEL PROC
   DB  102,68,15,97,200                    ; punpcklwd     %xmm0,%xmm9
   DB  102,15,111,202                      ; movdqa        %xmm2,%xmm1
   DB  102,65,15,97,201                    ; punpcklwd     %xmm9,%xmm1
-  DB  102,68,15,111,5,92,43,0,0           ; movdqa        0x2b5c(%rip),%xmm8        # 4cf0 <_sk_callback_sse41+0x5f1>
+  DB  102,68,15,111,5,28,44,0,0           ; movdqa        0x2c1c(%rip),%xmm8        # 4db0 <_sk_callback_sse41+0x5f7>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
@@ -13558,7 +13622,7 @@ _sk_load_tables_rgb_u16_be_sse41 LABEL PROC
   DB  243,67,15,16,28,8                   ; movss         (%r8,%r9,1),%xmm3
   DB  102,15,58,33,195,48                 ; insertps      $0x30,%xmm3,%xmm0
   DB  76,139,64,16                        ; mov           0x10(%rax),%r8
-  DB  102,15,56,0,13,15,43,0,0            ; pshufb        0x2b0f(%rip),%xmm1        # 4d00 <_sk_callback_sse41+0x601>
+  DB  102,15,56,0,13,207,43,0,0           ; pshufb        0x2bcf(%rip),%xmm1        # 4dc0 <_sk_callback_sse41+0x607>
   DB  102,15,56,51,201                    ; pmovzxwd      %xmm1,%xmm1
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
   DB  102,72,15,126,201                   ; movq          %xmm1,%rcx
@@ -13589,7 +13653,7 @@ _sk_load_tables_rgb_u16_be_sse41 LABEL PROC
   DB  243,65,15,16,28,8                   ; movss         (%r8,%rcx,1),%xmm3
   DB  102,15,58,33,211,48                 ; insertps      $0x30,%xmm3,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,122,42,0,0                 ; movaps        0x2a7a(%rip),%xmm3        # 4d10 <_sk_callback_sse41+0x611>
+  DB  15,40,29,58,43,0,0                  ; movaps        0x2b3a(%rip),%xmm3        # 4dd0 <_sk_callback_sse41+0x617>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_byte_tables_sse41
@@ -13597,7 +13661,7 @@ _sk_byte_tables_sse41 LABEL PROC
   DB  65,86                               ; push          %r14
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,123,42,0,0               ; movaps        0x2a7b(%rip),%xmm8        # 4d20 <_sk_callback_sse41+0x621>
+  DB  68,15,40,5,59,43,0,0                ; movaps        0x2b3b(%rip),%xmm8        # 4de0 <_sk_callback_sse41+0x627>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,91,192                       ; cvtps2dq      %xmm0,%xmm0
   DB  102,72,15,58,22,193,1               ; pextrq        $0x1,%xmm0,%rcx
@@ -13616,7 +13680,7 @@ _sk_byte_tables_sse41 LABEL PROC
   DB  102,15,58,32,193,3                  ; pinsrb        $0x3,%ecx,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,13,44,42,0,0               ; movaps        0x2a2c(%rip),%xmm9        # 4d30 <_sk_callback_sse41+0x631>
+  DB  68,15,40,13,236,42,0,0              ; movaps        0x2aec(%rip),%xmm9        # 4df0 <_sk_callback_sse41+0x637>
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -13705,7 +13769,7 @@ _sk_byte_tables_rgb_sse41 LABEL PROC
   DB  102,15,58,32,193,3                  ; pinsrb        $0x3,%ecx,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,13,180,40,0,0              ; movaps        0x28b4(%rip),%xmm9        # 4d40 <_sk_callback_sse41+0x641>
+  DB  68,15,40,13,116,41,0,0              ; movaps        0x2974(%rip),%xmm9        # 4e00 <_sk_callback_sse41+0x647>
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -13872,31 +13936,31 @@ _sk_parametric_r_sse41 LABEL PROC
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,194                        ; cvtdq2ps      %xmm10,%xmm8
-  DB  68,15,89,5,11,38,0,0                ; mulps         0x260b(%rip),%xmm8        # 4d50 <_sk_callback_sse41+0x651>
-  DB  68,15,84,21,19,38,0,0               ; andps         0x2613(%rip),%xmm10        # 4d60 <_sk_callback_sse41+0x661>
-  DB  68,15,86,21,27,38,0,0               ; orps          0x261b(%rip),%xmm10        # 4d70 <_sk_callback_sse41+0x671>
-  DB  68,15,88,5,35,38,0,0                ; addps         0x2623(%rip),%xmm8        # 4d80 <_sk_callback_sse41+0x681>
-  DB  68,15,40,37,43,38,0,0               ; movaps        0x262b(%rip),%xmm12        # 4d90 <_sk_callback_sse41+0x691>
+  DB  68,15,89,5,203,38,0,0               ; mulps         0x26cb(%rip),%xmm8        # 4e10 <_sk_callback_sse41+0x657>
+  DB  68,15,84,21,211,38,0,0              ; andps         0x26d3(%rip),%xmm10        # 4e20 <_sk_callback_sse41+0x667>
+  DB  68,15,86,21,219,38,0,0              ; orps          0x26db(%rip),%xmm10        # 4e30 <_sk_callback_sse41+0x677>
+  DB  68,15,88,5,227,38,0,0               ; addps         0x26e3(%rip),%xmm8        # 4e40 <_sk_callback_sse41+0x687>
+  DB  68,15,40,37,235,38,0,0              ; movaps        0x26eb(%rip),%xmm12        # 4e50 <_sk_callback_sse41+0x697>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,196                        ; subps         %xmm12,%xmm8
-  DB  68,15,88,21,43,38,0,0               ; addps         0x262b(%rip),%xmm10        # 4da0 <_sk_callback_sse41+0x6a1>
-  DB  68,15,40,37,51,38,0,0               ; movaps        0x2633(%rip),%xmm12        # 4db0 <_sk_callback_sse41+0x6b1>
+  DB  68,15,88,21,235,38,0,0              ; addps         0x26eb(%rip),%xmm10        # 4e60 <_sk_callback_sse41+0x6a7>
+  DB  68,15,40,37,243,38,0,0              ; movaps        0x26f3(%rip),%xmm12        # 4e70 <_sk_callback_sse41+0x6b7>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,196                        ; subps         %xmm12,%xmm8
   DB  69,15,89,195                        ; mulps         %xmm11,%xmm8
   DB  102,69,15,58,8,208,1                ; roundps       $0x1,%xmm8,%xmm10
   DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,5,32,38,0,0                ; addps         0x2620(%rip),%xmm8        # 4dc0 <_sk_callback_sse41+0x6c1>
-  DB  68,15,40,21,40,38,0,0               ; movaps        0x2628(%rip),%xmm10        # 4dd0 <_sk_callback_sse41+0x6d1>
+  DB  68,15,88,5,224,38,0,0               ; addps         0x26e0(%rip),%xmm8        # 4e80 <_sk_callback_sse41+0x6c7>
+  DB  68,15,40,21,232,38,0,0              ; movaps        0x26e8(%rip),%xmm10        # 4e90 <_sk_callback_sse41+0x6d7>
   DB  69,15,89,211                        ; mulps         %xmm11,%xmm10
   DB  69,15,92,194                        ; subps         %xmm10,%xmm8
-  DB  68,15,40,21,40,38,0,0               ; movaps        0x2628(%rip),%xmm10        # 4de0 <_sk_callback_sse41+0x6e1>
+  DB  68,15,40,21,232,38,0,0              ; movaps        0x26e8(%rip),%xmm10        # 4ea0 <_sk_callback_sse41+0x6e7>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  68,15,40,29,44,38,0,0               ; movaps        0x262c(%rip),%xmm11        # 4df0 <_sk_callback_sse41+0x6f1>
+  DB  68,15,40,29,236,38,0,0              ; movaps        0x26ec(%rip),%xmm11        # 4eb0 <_sk_callback_sse41+0x6f7>
   DB  69,15,94,218                        ; divps         %xmm10,%xmm11
   DB  69,15,88,216                        ; addps         %xmm8,%xmm11
-  DB  68,15,89,29,44,38,0,0               ; mulps         0x262c(%rip),%xmm11        # 4e00 <_sk_callback_sse41+0x701>
+  DB  68,15,89,29,236,38,0,0              ; mulps         0x26ec(%rip),%xmm11        # 4ec0 <_sk_callback_sse41+0x707>
   DB  102,69,15,91,211                    ; cvtps2dq      %xmm11,%xmm10
   DB  243,68,15,16,64,20                  ; movss         0x14(%rax),%xmm8
   DB  69,15,198,192,0                     ; shufps        $0x0,%xmm8,%xmm8
@@ -13904,7 +13968,7 @@ _sk_parametric_r_sse41 LABEL PROC
   DB  102,69,15,56,20,193                 ; blendvps      %xmm0,%xmm9,%xmm8
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  68,15,95,192                        ; maxps         %xmm0,%xmm8
-  DB  68,15,93,5,19,38,0,0                ; minps         0x2613(%rip),%xmm8        # 4e10 <_sk_callback_sse41+0x711>
+  DB  68,15,93,5,211,38,0,0               ; minps         0x26d3(%rip),%xmm8        # 4ed0 <_sk_callback_sse41+0x717>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -13932,31 +13996,31 @@ _sk_parametric_g_sse41 LABEL PROC
   DB  68,15,88,217                        ; addps         %xmm1,%xmm11
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,91,227                        ; cvtdq2ps      %xmm11,%xmm12
-  DB  68,15,89,37,180,37,0,0              ; mulps         0x25b4(%rip),%xmm12        # 4e20 <_sk_callback_sse41+0x721>
-  DB  68,15,84,29,188,37,0,0              ; andps         0x25bc(%rip),%xmm11        # 4e30 <_sk_callback_sse41+0x731>
-  DB  68,15,86,29,196,37,0,0              ; orps          0x25c4(%rip),%xmm11        # 4e40 <_sk_callback_sse41+0x741>
-  DB  68,15,88,37,204,37,0,0              ; addps         0x25cc(%rip),%xmm12        # 4e50 <_sk_callback_sse41+0x751>
-  DB  15,40,13,213,37,0,0                 ; movaps        0x25d5(%rip),%xmm1        # 4e60 <_sk_callback_sse41+0x761>
+  DB  68,15,89,37,116,38,0,0              ; mulps         0x2674(%rip),%xmm12        # 4ee0 <_sk_callback_sse41+0x727>
+  DB  68,15,84,29,124,38,0,0              ; andps         0x267c(%rip),%xmm11        # 4ef0 <_sk_callback_sse41+0x737>
+  DB  68,15,86,29,132,38,0,0              ; orps          0x2684(%rip),%xmm11        # 4f00 <_sk_callback_sse41+0x747>
+  DB  68,15,88,37,140,38,0,0              ; addps         0x268c(%rip),%xmm12        # 4f10 <_sk_callback_sse41+0x757>
+  DB  15,40,13,149,38,0,0                 ; movaps        0x2695(%rip),%xmm1        # 4f20 <_sk_callback_sse41+0x767>
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
   DB  68,15,92,225                        ; subps         %xmm1,%xmm12
-  DB  68,15,88,29,213,37,0,0              ; addps         0x25d5(%rip),%xmm11        # 4e70 <_sk_callback_sse41+0x771>
-  DB  15,40,13,222,37,0,0                 ; movaps        0x25de(%rip),%xmm1        # 4e80 <_sk_callback_sse41+0x781>
+  DB  68,15,88,29,149,38,0,0              ; addps         0x2695(%rip),%xmm11        # 4f30 <_sk_callback_sse41+0x777>
+  DB  15,40,13,158,38,0,0                 ; movaps        0x269e(%rip),%xmm1        # 4f40 <_sk_callback_sse41+0x787>
   DB  65,15,94,203                        ; divps         %xmm11,%xmm1
   DB  68,15,92,225                        ; subps         %xmm1,%xmm12
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  102,69,15,58,8,212,1                ; roundps       $0x1,%xmm12,%xmm10
   DB  69,15,40,220                        ; movaps        %xmm12,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,37,203,37,0,0              ; addps         0x25cb(%rip),%xmm12        # 4e90 <_sk_callback_sse41+0x791>
-  DB  15,40,13,212,37,0,0                 ; movaps        0x25d4(%rip),%xmm1        # 4ea0 <_sk_callback_sse41+0x7a1>
+  DB  68,15,88,37,139,38,0,0              ; addps         0x268b(%rip),%xmm12        # 4f50 <_sk_callback_sse41+0x797>
+  DB  15,40,13,148,38,0,0                 ; movaps        0x2694(%rip),%xmm1        # 4f60 <_sk_callback_sse41+0x7a7>
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
   DB  68,15,92,225                        ; subps         %xmm1,%xmm12
-  DB  68,15,40,21,212,37,0,0              ; movaps        0x25d4(%rip),%xmm10        # 4eb0 <_sk_callback_sse41+0x7b1>
+  DB  68,15,40,21,148,38,0,0              ; movaps        0x2694(%rip),%xmm10        # 4f70 <_sk_callback_sse41+0x7b7>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  15,40,13,217,37,0,0                 ; movaps        0x25d9(%rip),%xmm1        # 4ec0 <_sk_callback_sse41+0x7c1>
+  DB  15,40,13,153,38,0,0                 ; movaps        0x2699(%rip),%xmm1        # 4f80 <_sk_callback_sse41+0x7c7>
   DB  65,15,94,202                        ; divps         %xmm10,%xmm1
   DB  65,15,88,204                        ; addps         %xmm12,%xmm1
-  DB  15,89,13,218,37,0,0                 ; mulps         0x25da(%rip),%xmm1        # 4ed0 <_sk_callback_sse41+0x7d1>
+  DB  15,89,13,154,38,0,0                 ; mulps         0x269a(%rip),%xmm1        # 4f90 <_sk_callback_sse41+0x7d7>
   DB  102,68,15,91,209                    ; cvtps2dq      %xmm1,%xmm10
   DB  243,15,16,72,20                     ; movss         0x14(%rax),%xmm1
   DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
@@ -13964,7 +14028,7 @@ _sk_parametric_g_sse41 LABEL PROC
   DB  102,65,15,56,20,201                 ; blendvps      %xmm0,%xmm9,%xmm1
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,95,200                           ; maxps         %xmm0,%xmm1
-  DB  15,93,13,197,37,0,0                 ; minps         0x25c5(%rip),%xmm1        # 4ee0 <_sk_callback_sse41+0x7e1>
+  DB  15,93,13,133,38,0,0                 ; minps         0x2685(%rip),%xmm1        # 4fa0 <_sk_callback_sse41+0x7e7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -13992,31 +14056,31 @@ _sk_parametric_b_sse41 LABEL PROC
   DB  68,15,88,218                        ; addps         %xmm2,%xmm11
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,91,227                        ; cvtdq2ps      %xmm11,%xmm12
-  DB  68,15,89,37,102,37,0,0              ; mulps         0x2566(%rip),%xmm12        # 4ef0 <_sk_callback_sse41+0x7f1>
-  DB  68,15,84,29,110,37,0,0              ; andps         0x256e(%rip),%xmm11        # 4f00 <_sk_callback_sse41+0x801>
-  DB  68,15,86,29,118,37,0,0              ; orps          0x2576(%rip),%xmm11        # 4f10 <_sk_callback_sse41+0x811>
-  DB  68,15,88,37,126,37,0,0              ; addps         0x257e(%rip),%xmm12        # 4f20 <_sk_callback_sse41+0x821>
-  DB  15,40,21,135,37,0,0                 ; movaps        0x2587(%rip),%xmm2        # 4f30 <_sk_callback_sse41+0x831>
+  DB  68,15,89,37,38,38,0,0               ; mulps         0x2626(%rip),%xmm12        # 4fb0 <_sk_callback_sse41+0x7f7>
+  DB  68,15,84,29,46,38,0,0               ; andps         0x262e(%rip),%xmm11        # 4fc0 <_sk_callback_sse41+0x807>
+  DB  68,15,86,29,54,38,0,0               ; orps          0x2636(%rip),%xmm11        # 4fd0 <_sk_callback_sse41+0x817>
+  DB  68,15,88,37,62,38,0,0               ; addps         0x263e(%rip),%xmm12        # 4fe0 <_sk_callback_sse41+0x827>
+  DB  15,40,21,71,38,0,0                  ; movaps        0x2647(%rip),%xmm2        # 4ff0 <_sk_callback_sse41+0x837>
   DB  65,15,89,211                        ; mulps         %xmm11,%xmm2
   DB  68,15,92,226                        ; subps         %xmm2,%xmm12
-  DB  68,15,88,29,135,37,0,0              ; addps         0x2587(%rip),%xmm11        # 4f40 <_sk_callback_sse41+0x841>
-  DB  15,40,21,144,37,0,0                 ; movaps        0x2590(%rip),%xmm2        # 4f50 <_sk_callback_sse41+0x851>
+  DB  68,15,88,29,71,38,0,0               ; addps         0x2647(%rip),%xmm11        # 5000 <_sk_callback_sse41+0x847>
+  DB  15,40,21,80,38,0,0                  ; movaps        0x2650(%rip),%xmm2        # 5010 <_sk_callback_sse41+0x857>
   DB  65,15,94,211                        ; divps         %xmm11,%xmm2
   DB  68,15,92,226                        ; subps         %xmm2,%xmm12
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  102,69,15,58,8,212,1                ; roundps       $0x1,%xmm12,%xmm10
   DB  69,15,40,220                        ; movaps        %xmm12,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,37,125,37,0,0              ; addps         0x257d(%rip),%xmm12        # 4f60 <_sk_callback_sse41+0x861>
-  DB  15,40,21,134,37,0,0                 ; movaps        0x2586(%rip),%xmm2        # 4f70 <_sk_callback_sse41+0x871>
+  DB  68,15,88,37,61,38,0,0               ; addps         0x263d(%rip),%xmm12        # 5020 <_sk_callback_sse41+0x867>
+  DB  15,40,21,70,38,0,0                  ; movaps        0x2646(%rip),%xmm2        # 5030 <_sk_callback_sse41+0x877>
   DB  65,15,89,211                        ; mulps         %xmm11,%xmm2
   DB  68,15,92,226                        ; subps         %xmm2,%xmm12
-  DB  68,15,40,21,134,37,0,0              ; movaps        0x2586(%rip),%xmm10        # 4f80 <_sk_callback_sse41+0x881>
+  DB  68,15,40,21,70,38,0,0               ; movaps        0x2646(%rip),%xmm10        # 5040 <_sk_callback_sse41+0x887>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  15,40,21,139,37,0,0                 ; movaps        0x258b(%rip),%xmm2        # 4f90 <_sk_callback_sse41+0x891>
+  DB  15,40,21,75,38,0,0                  ; movaps        0x264b(%rip),%xmm2        # 5050 <_sk_callback_sse41+0x897>
   DB  65,15,94,210                        ; divps         %xmm10,%xmm2
   DB  65,15,88,212                        ; addps         %xmm12,%xmm2
-  DB  15,89,21,140,37,0,0                 ; mulps         0x258c(%rip),%xmm2        # 4fa0 <_sk_callback_sse41+0x8a1>
+  DB  15,89,21,76,38,0,0                  ; mulps         0x264c(%rip),%xmm2        # 5060 <_sk_callback_sse41+0x8a7>
   DB  102,68,15,91,210                    ; cvtps2dq      %xmm2,%xmm10
   DB  243,15,16,80,20                     ; movss         0x14(%rax),%xmm2
   DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
@@ -14024,7 +14088,7 @@ _sk_parametric_b_sse41 LABEL PROC
   DB  102,65,15,56,20,209                 ; blendvps      %xmm0,%xmm9,%xmm2
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,95,208                           ; maxps         %xmm0,%xmm2
-  DB  15,93,21,119,37,0,0                 ; minps         0x2577(%rip),%xmm2        # 4fb0 <_sk_callback_sse41+0x8b1>
+  DB  15,93,21,55,38,0,0                  ; minps         0x2637(%rip),%xmm2        # 5070 <_sk_callback_sse41+0x8b7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -14052,31 +14116,31 @@ _sk_parametric_a_sse41 LABEL PROC
   DB  68,15,88,219                        ; addps         %xmm3,%xmm11
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,91,227                        ; cvtdq2ps      %xmm11,%xmm12
-  DB  68,15,89,37,24,37,0,0               ; mulps         0x2518(%rip),%xmm12        # 4fc0 <_sk_callback_sse41+0x8c1>
-  DB  68,15,84,29,32,37,0,0               ; andps         0x2520(%rip),%xmm11        # 4fd0 <_sk_callback_sse41+0x8d1>
-  DB  68,15,86,29,40,37,0,0               ; orps          0x2528(%rip),%xmm11        # 4fe0 <_sk_callback_sse41+0x8e1>
-  DB  68,15,88,37,48,37,0,0               ; addps         0x2530(%rip),%xmm12        # 4ff0 <_sk_callback_sse41+0x8f1>
-  DB  15,40,29,57,37,0,0                  ; movaps        0x2539(%rip),%xmm3        # 5000 <_sk_callback_sse41+0x901>
+  DB  68,15,89,37,216,37,0,0              ; mulps         0x25d8(%rip),%xmm12        # 5080 <_sk_callback_sse41+0x8c7>
+  DB  68,15,84,29,224,37,0,0              ; andps         0x25e0(%rip),%xmm11        # 5090 <_sk_callback_sse41+0x8d7>
+  DB  68,15,86,29,232,37,0,0              ; orps          0x25e8(%rip),%xmm11        # 50a0 <_sk_callback_sse41+0x8e7>
+  DB  68,15,88,37,240,37,0,0              ; addps         0x25f0(%rip),%xmm12        # 50b0 <_sk_callback_sse41+0x8f7>
+  DB  15,40,29,249,37,0,0                 ; movaps        0x25f9(%rip),%xmm3        # 50c0 <_sk_callback_sse41+0x907>
   DB  65,15,89,219                        ; mulps         %xmm11,%xmm3
   DB  68,15,92,227                        ; subps         %xmm3,%xmm12
-  DB  68,15,88,29,57,37,0,0               ; addps         0x2539(%rip),%xmm11        # 5010 <_sk_callback_sse41+0x911>
-  DB  15,40,29,66,37,0,0                  ; movaps        0x2542(%rip),%xmm3        # 5020 <_sk_callback_sse41+0x921>
+  DB  68,15,88,29,249,37,0,0              ; addps         0x25f9(%rip),%xmm11        # 50d0 <_sk_callback_sse41+0x917>
+  DB  15,40,29,2,38,0,0                   ; movaps        0x2602(%rip),%xmm3        # 50e0 <_sk_callback_sse41+0x927>
   DB  65,15,94,219                        ; divps         %xmm11,%xmm3
   DB  68,15,92,227                        ; subps         %xmm3,%xmm12
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  102,69,15,58,8,212,1                ; roundps       $0x1,%xmm12,%xmm10
   DB  69,15,40,220                        ; movaps        %xmm12,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  68,15,88,37,47,37,0,0               ; addps         0x252f(%rip),%xmm12        # 5030 <_sk_callback_sse41+0x931>
-  DB  15,40,29,56,37,0,0                  ; movaps        0x2538(%rip),%xmm3        # 5040 <_sk_callback_sse41+0x941>
+  DB  68,15,88,37,239,37,0,0              ; addps         0x25ef(%rip),%xmm12        # 50f0 <_sk_callback_sse41+0x937>
+  DB  15,40,29,248,37,0,0                 ; movaps        0x25f8(%rip),%xmm3        # 5100 <_sk_callback_sse41+0x947>
   DB  65,15,89,219                        ; mulps         %xmm11,%xmm3
   DB  68,15,92,227                        ; subps         %xmm3,%xmm12
-  DB  68,15,40,21,56,37,0,0               ; movaps        0x2538(%rip),%xmm10        # 5050 <_sk_callback_sse41+0x951>
+  DB  68,15,40,21,248,37,0,0              ; movaps        0x25f8(%rip),%xmm10        # 5110 <_sk_callback_sse41+0x957>
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
-  DB  15,40,29,61,37,0,0                  ; movaps        0x253d(%rip),%xmm3        # 5060 <_sk_callback_sse41+0x961>
+  DB  15,40,29,253,37,0,0                 ; movaps        0x25fd(%rip),%xmm3        # 5120 <_sk_callback_sse41+0x967>
   DB  65,15,94,218                        ; divps         %xmm10,%xmm3
   DB  65,15,88,220                        ; addps         %xmm12,%xmm3
-  DB  15,89,29,62,37,0,0                  ; mulps         0x253e(%rip),%xmm3        # 5070 <_sk_callback_sse41+0x971>
+  DB  15,89,29,254,37,0,0                 ; mulps         0x25fe(%rip),%xmm3        # 5130 <_sk_callback_sse41+0x977>
   DB  102,68,15,91,211                    ; cvtps2dq      %xmm3,%xmm10
   DB  243,15,16,88,20                     ; movss         0x14(%rax),%xmm3
   DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
@@ -14084,7 +14148,7 @@ _sk_parametric_a_sse41 LABEL PROC
   DB  102,65,15,56,20,217                 ; blendvps      %xmm0,%xmm9,%xmm3
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,95,216                           ; maxps         %xmm0,%xmm3
-  DB  15,93,29,41,37,0,0                  ; minps         0x2529(%rip),%xmm3        # 5080 <_sk_callback_sse41+0x981>
+  DB  15,93,29,233,37,0,0                 ; minps         0x25e9(%rip),%xmm3        # 5140 <_sk_callback_sse41+0x987>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -14092,29 +14156,29 @@ _sk_parametric_a_sse41 LABEL PROC
 PUBLIC _sk_lab_to_xyz_sse41
 _sk_lab_to_xyz_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,89,5,37,37,0,0                ; mulps         0x2525(%rip),%xmm8        # 5090 <_sk_callback_sse41+0x991>
-  DB  68,15,40,13,45,37,0,0               ; movaps        0x252d(%rip),%xmm9        # 50a0 <_sk_callback_sse41+0x9a1>
+  DB  68,15,89,5,229,37,0,0               ; mulps         0x25e5(%rip),%xmm8        # 5150 <_sk_callback_sse41+0x997>
+  DB  68,15,40,13,237,37,0,0              ; movaps        0x25ed(%rip),%xmm9        # 5160 <_sk_callback_sse41+0x9a7>
   DB  65,15,89,201                        ; mulps         %xmm9,%xmm1
-  DB  15,40,5,50,37,0,0                   ; movaps        0x2532(%rip),%xmm0        # 50b0 <_sk_callback_sse41+0x9b1>
+  DB  15,40,5,242,37,0,0                  ; movaps        0x25f2(%rip),%xmm0        # 5170 <_sk_callback_sse41+0x9b7>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
   DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
   DB  15,88,208                           ; addps         %xmm0,%xmm2
-  DB  68,15,88,5,48,37,0,0                ; addps         0x2530(%rip),%xmm8        # 50c0 <_sk_callback_sse41+0x9c1>
-  DB  68,15,89,5,56,37,0,0                ; mulps         0x2538(%rip),%xmm8        # 50d0 <_sk_callback_sse41+0x9d1>
-  DB  15,89,13,65,37,0,0                  ; mulps         0x2541(%rip),%xmm1        # 50e0 <_sk_callback_sse41+0x9e1>
+  DB  68,15,88,5,240,37,0,0               ; addps         0x25f0(%rip),%xmm8        # 5180 <_sk_callback_sse41+0x9c7>
+  DB  68,15,89,5,248,37,0,0               ; mulps         0x25f8(%rip),%xmm8        # 5190 <_sk_callback_sse41+0x9d7>
+  DB  15,89,13,1,38,0,0                   ; mulps         0x2601(%rip),%xmm1        # 51a0 <_sk_callback_sse41+0x9e7>
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  15,89,21,70,37,0,0                  ; mulps         0x2546(%rip),%xmm2        # 50f0 <_sk_callback_sse41+0x9f1>
+  DB  15,89,21,6,38,0,0                   ; mulps         0x2606(%rip),%xmm2        # 51b0 <_sk_callback_sse41+0x9f7>
   DB  69,15,40,208                        ; movaps        %xmm8,%xmm10
   DB  68,15,92,210                        ; subps         %xmm2,%xmm10
   DB  68,15,40,217                        ; movaps        %xmm1,%xmm11
   DB  69,15,89,219                        ; mulps         %xmm11,%xmm11
   DB  68,15,89,217                        ; mulps         %xmm1,%xmm11
-  DB  68,15,40,13,58,37,0,0               ; movaps        0x253a(%rip),%xmm9        # 5100 <_sk_callback_sse41+0xa01>
+  DB  68,15,40,13,250,37,0,0              ; movaps        0x25fa(%rip),%xmm9        # 51c0 <_sk_callback_sse41+0xa07>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  65,15,194,195,1                     ; cmpltps       %xmm11,%xmm0
-  DB  15,40,21,58,37,0,0                  ; movaps        0x253a(%rip),%xmm2        # 5110 <_sk_callback_sse41+0xa11>
+  DB  15,40,21,250,37,0,0                 ; movaps        0x25fa(%rip),%xmm2        # 51d0 <_sk_callback_sse41+0xa17>
   DB  15,88,202                           ; addps         %xmm2,%xmm1
-  DB  68,15,40,37,63,37,0,0               ; movaps        0x253f(%rip),%xmm12        # 5120 <_sk_callback_sse41+0xa21>
+  DB  68,15,40,37,255,37,0,0              ; movaps        0x25ff(%rip),%xmm12        # 51e0 <_sk_callback_sse41+0xa27>
   DB  65,15,89,204                        ; mulps         %xmm12,%xmm1
   DB  102,65,15,56,20,203                 ; blendvps      %xmm0,%xmm11,%xmm1
   DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
@@ -14133,8 +14197,8 @@ _sk_lab_to_xyz_sse41 LABEL PROC
   DB  65,15,89,212                        ; mulps         %xmm12,%xmm2
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  102,65,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm2
-  DB  15,89,13,248,36,0,0                 ; mulps         0x24f8(%rip),%xmm1        # 5130 <_sk_callback_sse41+0xa31>
-  DB  15,89,21,1,37,0,0                   ; mulps         0x2501(%rip),%xmm2        # 5140 <_sk_callback_sse41+0xa41>
+  DB  15,89,13,184,37,0,0                 ; mulps         0x25b8(%rip),%xmm1        # 51f0 <_sk_callback_sse41+0xa37>
+  DB  15,89,21,193,37,0,0                 ; mulps         0x25c1(%rip),%xmm2        # 5200 <_sk_callback_sse41+0xa47>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
   DB  65,15,40,200                        ; movaps        %xmm8,%xmm1
@@ -14146,7 +14210,7 @@ _sk_load_a8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,241,36,0,0                 ; mulps         0x24f1(%rip),%xmm3        # 5150 <_sk_callback_sse41+0xa51>
+  DB  15,89,29,177,37,0,0                 ; mulps         0x25b1(%rip),%xmm3        # 5210 <_sk_callback_sse41+0xa57>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  15,87,201                           ; xorps         %xmm1,%xmm1
@@ -14177,7 +14241,7 @@ _sk_gather_a8_sse41 LABEL PROC
   DB  102,15,58,32,192,3                  ; pinsrb        $0x3,%eax,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,133,36,0,0                 ; mulps         0x2485(%rip),%xmm3        # 5160 <_sk_callback_sse41+0xa61>
+  DB  15,89,29,69,37,0,0                  ; mulps         0x2545(%rip),%xmm3        # 5220 <_sk_callback_sse41+0xa67>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
@@ -14188,7 +14252,7 @@ PUBLIC _sk_store_a8_sse41
 _sk_store_a8_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,121,36,0,0               ; movaps        0x2479(%rip),%xmm8        # 5170 <_sk_callback_sse41+0xa71>
+  DB  68,15,40,5,57,37,0,0                ; movaps        0x2539(%rip),%xmm8        # 5230 <_sk_callback_sse41+0xa77>
   DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
   DB  102,69,15,56,43,192                 ; packusdw      %xmm8,%xmm8
@@ -14203,9 +14267,9 @@ _sk_load_g8_sse41 LABEL PROC
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,49,4,56                   ; pmovzxbd      (%rax,%rdi,1),%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,86,36,0,0                   ; mulps         0x2456(%rip),%xmm0        # 5180 <_sk_callback_sse41+0xa81>
+  DB  15,89,5,22,37,0,0                   ; mulps         0x2516(%rip),%xmm0        # 5240 <_sk_callback_sse41+0xa87>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,93,36,0,0                  ; movaps        0x245d(%rip),%xmm3        # 5190 <_sk_callback_sse41+0xa91>
+  DB  15,40,29,29,37,0,0                  ; movaps        0x251d(%rip),%xmm3        # 5250 <_sk_callback_sse41+0xa97>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -14234,9 +14298,9 @@ _sk_gather_g8_sse41 LABEL PROC
   DB  102,15,58,32,192,3                  ; pinsrb        $0x3,%eax,%xmm0
   DB  102,15,56,49,192                    ; pmovzxbd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,246,35,0,0                  ; mulps         0x23f6(%rip),%xmm0        # 51a0 <_sk_callback_sse41+0xaa1>
+  DB  15,89,5,182,36,0,0                  ; mulps         0x24b6(%rip),%xmm0        # 5260 <_sk_callback_sse41+0xaa7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,253,35,0,0                 ; movaps        0x23fd(%rip),%xmm3        # 51b0 <_sk_callback_sse41+0xab1>
+  DB  15,40,29,189,36,0,0                 ; movaps        0x24bd(%rip),%xmm3        # 5270 <_sk_callback_sse41+0xab7>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -14279,17 +14343,17 @@ _sk_gather_i8_sse41 LABEL PROC
   DB  102,15,58,34,28,8,1                 ; pinsrd        $0x1,(%rax,%rcx,1),%xmm3
   DB  102,66,15,58,34,28,144,2            ; pinsrd        $0x2,(%rax,%r10,4),%xmm3
   DB  102,66,15,58,34,28,8,3              ; pinsrd        $0x3,(%rax,%r9,1),%xmm3
-  DB  102,15,111,5,84,35,0,0              ; movdqa        0x2354(%rip),%xmm0        # 51c0 <_sk_callback_sse41+0xac1>
+  DB  102,15,111,5,20,36,0,0              ; movdqa        0x2414(%rip),%xmm0        # 5280 <_sk_callback_sse41+0xac7>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,85,35,0,0                ; movaps        0x2355(%rip),%xmm8        # 51d0 <_sk_callback_sse41+0xad1>
+  DB  68,15,40,5,21,36,0,0                ; movaps        0x2415(%rip),%xmm8        # 5290 <_sk_callback_sse41+0xad7>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
-  DB  102,15,56,0,13,84,35,0,0            ; pshufb        0x2354(%rip),%xmm1        # 51e0 <_sk_callback_sse41+0xae1>
+  DB  102,15,56,0,13,20,36,0,0            ; pshufb        0x2414(%rip),%xmm1        # 52a0 <_sk_callback_sse41+0xae7>
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
-  DB  102,15,56,0,21,80,35,0,0            ; pshufb        0x2350(%rip),%xmm2        # 51f0 <_sk_callback_sse41+0xaf1>
+  DB  102,15,56,0,21,16,36,0,0            ; pshufb        0x2410(%rip),%xmm2        # 52b0 <_sk_callback_sse41+0xaf7>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
@@ -14303,19 +14367,19 @@ _sk_load_565_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,51,20,120                 ; pmovzxwd      (%rax,%rdi,2),%xmm2
-  DB  102,15,111,5,54,35,0,0              ; movdqa        0x2336(%rip),%xmm0        # 5200 <_sk_callback_sse41+0xb01>
+  DB  102,15,111,5,246,35,0,0             ; movdqa        0x23f6(%rip),%xmm0        # 52c0 <_sk_callback_sse41+0xb07>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,56,35,0,0                   ; mulps         0x2338(%rip),%xmm0        # 5210 <_sk_callback_sse41+0xb11>
-  DB  102,15,111,13,64,35,0,0             ; movdqa        0x2340(%rip),%xmm1        # 5220 <_sk_callback_sse41+0xb21>
+  DB  15,89,5,248,35,0,0                  ; mulps         0x23f8(%rip),%xmm0        # 52d0 <_sk_callback_sse41+0xb17>
+  DB  102,15,111,13,0,36,0,0              ; movdqa        0x2400(%rip),%xmm1        # 52e0 <_sk_callback_sse41+0xb27>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,66,35,0,0                  ; mulps         0x2342(%rip),%xmm1        # 5230 <_sk_callback_sse41+0xb31>
-  DB  102,15,219,21,74,35,0,0             ; pand          0x234a(%rip),%xmm2        # 5240 <_sk_callback_sse41+0xb41>
+  DB  15,89,13,2,36,0,0                   ; mulps         0x2402(%rip),%xmm1        # 52f0 <_sk_callback_sse41+0xb37>
+  DB  102,15,219,21,10,36,0,0             ; pand          0x240a(%rip),%xmm2        # 5300 <_sk_callback_sse41+0xb47>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,80,35,0,0                  ; mulps         0x2350(%rip),%xmm2        # 5250 <_sk_callback_sse41+0xb51>
+  DB  15,89,21,16,36,0,0                  ; mulps         0x2410(%rip),%xmm2        # 5310 <_sk_callback_sse41+0xb57>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,87,35,0,0                  ; movaps        0x2357(%rip),%xmm3        # 5260 <_sk_callback_sse41+0xb61>
+  DB  15,40,29,23,36,0,0                  ; movaps        0x2417(%rip),%xmm3        # 5320 <_sk_callback_sse41+0xb67>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_gather_565_sse41
@@ -14341,31 +14405,31 @@ _sk_gather_565_sse41 LABEL PROC
   DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
   DB  102,15,196,192,3                    ; pinsrw        $0x3,%eax,%xmm0
   DB  102,15,56,51,208                    ; pmovzxwd      %xmm0,%xmm2
-  DB  102,15,111,5,252,34,0,0             ; movdqa        0x22fc(%rip),%xmm0        # 5270 <_sk_callback_sse41+0xb71>
+  DB  102,15,111,5,188,35,0,0             ; movdqa        0x23bc(%rip),%xmm0        # 5330 <_sk_callback_sse41+0xb77>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,254,34,0,0                  ; mulps         0x22fe(%rip),%xmm0        # 5280 <_sk_callback_sse41+0xb81>
-  DB  102,15,111,13,6,35,0,0              ; movdqa        0x2306(%rip),%xmm1        # 5290 <_sk_callback_sse41+0xb91>
+  DB  15,89,5,190,35,0,0                  ; mulps         0x23be(%rip),%xmm0        # 5340 <_sk_callback_sse41+0xb87>
+  DB  102,15,111,13,198,35,0,0            ; movdqa        0x23c6(%rip),%xmm1        # 5350 <_sk_callback_sse41+0xb97>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,8,35,0,0                   ; mulps         0x2308(%rip),%xmm1        # 52a0 <_sk_callback_sse41+0xba1>
-  DB  102,15,219,21,16,35,0,0             ; pand          0x2310(%rip),%xmm2        # 52b0 <_sk_callback_sse41+0xbb1>
+  DB  15,89,13,200,35,0,0                 ; mulps         0x23c8(%rip),%xmm1        # 5360 <_sk_callback_sse41+0xba7>
+  DB  102,15,219,21,208,35,0,0            ; pand          0x23d0(%rip),%xmm2        # 5370 <_sk_callback_sse41+0xbb7>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,22,35,0,0                  ; mulps         0x2316(%rip),%xmm2        # 52c0 <_sk_callback_sse41+0xbc1>
+  DB  15,89,21,214,35,0,0                 ; mulps         0x23d6(%rip),%xmm2        # 5380 <_sk_callback_sse41+0xbc7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,29,35,0,0                  ; movaps        0x231d(%rip),%xmm3        # 52d0 <_sk_callback_sse41+0xbd1>
+  DB  15,40,29,221,35,0,0                 ; movaps        0x23dd(%rip),%xmm3        # 5390 <_sk_callback_sse41+0xbd7>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_565_sse41
 _sk_store_565_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,30,35,0,0                ; movaps        0x231e(%rip),%xmm8        # 52e0 <_sk_callback_sse41+0xbe1>
+  DB  68,15,40,5,222,35,0,0               ; movaps        0x23de(%rip),%xmm8        # 53a0 <_sk_callback_sse41+0xbe7>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
   DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
-  DB  68,15,40,21,19,35,0,0               ; movaps        0x2313(%rip),%xmm10        # 52f0 <_sk_callback_sse41+0xbf1>
+  DB  68,15,40,21,211,35,0,0              ; movaps        0x23d3(%rip),%xmm10        # 53b0 <_sk_callback_sse41+0xbf7>
   DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
   DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
   DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
@@ -14383,21 +14447,21 @@ _sk_load_4444_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  102,15,56,51,28,120                 ; pmovzxwd      (%rax,%rdi,2),%xmm3
-  DB  102,15,111,5,222,34,0,0             ; movdqa        0x22de(%rip),%xmm0        # 5300 <_sk_callback_sse41+0xc01>
+  DB  102,15,111,5,158,35,0,0             ; movdqa        0x239e(%rip),%xmm0        # 53c0 <_sk_callback_sse41+0xc07>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,224,34,0,0                  ; mulps         0x22e0(%rip),%xmm0        # 5310 <_sk_callback_sse41+0xc11>
-  DB  102,15,111,13,232,34,0,0            ; movdqa        0x22e8(%rip),%xmm1        # 5320 <_sk_callback_sse41+0xc21>
+  DB  15,89,5,160,35,0,0                  ; mulps         0x23a0(%rip),%xmm0        # 53d0 <_sk_callback_sse41+0xc17>
+  DB  102,15,111,13,168,35,0,0            ; movdqa        0x23a8(%rip),%xmm1        # 53e0 <_sk_callback_sse41+0xc27>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,234,34,0,0                 ; mulps         0x22ea(%rip),%xmm1        # 5330 <_sk_callback_sse41+0xc31>
-  DB  102,15,111,21,242,34,0,0            ; movdqa        0x22f2(%rip),%xmm2        # 5340 <_sk_callback_sse41+0xc41>
+  DB  15,89,13,170,35,0,0                 ; mulps         0x23aa(%rip),%xmm1        # 53f0 <_sk_callback_sse41+0xc37>
+  DB  102,15,111,21,178,35,0,0            ; movdqa        0x23b2(%rip),%xmm2        # 5400 <_sk_callback_sse41+0xc47>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,244,34,0,0                 ; mulps         0x22f4(%rip),%xmm2        # 5350 <_sk_callback_sse41+0xc51>
-  DB  102,15,219,29,252,34,0,0            ; pand          0x22fc(%rip),%xmm3        # 5360 <_sk_callback_sse41+0xc61>
+  DB  15,89,21,180,35,0,0                 ; mulps         0x23b4(%rip),%xmm2        # 5410 <_sk_callback_sse41+0xc57>
+  DB  102,15,219,29,188,35,0,0            ; pand          0x23bc(%rip),%xmm3        # 5420 <_sk_callback_sse41+0xc67>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,2,35,0,0                   ; mulps         0x2302(%rip),%xmm3        # 5370 <_sk_callback_sse41+0xc71>
+  DB  15,89,29,194,35,0,0                 ; mulps         0x23c2(%rip),%xmm3        # 5430 <_sk_callback_sse41+0xc77>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -14424,21 +14488,21 @@ _sk_gather_4444_sse41 LABEL PROC
   DB  65,15,183,4,65                      ; movzwl        (%r9,%rax,2),%eax
   DB  102,15,196,192,3                    ; pinsrw        $0x3,%eax,%xmm0
   DB  102,15,56,51,216                    ; pmovzxwd      %xmm0,%xmm3
-  DB  102,15,111,5,165,34,0,0             ; movdqa        0x22a5(%rip),%xmm0        # 5380 <_sk_callback_sse41+0xc81>
+  DB  102,15,111,5,101,35,0,0             ; movdqa        0x2365(%rip),%xmm0        # 5440 <_sk_callback_sse41+0xc87>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,167,34,0,0                  ; mulps         0x22a7(%rip),%xmm0        # 5390 <_sk_callback_sse41+0xc91>
-  DB  102,15,111,13,175,34,0,0            ; movdqa        0x22af(%rip),%xmm1        # 53a0 <_sk_callback_sse41+0xca1>
+  DB  15,89,5,103,35,0,0                  ; mulps         0x2367(%rip),%xmm0        # 5450 <_sk_callback_sse41+0xc97>
+  DB  102,15,111,13,111,35,0,0            ; movdqa        0x236f(%rip),%xmm1        # 5460 <_sk_callback_sse41+0xca7>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,177,34,0,0                 ; mulps         0x22b1(%rip),%xmm1        # 53b0 <_sk_callback_sse41+0xcb1>
-  DB  102,15,111,21,185,34,0,0            ; movdqa        0x22b9(%rip),%xmm2        # 53c0 <_sk_callback_sse41+0xcc1>
+  DB  15,89,13,113,35,0,0                 ; mulps         0x2371(%rip),%xmm1        # 5470 <_sk_callback_sse41+0xcb7>
+  DB  102,15,111,21,121,35,0,0            ; movdqa        0x2379(%rip),%xmm2        # 5480 <_sk_callback_sse41+0xcc7>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,187,34,0,0                 ; mulps         0x22bb(%rip),%xmm2        # 53d0 <_sk_callback_sse41+0xcd1>
-  DB  102,15,219,29,195,34,0,0            ; pand          0x22c3(%rip),%xmm3        # 53e0 <_sk_callback_sse41+0xce1>
+  DB  15,89,21,123,35,0,0                 ; mulps         0x237b(%rip),%xmm2        # 5490 <_sk_callback_sse41+0xcd7>
+  DB  102,15,219,29,131,35,0,0            ; pand          0x2383(%rip),%xmm3        # 54a0 <_sk_callback_sse41+0xce7>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,201,34,0,0                 ; mulps         0x22c9(%rip),%xmm3        # 53f0 <_sk_callback_sse41+0xcf1>
+  DB  15,89,29,137,35,0,0                 ; mulps         0x2389(%rip),%xmm3        # 54b0 <_sk_callback_sse41+0xcf7>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -14446,7 +14510,7 @@ PUBLIC _sk_store_4444_sse41
 _sk_store_4444_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,200,34,0,0               ; movaps        0x22c8(%rip),%xmm8        # 5400 <_sk_callback_sse41+0xd01>
+  DB  68,15,40,5,136,35,0,0               ; movaps        0x2388(%rip),%xmm8        # 54c0 <_sk_callback_sse41+0xd07>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -14474,17 +14538,17 @@ _sk_load_8888_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  15,16,28,184                        ; movups        (%rax,%rdi,4),%xmm3
-  DB  15,40,5,103,34,0,0                  ; movaps        0x2267(%rip),%xmm0        # 5410 <_sk_callback_sse41+0xd11>
+  DB  15,40,5,39,35,0,0                   ; movaps        0x2327(%rip),%xmm0        # 54d0 <_sk_callback_sse41+0xd17>
   DB  15,84,195                           ; andps         %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,105,34,0,0               ; movaps        0x2269(%rip),%xmm8        # 5420 <_sk_callback_sse41+0xd21>
+  DB  68,15,40,5,41,35,0,0                ; movaps        0x2329(%rip),%xmm8        # 54e0 <_sk_callback_sse41+0xd27>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,40,203                           ; movaps        %xmm3,%xmm1
-  DB  102,15,56,0,13,105,34,0,0           ; pshufb        0x2269(%rip),%xmm1        # 5430 <_sk_callback_sse41+0xd31>
+  DB  102,15,56,0,13,41,35,0,0            ; pshufb        0x2329(%rip),%xmm1        # 54f0 <_sk_callback_sse41+0xd37>
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  15,40,211                           ; movaps        %xmm3,%xmm2
-  DB  102,15,56,0,21,102,34,0,0           ; pshufb        0x2266(%rip),%xmm2        # 5440 <_sk_callback_sse41+0xd41>
+  DB  102,15,56,0,21,38,35,0,0            ; pshufb        0x2326(%rip),%xmm2        # 5500 <_sk_callback_sse41+0xd47>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
@@ -14513,17 +14577,17 @@ _sk_gather_8888_sse41 LABEL PROC
   DB  102,65,15,58,34,28,129,1            ; pinsrd        $0x1,(%r9,%rax,4),%xmm3
   DB  102,67,15,58,34,28,145,2            ; pinsrd        $0x2,(%r9,%r10,4),%xmm3
   DB  102,65,15,58,34,28,137,3            ; pinsrd        $0x3,(%r9,%rcx,4),%xmm3
-  DB  102,15,111,5,255,33,0,0             ; movdqa        0x21ff(%rip),%xmm0        # 5450 <_sk_callback_sse41+0xd51>
+  DB  102,15,111,5,191,34,0,0             ; movdqa        0x22bf(%rip),%xmm0        # 5510 <_sk_callback_sse41+0xd57>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,0,34,0,0                 ; movaps        0x2200(%rip),%xmm8        # 5460 <_sk_callback_sse41+0xd61>
+  DB  68,15,40,5,192,34,0,0               ; movaps        0x22c0(%rip),%xmm8        # 5520 <_sk_callback_sse41+0xd67>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
-  DB  102,15,56,0,13,255,33,0,0           ; pshufb        0x21ff(%rip),%xmm1        # 5470 <_sk_callback_sse41+0xd71>
+  DB  102,15,56,0,13,191,34,0,0           ; pshufb        0x22bf(%rip),%xmm1        # 5530 <_sk_callback_sse41+0xd77>
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,111,211                      ; movdqa        %xmm3,%xmm2
-  DB  102,15,56,0,21,251,33,0,0           ; pshufb        0x21fb(%rip),%xmm2        # 5480 <_sk_callback_sse41+0xd81>
+  DB  102,15,56,0,21,187,34,0,0           ; pshufb        0x22bb(%rip),%xmm2        # 5540 <_sk_callback_sse41+0xd87>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  102,15,114,211,24                   ; psrld         $0x18,%xmm3
@@ -14536,7 +14600,7 @@ PUBLIC _sk_store_8888_sse41
 _sk_store_8888_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,231,33,0,0               ; movaps        0x21e7(%rip),%xmm8        # 5490 <_sk_callback_sse41+0xd91>
+  DB  68,15,40,5,167,34,0,0               ; movaps        0x22a7(%rip),%xmm8        # 5550 <_sk_callback_sse41+0xd97>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -14571,18 +14635,18 @@ _sk_load_f16_sse41 LABEL PROC
   DB  102,68,15,97,216                    ; punpcklwd     %xmm0,%xmm11
   DB  102,68,15,105,200                   ; punpckhwd     %xmm0,%xmm9
   DB  102,65,15,56,51,203                 ; pmovzxwd      %xmm11,%xmm1
-  DB  102,68,15,111,5,96,33,0,0           ; movdqa        0x2160(%rip),%xmm8        # 54a0 <_sk_callback_sse41+0xda1>
+  DB  102,68,15,111,5,32,34,0,0           ; movdqa        0x2220(%rip),%xmm8        # 5560 <_sk_callback_sse41+0xda7>
   DB  102,15,111,209                      ; movdqa        %xmm1,%xmm2
   DB  102,65,15,219,208                   ; pand          %xmm8,%xmm2
   DB  102,15,239,202                      ; pxor          %xmm2,%xmm1
-  DB  102,15,111,29,91,33,0,0             ; movdqa        0x215b(%rip),%xmm3        # 54b0 <_sk_callback_sse41+0xdb1>
+  DB  102,15,111,29,27,34,0,0             ; movdqa        0x221b(%rip),%xmm3        # 5570 <_sk_callback_sse41+0xdb7>
   DB  102,15,114,242,16                   ; pslld         $0x10,%xmm2
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,15,56,63,195                    ; pmaxud        %xmm3,%xmm0
   DB  102,15,118,193                      ; pcmpeqd       %xmm1,%xmm0
   DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
   DB  102,15,235,202                      ; por           %xmm2,%xmm1
-  DB  102,68,15,111,21,71,33,0,0          ; movdqa        0x2147(%rip),%xmm10        # 54c0 <_sk_callback_sse41+0xdc1>
+  DB  102,68,15,111,21,7,34,0,0           ; movdqa        0x2207(%rip),%xmm10        # 5580 <_sk_callback_sse41+0xdc7>
   DB  102,65,15,254,202                   ; paddd         %xmm10,%xmm1
   DB  102,15,219,193                      ; pand          %xmm1,%xmm0
   DB  102,65,15,115,219,8                 ; psrldq        $0x8,%xmm11
@@ -14653,18 +14717,18 @@ _sk_gather_f16_sse41 LABEL PROC
   DB  102,68,15,97,218                    ; punpcklwd     %xmm2,%xmm11
   DB  102,68,15,105,202                   ; punpckhwd     %xmm2,%xmm9
   DB  102,65,15,56,51,203                 ; pmovzxwd      %xmm11,%xmm1
-  DB  102,68,15,111,5,5,32,0,0            ; movdqa        0x2005(%rip),%xmm8        # 54d0 <_sk_callback_sse41+0xdd1>
+  DB  102,68,15,111,5,197,32,0,0          ; movdqa        0x20c5(%rip),%xmm8        # 5590 <_sk_callback_sse41+0xdd7>
   DB  102,15,111,209                      ; movdqa        %xmm1,%xmm2
   DB  102,65,15,219,208                   ; pand          %xmm8,%xmm2
   DB  102,15,239,202                      ; pxor          %xmm2,%xmm1
-  DB  102,15,111,29,0,32,0,0              ; movdqa        0x2000(%rip),%xmm3        # 54e0 <_sk_callback_sse41+0xde1>
+  DB  102,15,111,29,192,32,0,0            ; movdqa        0x20c0(%rip),%xmm3        # 55a0 <_sk_callback_sse41+0xde7>
   DB  102,15,114,242,16                   ; pslld         $0x10,%xmm2
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,15,56,63,195                    ; pmaxud        %xmm3,%xmm0
   DB  102,15,118,193                      ; pcmpeqd       %xmm1,%xmm0
   DB  102,15,114,241,13                   ; pslld         $0xd,%xmm1
   DB  102,15,235,202                      ; por           %xmm2,%xmm1
-  DB  102,68,15,111,21,236,31,0,0         ; movdqa        0x1fec(%rip),%xmm10        # 54f0 <_sk_callback_sse41+0xdf1>
+  DB  102,68,15,111,21,172,32,0,0         ; movdqa        0x20ac(%rip),%xmm10        # 55b0 <_sk_callback_sse41+0xdf7>
   DB  102,65,15,254,202                   ; paddd         %xmm10,%xmm1
   DB  102,15,219,193                      ; pand          %xmm1,%xmm0
   DB  102,65,15,115,219,8                 ; psrldq        $0x8,%xmm11
@@ -14710,17 +14774,17 @@ PUBLIC _sk_store_f16_sse41
 _sk_store_f16_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  102,68,15,111,21,34,31,0,0          ; movdqa        0x1f22(%rip),%xmm10        # 5500 <_sk_callback_sse41+0xe01>
+  DB  102,68,15,111,21,226,31,0,0         ; movdqa        0x1fe2(%rip),%xmm10        # 55c0 <_sk_callback_sse41+0xe07>
   DB  102,68,15,111,224                   ; movdqa        %xmm0,%xmm12
   DB  102,68,15,111,232                   ; movdqa        %xmm0,%xmm13
   DB  102,69,15,219,234                   ; pand          %xmm10,%xmm13
   DB  102,69,15,239,229                   ; pxor          %xmm13,%xmm12
-  DB  102,68,15,111,13,21,31,0,0          ; movdqa        0x1f15(%rip),%xmm9        # 5510 <_sk_callback_sse41+0xe11>
+  DB  102,68,15,111,13,213,31,0,0         ; movdqa        0x1fd5(%rip),%xmm9        # 55d0 <_sk_callback_sse41+0xe17>
   DB  102,65,15,114,213,16                ; psrld         $0x10,%xmm13
   DB  102,69,15,111,193                   ; movdqa        %xmm9,%xmm8
   DB  102,69,15,102,196                   ; pcmpgtd       %xmm12,%xmm8
   DB  102,65,15,114,212,13                ; psrld         $0xd,%xmm12
-  DB  102,68,15,111,29,6,31,0,0           ; movdqa        0x1f06(%rip),%xmm11        # 5520 <_sk_callback_sse41+0xe21>
+  DB  102,68,15,111,29,198,31,0,0         ; movdqa        0x1fc6(%rip),%xmm11        # 55e0 <_sk_callback_sse41+0xe27>
   DB  102,69,15,235,235                   ; por           %xmm11,%xmm13
   DB  102,69,15,254,236                   ; paddd         %xmm12,%xmm13
   DB  102,69,15,223,197                   ; pandn         %xmm13,%xmm8
@@ -14788,7 +14852,7 @@ _sk_load_u16_be_sse41 LABEL PROC
   DB  102,15,235,200                      ; por           %xmm0,%xmm1
   DB  102,15,56,51,193                    ; pmovzxwd      %xmm1,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,213,29,0,0               ; movaps        0x1dd5(%rip),%xmm8        # 5530 <_sk_callback_sse41+0xe31>
+  DB  68,15,40,5,149,30,0,0               ; movaps        0x1e95(%rip),%xmm8        # 55f0 <_sk_callback_sse41+0xe37>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -14838,7 +14902,7 @@ _sk_load_rgb_u16_be_sse41 LABEL PROC
   DB  102,15,235,193                      ; por           %xmm1,%xmm0
   DB  102,15,56,51,192                    ; pmovzxwd      %xmm0,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,22,29,0,0                ; movaps        0x1d16(%rip),%xmm8        # 5540 <_sk_callback_sse41+0xe41>
+  DB  68,15,40,5,214,29,0,0               ; movaps        0x1dd6(%rip),%xmm8        # 5600 <_sk_callback_sse41+0xe47>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -14855,14 +14919,14 @@ _sk_load_rgb_u16_be_sse41 LABEL PROC
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,221,28,0,0                 ; movaps        0x1cdd(%rip),%xmm3        # 5550 <_sk_callback_sse41+0xe51>
+  DB  15,40,29,157,29,0,0                 ; movaps        0x1d9d(%rip),%xmm3        # 5610 <_sk_callback_sse41+0xe57>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_u16_be_sse41
 _sk_store_u16_be_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,13,222,28,0,0              ; movaps        0x1cde(%rip),%xmm9        # 5560 <_sk_callback_sse41+0xe61>
+  DB  68,15,40,13,158,29,0,0              ; movaps        0x1d9e(%rip),%xmm9        # 5620 <_sk_callback_sse41+0xe67>
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
@@ -15055,10 +15119,10 @@ _sk_mirror_y_sse41 LABEL PROC
 PUBLIC _sk_luminance_to_alpha_sse41
 _sk_luminance_to_alpha_sse41 LABEL PROC
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
-  DB  15,89,5,58,26,0,0                   ; mulps         0x1a3a(%rip),%xmm0        # 5570 <_sk_callback_sse41+0xe71>
-  DB  15,89,13,67,26,0,0                  ; mulps         0x1a43(%rip),%xmm1        # 5580 <_sk_callback_sse41+0xe81>
+  DB  15,89,5,250,26,0,0                  ; mulps         0x1afa(%rip),%xmm0        # 5630 <_sk_callback_sse41+0xe77>
+  DB  15,89,13,3,27,0,0                   ; mulps         0x1b03(%rip),%xmm1        # 5640 <_sk_callback_sse41+0xe87>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
-  DB  15,89,29,73,26,0,0                  ; mulps         0x1a49(%rip),%xmm3        # 5590 <_sk_callback_sse41+0xe91>
+  DB  15,89,29,9,27,0,0                   ; mulps         0x1b09(%rip),%xmm3        # 5650 <_sk_callback_sse41+0xe97>
   DB  15,88,217                           ; addps         %xmm1,%xmm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
@@ -15227,6 +15291,54 @@ _sk_matrix_4x5_sse41 LABEL PROC
   DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_matrix_4x3_sse41 ; SSE4.1 stage: out[k] = f[k]*in0 + f[4+k]*in1 + f[8+k] for k = 0..3 (in0/in1 = xmm0/xmm1 on entry, f = floats at the pointer fetched by lods)
+_sk_matrix_4x3_sse41 LABEL PROC ; results are left in xmm0..xmm3; xmm8..xmm11 are used as scratch
+  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9 | keep in1, xmm1 is overwritten below
+  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8 | keep in0, xmm0 is overwritten below
+  DB  72,173                              ; lods          %ds:(%rsi),%rax | rax = next qword from (%rsi); used below as base of the 12 floats f[0..11]
+  DB  243,15,16,0                         ; movss         (%rax),%xmm0 | f[0]
+  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1 | f[1]
+  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0 | broadcast f[0] to all lanes
+  DB  243,15,16,80,16                     ; movss         0x10(%rax),%xmm2 | f[4]
+  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2 | broadcast f[4]
+  DB  243,15,16,88,32                     ; movss         0x20(%rax),%xmm3 | f[8]
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3 | broadcast f[8]
+  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2 | f[4]*in1
+  DB  15,88,211                           ; addps         %xmm3,%xmm2 | f[4]*in1 + f[8]
+  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0 | f[0]*in0
+  DB  15,88,194                           ; addps         %xmm2,%xmm0 | out0 = f[0]*in0 + f[4]*in1 + f[8]
+  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1 | broadcast f[1]
+  DB  243,15,16,80,20                     ; movss         0x14(%rax),%xmm2 | f[5]
+  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2 | broadcast f[5]
+  DB  243,15,16,88,36                     ; movss         0x24(%rax),%xmm3 | f[9]
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3 | broadcast f[9]
+  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2 | f[5]*in1
+  DB  15,88,211                           ; addps         %xmm3,%xmm2 | f[5]*in1 + f[9]
+  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1 | f[1]*in0
+  DB  15,88,202                           ; addps         %xmm2,%xmm1 | out1 = f[1]*in0 + f[5]*in1 + f[9]
+  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2 | f[2]
+  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2 | broadcast f[2]
+  DB  243,15,16,88,24                     ; movss         0x18(%rax),%xmm3 | f[6]
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3 | broadcast f[6]
+  DB  243,68,15,16,80,40                  ; movss         0x28(%rax),%xmm10 | f[10]
+  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10 | broadcast f[10]
+  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3 | f[6]*in1
+  DB  65,15,88,218                        ; addps         %xmm10,%xmm3 | f[6]*in1 + f[10]
+  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2 | f[2]*in0
+  DB  15,88,211                           ; addps         %xmm3,%xmm2 | out2 = f[2]*in0 + f[6]*in1 + f[10]
+  DB  243,15,16,88,12                     ; movss         0xc(%rax),%xmm3 | f[3]
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3 | broadcast f[3]
+  DB  243,68,15,16,80,28                  ; movss         0x1c(%rax),%xmm10 | f[7]
+  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10 | broadcast f[7]
+  DB  243,68,15,16,88,44                  ; movss         0x2c(%rax),%xmm11 | f[11]
+  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11 | broadcast f[11]
+  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10 | f[7]*in1
+  DB  69,15,88,211                        ; addps         %xmm11,%xmm10 | f[7]*in1 + f[11]
+  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3 | f[3]*in0
+  DB  65,15,88,218                        ; addps         %xmm10,%xmm3 | out3 = f[3]*in0 + f[7]*in1 + f[11]
+  DB  72,173                              ; lods          %ds:(%rsi),%rax | rax = next qword from (%rsi)
+  DB  255,224                             ; jmpq          *%rax | tail-jump to the address just loaded
+
 PUBLIC _sk_matrix_perspective_sse41
 _sk_matrix_perspective_sse41 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
@@ -15274,9 +15386,9 @@ _sk_evenly_spaced_gradient_sse41 LABEL PROC
   DB  72,139,8                            ; mov           (%rax),%rcx
   DB  76,139,88,8                         ; mov           0x8(%rax),%r11
   DB  72,255,201                          ; dec           %rcx
-  DB  120,7                               ; js            3ec6 <_sk_evenly_spaced_gradient_sse41+0x15>
+  DB  120,7                               ; js            3f80 <_sk_evenly_spaced_gradient_sse41+0x15>
   DB  243,72,15,42,201                    ; cvtsi2ss      %rcx,%xmm1
-  DB  235,21                              ; jmp           3edb <_sk_evenly_spaced_gradient_sse41+0x2a>
+  DB  235,21                              ; jmp           3f95 <_sk_evenly_spaced_gradient_sse41+0x2a>
   DB  73,137,200                          ; mov           %rcx,%r8
   DB  73,209,232                          ; shr           %r8
   DB  131,225,1                           ; and           $0x1,%ecx
@@ -15365,12 +15477,12 @@ _sk_gradient_sse41 LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,50                              ; jb            40be <_sk_gradient_sse41+0x41>
+  DB  114,50                              ; jb            4178 <_sk_gradient_sse41+0x41>
   DB  72,139,72,72                        ; mov           0x48(%rax),%rcx
   DB  73,255,200                          ; dec           %r8
   DB  72,131,193,4                        ; add           $0x4,%rcx
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
-  DB  15,40,21,254,20,0,0                 ; movaps        0x14fe(%rip),%xmm2        # 55a0 <_sk_callback_sse41+0xea1>
+  DB  15,40,21,4,21,0,0                   ; movaps        0x1504(%rip),%xmm2        # 5660 <_sk_callback_sse41+0xea7>
   DB  243,15,16,25                        ; movss         (%rcx),%xmm3
   DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
   DB  15,194,216,2                        ; cmpleps       %xmm0,%xmm3
@@ -15378,7 +15490,7 @@ _sk_gradient_sse41 LABEL PROC
   DB  102,15,254,203                      ; paddd         %xmm3,%xmm1
   DB  72,131,193,4                        ; add           $0x4,%rcx
   DB  73,255,200                          ; dec           %r8
-  DB  117,228                             ; jne           40a2 <_sk_gradient_sse41+0x25>
+  DB  117,228                             ; jne           415c <_sk_gradient_sse41+0x25>
   DB  65,86                               ; push          %r14
   DB  83                                  ; push          %rbx
   DB  102,73,15,58,22,201,1               ; pextrq        $0x1,%xmm1,%r9
@@ -15505,26 +15617,26 @@ _sk_xy_to_unit_angle_sse41 LABEL PROC
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,40,236                        ; movaps        %xmm12,%xmm13
   DB  69,15,89,237                        ; mulps         %xmm13,%xmm13
-  DB  68,15,40,21,160,18,0,0              ; movaps        0x12a0(%rip),%xmm10        # 55b0 <_sk_callback_sse41+0xeb1>
+  DB  68,15,40,21,166,18,0,0              ; movaps        0x12a6(%rip),%xmm10        # 5670 <_sk_callback_sse41+0xeb7>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,164,18,0,0              ; addps         0x12a4(%rip),%xmm10        # 55c0 <_sk_callback_sse41+0xec1>
+  DB  68,15,88,21,170,18,0,0              ; addps         0x12aa(%rip),%xmm10        # 5680 <_sk_callback_sse41+0xec7>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,168,18,0,0              ; addps         0x12a8(%rip),%xmm10        # 55d0 <_sk_callback_sse41+0xed1>
+  DB  68,15,88,21,174,18,0,0              ; addps         0x12ae(%rip),%xmm10        # 5690 <_sk_callback_sse41+0xed7>
   DB  69,15,89,213                        ; mulps         %xmm13,%xmm10
-  DB  68,15,88,21,172,18,0,0              ; addps         0x12ac(%rip),%xmm10        # 55e0 <_sk_callback_sse41+0xee1>
+  DB  68,15,88,21,178,18,0,0              ; addps         0x12b2(%rip),%xmm10        # 56a0 <_sk_callback_sse41+0xee7>
   DB  69,15,89,212                        ; mulps         %xmm12,%xmm10
   DB  65,15,194,195,1                     ; cmpltps       %xmm11,%xmm0
-  DB  68,15,40,29,171,18,0,0              ; movaps        0x12ab(%rip),%xmm11        # 55f0 <_sk_callback_sse41+0xef1>
+  DB  68,15,40,29,177,18,0,0              ; movaps        0x12b1(%rip),%xmm11        # 56b0 <_sk_callback_sse41+0xef7>
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  102,69,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm10
   DB  69,15,194,200,1                     ; cmpltps       %xmm8,%xmm9
-  DB  68,15,40,29,164,18,0,0              ; movaps        0x12a4(%rip),%xmm11        # 5600 <_sk_callback_sse41+0xf01>
+  DB  68,15,40,29,170,18,0,0              ; movaps        0x12aa(%rip),%xmm11        # 56c0 <_sk_callback_sse41+0xf07>
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  102,69,15,56,20,211                 ; blendvps      %xmm0,%xmm11,%xmm10
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
   DB  65,15,194,192,1                     ; cmpltps       %xmm8,%xmm0
-  DB  68,15,40,13,150,18,0,0              ; movaps        0x1296(%rip),%xmm9        # 5610 <_sk_callback_sse41+0xf11>
+  DB  68,15,40,13,156,18,0,0              ; movaps        0x129c(%rip),%xmm9        # 56d0 <_sk_callback_sse41+0xf17>
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
   DB  102,69,15,56,20,209                 ; blendvps      %xmm0,%xmm9,%xmm10
   DB  69,15,194,194,7                     ; cmpordps      %xmm10,%xmm8
@@ -15546,7 +15658,7 @@ _sk_xy_to_radius_sse41 LABEL PROC
 PUBLIC _sk_save_xy_sse41
 _sk_save_xy_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,106,18,0,0               ; movaps        0x126a(%rip),%xmm8        # 5620 <_sk_callback_sse41+0xf21>
+  DB  68,15,40,5,112,18,0,0               ; movaps        0x1270(%rip),%xmm8        # 56e0 <_sk_callback_sse41+0xf27>
   DB  15,17,0                             ; movups        %xmm0,(%rax)
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,88,200                        ; addps         %xmm8,%xmm9
@@ -15586,8 +15698,8 @@ _sk_bilinear_nx_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,236,17,0,0                  ; addps         0x11ec(%rip),%xmm0        # 5630 <_sk_callback_sse41+0xf31>
-  DB  68,15,40,13,244,17,0,0              ; movaps        0x11f4(%rip),%xmm9        # 5640 <_sk_callback_sse41+0xf41>
+  DB  15,88,5,242,17,0,0                  ; addps         0x11f2(%rip),%xmm0        # 56f0 <_sk_callback_sse41+0xf37>
+  DB  68,15,40,13,250,17,0,0              ; movaps        0x11fa(%rip),%xmm9        # 5700 <_sk_callback_sse41+0xf47>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15598,7 +15710,7 @@ _sk_bilinear_px_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,227,17,0,0                  ; addps         0x11e3(%rip),%xmm0        # 5650 <_sk_callback_sse41+0xf51>
+  DB  15,88,5,233,17,0,0                  ; addps         0x11e9(%rip),%xmm0        # 5710 <_sk_callback_sse41+0xf57>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -15608,8 +15720,8 @@ _sk_bilinear_ny_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,213,17,0,0                 ; addps         0x11d5(%rip),%xmm1        # 5660 <_sk_callback_sse41+0xf61>
-  DB  68,15,40,13,221,17,0,0              ; movaps        0x11dd(%rip),%xmm9        # 5670 <_sk_callback_sse41+0xf71>
+  DB  15,88,13,219,17,0,0                 ; addps         0x11db(%rip),%xmm1        # 5720 <_sk_callback_sse41+0xf67>
+  DB  68,15,40,13,227,17,0,0              ; movaps        0x11e3(%rip),%xmm9        # 5730 <_sk_callback_sse41+0xf77>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15620,7 +15732,7 @@ _sk_bilinear_py_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,203,17,0,0                 ; addps         0x11cb(%rip),%xmm1        # 5680 <_sk_callback_sse41+0xf81>
+  DB  15,88,13,209,17,0,0                 ; addps         0x11d1(%rip),%xmm1        # 5740 <_sk_callback_sse41+0xf87>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -15630,13 +15742,13 @@ _sk_bicubic_n3x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,190,17,0,0                  ; addps         0x11be(%rip),%xmm0        # 5690 <_sk_callback_sse41+0xf91>
-  DB  68,15,40,13,198,17,0,0              ; movaps        0x11c6(%rip),%xmm9        # 56a0 <_sk_callback_sse41+0xfa1>
+  DB  15,88,5,196,17,0,0                  ; addps         0x11c4(%rip),%xmm0        # 5750 <_sk_callback_sse41+0xf97>
+  DB  68,15,40,13,204,17,0,0              ; movaps        0x11cc(%rip),%xmm9        # 5760 <_sk_callback_sse41+0xfa7>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,194,17,0,0              ; mulps         0x11c2(%rip),%xmm9        # 56b0 <_sk_callback_sse41+0xfb1>
-  DB  68,15,88,13,202,17,0,0              ; addps         0x11ca(%rip),%xmm9        # 56c0 <_sk_callback_sse41+0xfc1>
+  DB  68,15,89,13,200,17,0,0              ; mulps         0x11c8(%rip),%xmm9        # 5770 <_sk_callback_sse41+0xfb7>
+  DB  68,15,88,13,208,17,0,0              ; addps         0x11d0(%rip),%xmm9        # 5780 <_sk_callback_sse41+0xfc7>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15647,16 +15759,16 @@ _sk_bicubic_n1x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,185,17,0,0                  ; addps         0x11b9(%rip),%xmm0        # 56d0 <_sk_callback_sse41+0xfd1>
-  DB  68,15,40,13,193,17,0,0              ; movaps        0x11c1(%rip),%xmm9        # 56e0 <_sk_callback_sse41+0xfe1>
+  DB  15,88,5,191,17,0,0                  ; addps         0x11bf(%rip),%xmm0        # 5790 <_sk_callback_sse41+0xfd7>
+  DB  68,15,40,13,199,17,0,0              ; movaps        0x11c7(%rip),%xmm9        # 57a0 <_sk_callback_sse41+0xfe7>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,197,17,0,0               ; movaps        0x11c5(%rip),%xmm8        # 56f0 <_sk_callback_sse41+0xff1>
+  DB  68,15,40,5,203,17,0,0               ; movaps        0x11cb(%rip),%xmm8        # 57b0 <_sk_callback_sse41+0xff7>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,201,17,0,0               ; addps         0x11c9(%rip),%xmm8        # 5700 <_sk_callback_sse41+0x1001>
+  DB  68,15,88,5,207,17,0,0               ; addps         0x11cf(%rip),%xmm8        # 57c0 <_sk_callback_sse41+0x1007>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,205,17,0,0               ; addps         0x11cd(%rip),%xmm8        # 5710 <_sk_callback_sse41+0x1011>
+  DB  68,15,88,5,211,17,0,0               ; addps         0x11d3(%rip),%xmm8        # 57d0 <_sk_callback_sse41+0x1017>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,209,17,0,0               ; addps         0x11d1(%rip),%xmm8        # 5720 <_sk_callback_sse41+0x1021>
+  DB  68,15,88,5,215,17,0,0               ; addps         0x11d7(%rip),%xmm8        # 57e0 <_sk_callback_sse41+0x1027>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -15664,17 +15776,17 @@ _sk_bicubic_n1x_sse41 LABEL PROC
 PUBLIC _sk_bicubic_p1x_sse41
 _sk_bicubic_p1x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,203,17,0,0               ; movaps        0x11cb(%rip),%xmm8        # 5730 <_sk_callback_sse41+0x1031>
+  DB  68,15,40,5,209,17,0,0               ; movaps        0x11d1(%rip),%xmm8        # 57f0 <_sk_callback_sse41+0x1037>
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,72,64                      ; movups        0x40(%rax),%xmm9
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,21,199,17,0,0              ; movaps        0x11c7(%rip),%xmm10        # 5740 <_sk_callback_sse41+0x1041>
+  DB  68,15,40,21,205,17,0,0              ; movaps        0x11cd(%rip),%xmm10        # 5800 <_sk_callback_sse41+0x1047>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,203,17,0,0              ; addps         0x11cb(%rip),%xmm10        # 5750 <_sk_callback_sse41+0x1051>
+  DB  68,15,88,21,209,17,0,0              ; addps         0x11d1(%rip),%xmm10        # 5810 <_sk_callback_sse41+0x1057>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,199,17,0,0              ; addps         0x11c7(%rip),%xmm10        # 5760 <_sk_callback_sse41+0x1061>
+  DB  68,15,88,21,205,17,0,0              ; addps         0x11cd(%rip),%xmm10        # 5820 <_sk_callback_sse41+0x1067>
   DB  68,15,17,144,128,0,0,0              ; movups        %xmm10,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -15684,11 +15796,11 @@ _sk_bicubic_p3x_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,186,17,0,0                  ; addps         0x11ba(%rip),%xmm0        # 5770 <_sk_callback_sse41+0x1071>
+  DB  15,88,5,192,17,0,0                  ; addps         0x11c0(%rip),%xmm0        # 5830 <_sk_callback_sse41+0x1077>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,186,17,0,0               ; mulps         0x11ba(%rip),%xmm8        # 5780 <_sk_callback_sse41+0x1081>
-  DB  68,15,88,5,194,17,0,0               ; addps         0x11c2(%rip),%xmm8        # 5790 <_sk_callback_sse41+0x1091>
+  DB  68,15,89,5,192,17,0,0               ; mulps         0x11c0(%rip),%xmm8        # 5840 <_sk_callback_sse41+0x1087>
+  DB  68,15,88,5,200,17,0,0               ; addps         0x11c8(%rip),%xmm8        # 5850 <_sk_callback_sse41+0x1097>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15699,13 +15811,13 @@ _sk_bicubic_n3y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,176,17,0,0                 ; addps         0x11b0(%rip),%xmm1        # 57a0 <_sk_callback_sse41+0x10a1>
-  DB  68,15,40,13,184,17,0,0              ; movaps        0x11b8(%rip),%xmm9        # 57b0 <_sk_callback_sse41+0x10b1>
+  DB  15,88,13,182,17,0,0                 ; addps         0x11b6(%rip),%xmm1        # 5860 <_sk_callback_sse41+0x10a7>
+  DB  68,15,40,13,190,17,0,0              ; movaps        0x11be(%rip),%xmm9        # 5870 <_sk_callback_sse41+0x10b7>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,180,17,0,0              ; mulps         0x11b4(%rip),%xmm9        # 57c0 <_sk_callback_sse41+0x10c1>
-  DB  68,15,88,13,188,17,0,0              ; addps         0x11bc(%rip),%xmm9        # 57d0 <_sk_callback_sse41+0x10d1>
+  DB  68,15,89,13,186,17,0,0              ; mulps         0x11ba(%rip),%xmm9        # 5880 <_sk_callback_sse41+0x10c7>
+  DB  68,15,88,13,194,17,0,0              ; addps         0x11c2(%rip),%xmm9        # 5890 <_sk_callback_sse41+0x10d7>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15716,16 +15828,16 @@ _sk_bicubic_n1y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,170,17,0,0                 ; addps         0x11aa(%rip),%xmm1        # 57e0 <_sk_callback_sse41+0x10e1>
-  DB  68,15,40,13,178,17,0,0              ; movaps        0x11b2(%rip),%xmm9        # 57f0 <_sk_callback_sse41+0x10f1>
+  DB  15,88,13,176,17,0,0                 ; addps         0x11b0(%rip),%xmm1        # 58a0 <_sk_callback_sse41+0x10e7>
+  DB  68,15,40,13,184,17,0,0              ; movaps        0x11b8(%rip),%xmm9        # 58b0 <_sk_callback_sse41+0x10f7>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,182,17,0,0               ; movaps        0x11b6(%rip),%xmm8        # 5800 <_sk_callback_sse41+0x1101>
+  DB  68,15,40,5,188,17,0,0               ; movaps        0x11bc(%rip),%xmm8        # 58c0 <_sk_callback_sse41+0x1107>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,186,17,0,0               ; addps         0x11ba(%rip),%xmm8        # 5810 <_sk_callback_sse41+0x1111>
+  DB  68,15,88,5,192,17,0,0               ; addps         0x11c0(%rip),%xmm8        # 58d0 <_sk_callback_sse41+0x1117>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,190,17,0,0               ; addps         0x11be(%rip),%xmm8        # 5820 <_sk_callback_sse41+0x1121>
+  DB  68,15,88,5,196,17,0,0               ; addps         0x11c4(%rip),%xmm8        # 58e0 <_sk_callback_sse41+0x1127>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,194,17,0,0               ; addps         0x11c2(%rip),%xmm8        # 5830 <_sk_callback_sse41+0x1131>
+  DB  68,15,88,5,200,17,0,0               ; addps         0x11c8(%rip),%xmm8        # 58f0 <_sk_callback_sse41+0x1137>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -15733,17 +15845,17 @@ _sk_bicubic_n1y_sse41 LABEL PROC
 PUBLIC _sk_bicubic_p1y_sse41
 _sk_bicubic_p1y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,188,17,0,0               ; movaps        0x11bc(%rip),%xmm8        # 5840 <_sk_callback_sse41+0x1141>
+  DB  68,15,40,5,194,17,0,0               ; movaps        0x11c2(%rip),%xmm8        # 5900 <_sk_callback_sse41+0x1147>
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,72,96                      ; movups        0x60(%rax),%xmm9
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  68,15,40,21,183,17,0,0              ; movaps        0x11b7(%rip),%xmm10        # 5850 <_sk_callback_sse41+0x1151>
+  DB  68,15,40,21,189,17,0,0              ; movaps        0x11bd(%rip),%xmm10        # 5910 <_sk_callback_sse41+0x1157>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,187,17,0,0              ; addps         0x11bb(%rip),%xmm10        # 5860 <_sk_callback_sse41+0x1161>
+  DB  68,15,88,21,193,17,0,0              ; addps         0x11c1(%rip),%xmm10        # 5920 <_sk_callback_sse41+0x1167>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,183,17,0,0              ; addps         0x11b7(%rip),%xmm10        # 5870 <_sk_callback_sse41+0x1171>
+  DB  68,15,88,21,189,17,0,0              ; addps         0x11bd(%rip),%xmm10        # 5930 <_sk_callback_sse41+0x1177>
   DB  68,15,17,144,160,0,0,0              ; movups        %xmm10,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -15753,11 +15865,11 @@ _sk_bicubic_p3y_sse41 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,169,17,0,0                 ; addps         0x11a9(%rip),%xmm1        # 5880 <_sk_callback_sse41+0x1181>
+  DB  15,88,13,175,17,0,0                 ; addps         0x11af(%rip),%xmm1        # 5940 <_sk_callback_sse41+0x1187>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,169,17,0,0               ; mulps         0x11a9(%rip),%xmm8        # 5890 <_sk_callback_sse41+0x1191>
-  DB  68,15,88,5,177,17,0,0               ; addps         0x11b1(%rip),%xmm8        # 58a0 <_sk_callback_sse41+0x11a1>
+  DB  68,15,89,5,175,17,0,0               ; mulps         0x11af(%rip),%xmm8        # 5950 <_sk_callback_sse41+0x1197>
+  DB  68,15,88,5,183,17,0,0               ; addps         0x11b7(%rip),%xmm8        # 5960 <_sk_callback_sse41+0x11a7>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -15962,11 +16074,11 @@ ALIGN 16
   DB  128,191,0,0,128,191,0               ; cmpb          $0x0,-0x40800000(%rdi)
   DB  0,224                               ; add           %ah,%al
   DB  64,0,0                              ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4988 <.literal16+0x1d8>
+  DB  224,64                              ; loopne        4a48 <.literal16+0x1d8>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        498c <.literal16+0x1dc>
+  DB  224,64                              ; loopne        4a4c <.literal16+0x1dc>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4990 <.literal16+0x1e0>
+  DB  224,64                              ; loopne        4a50 <.literal16+0x1e0>
   DB  154                                 ; (bad)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
@@ -15986,13 +16098,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49b1 <.literal16+0x201>
+  DB  71,225,61                           ; rex.RXB       loope 4a71 <.literal16+0x201>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49b5 <.literal16+0x205>
+  DB  71,225,61                           ; rex.RXB       loope 4a75 <.literal16+0x205>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49b9 <.literal16+0x209>
+  DB  71,225,61                           ; rex.RXB       loope 4a79 <.literal16+0x209>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49bd <.literal16+0x20d>
+  DB  71,225,61                           ; rex.RXB       loope 4a7d <.literal16+0x20d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -16017,13 +16129,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49f1 <.literal16+0x241>
+  DB  71,225,61                           ; rex.RXB       loope 4ab1 <.literal16+0x241>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49f5 <.literal16+0x245>
+  DB  71,225,61                           ; rex.RXB       loope 4ab5 <.literal16+0x245>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49f9 <.literal16+0x249>
+  DB  71,225,61                           ; rex.RXB       loope 4ab9 <.literal16+0x249>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 49fd <.literal16+0x24d>
+  DB  71,225,61                           ; rex.RXB       loope 4abd <.literal16+0x24d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -16048,13 +16160,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a31 <.literal16+0x281>
+  DB  71,225,61                           ; rex.RXB       loope 4af1 <.literal16+0x281>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a35 <.literal16+0x285>
+  DB  71,225,61                           ; rex.RXB       loope 4af5 <.literal16+0x285>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a39 <.literal16+0x289>
+  DB  71,225,61                           ; rex.RXB       loope 4af9 <.literal16+0x289>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a3d <.literal16+0x28d>
+  DB  71,225,61                           ; rex.RXB       loope 4afd <.literal16+0x28d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -16079,13 +16191,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a71 <.literal16+0x2c1>
+  DB  71,225,61                           ; rex.RXB       loope 4b31 <.literal16+0x2c1>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a75 <.literal16+0x2c5>
+  DB  71,225,61                           ; rex.RXB       loope 4b35 <.literal16+0x2c5>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a79 <.literal16+0x2c9>
+  DB  71,225,61                           ; rex.RXB       loope 4b39 <.literal16+0x2c9>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4a7d <.literal16+0x2cd>
+  DB  71,225,61                           ; rex.RXB       loope 4b3d <.literal16+0x2cd>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -16301,13 +16413,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        4c49 <.literal16+0x499>
+  DB  224,7                               ; loopne        4d09 <.literal16+0x499>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4c4d <.literal16+0x49d>
+  DB  224,7                               ; loopne        4d0d <.literal16+0x49d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4c51 <.literal16+0x4a1>
+  DB  224,7                               ; loopne        4d11 <.literal16+0x4a1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        4c55 <.literal16+0x4a5>
+  DB  224,7                               ; loopne        4d15 <.literal16+0x4a5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -16341,10 +16453,10 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  1,255                               ; add           %edi,%edi
   DB  255                                 ; (bad)
-  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004c98 <_sk_callback_sse41+0xa000599>
+  DB  255,5,255,255,255,9                 ; incl          0x9ffffff(%rip)        # a004d58 <_sk_callback_sse41+0xa00059f>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3004ca0 <_sk_callback_sse41+0x30005a1>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3004d60 <_sk_callback_sse41+0x30005a7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -16399,11 +16511,11 @@ ALIGN 16
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4d6b <.literal16+0x5bb>
+  DB  127,67                              ; jg            4e2b <.literal16+0x5bb>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4d6f <.literal16+0x5bf>
+  DB  127,67                              ; jg            4e2f <.literal16+0x5bf>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            4d73 <.literal16+0x5c3>
+  DB  127,67                              ; jg            4e33 <.literal16+0x5c3>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,129,128,128,59           ; addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -16418,16 +16530,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4d64 <.literal16+0x5b4>
+  DB  127,0                               ; jg            4e24 <.literal16+0x5b4>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4d68 <.literal16+0x5b8>
+  DB  127,0                               ; jg            4e28 <.literal16+0x5b8>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4d6c <.literal16+0x5bc>
+  DB  127,0                               ; jg            4e2c <.literal16+0x5bc>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4d70 <.literal16+0x5c0>
+  DB  127,0                               ; jg            4e30 <.literal16+0x5c0>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -16436,7 +16548,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4df5 <.literal16+0x645>
+  DB  119,115                             ; ja            4eb5 <.literal16+0x645>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -16447,7 +16559,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4d59 <.literal16+0x5a9>
+  DB  117,191                             ; jne           4e19 <.literal16+0x5a9>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -16459,7 +16571,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a38d9a <_sk_callback_sse41+0xffffffffe9a3469b>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a38e5a <_sk_callback_sse41+0xffffffffe9a346a1>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -16514,16 +16626,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4e34 <.literal16+0x684>
+  DB  127,0                               ; jg            4ef4 <.literal16+0x684>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4e38 <.literal16+0x688>
+  DB  127,0                               ; jg            4ef8 <.literal16+0x688>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4e3c <.literal16+0x68c>
+  DB  127,0                               ; jg            4efc <.literal16+0x68c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4e40 <.literal16+0x690>
+  DB  127,0                               ; jg            4f00 <.literal16+0x690>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -16532,7 +16644,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4ec5 <.literal16+0x715>
+  DB  119,115                             ; ja            4f85 <.literal16+0x715>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -16543,7 +16655,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4e29 <.literal16+0x679>
+  DB  117,191                             ; jne           4ee9 <.literal16+0x679>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -16555,7 +16667,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a38e6a <_sk_callback_sse41+0xffffffffe9a3476b>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a38f2a <_sk_callback_sse41+0xffffffffe9a34771>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -16610,16 +16722,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4f04 <.literal16+0x754>
+  DB  127,0                               ; jg            4fc4 <.literal16+0x754>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4f08 <.literal16+0x758>
+  DB  127,0                               ; jg            4fc8 <.literal16+0x758>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4f0c <.literal16+0x75c>
+  DB  127,0                               ; jg            4fcc <.literal16+0x75c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4f10 <.literal16+0x760>
+  DB  127,0                               ; jg            4fd0 <.literal16+0x760>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -16628,7 +16740,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            4f95 <.literal16+0x7e5>
+  DB  119,115                             ; ja            5055 <.literal16+0x7e5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -16639,7 +16751,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4ef9 <.literal16+0x749>
+  DB  117,191                             ; jne           4fb9 <.literal16+0x749>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -16651,7 +16763,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a38f3a <_sk_callback_sse41+0xffffffffe9a3483b>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a38ffa <_sk_callback_sse41+0xffffffffe9a34841>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -16706,16 +16818,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4fd4 <.literal16+0x824>
+  DB  127,0                               ; jg            5094 <.literal16+0x824>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4fd8 <.literal16+0x828>
+  DB  127,0                               ; jg            5098 <.literal16+0x828>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4fdc <.literal16+0x82c>
+  DB  127,0                               ; jg            509c <.literal16+0x82c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            4fe0 <.literal16+0x830>
+  DB  127,0                               ; jg            50a0 <.literal16+0x830>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -16724,7 +16836,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            5065 <.literal16+0x8b5>
+  DB  119,115                             ; ja            5125 <.literal16+0x8b5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -16735,7 +16847,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           4fc9 <.literal16+0x819>
+  DB  117,191                             ; jne           5089 <.literal16+0x819>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -16747,7 +16859,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3900a <_sk_callback_sse41+0xffffffffe9a3490b>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a390ca <_sk_callback_sse41+0xffffffffe9a34911>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  81                                  ; push          %rcx
   DB  140,242                             ; mov           %?,%edx
@@ -16798,13 +16910,13 @@ ALIGN 16
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
-  DB  127,67                              ; jg            50e7 <.literal16+0x937>
+  DB  127,67                              ; jg            51a7 <.literal16+0x937>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            50eb <.literal16+0x93b>
+  DB  127,67                              ; jg            51ab <.literal16+0x93b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            50ef <.literal16+0x93f>
+  DB  127,67                              ; jg            51af <.literal16+0x93f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            50f3 <.literal16+0x943>
+  DB  127,67                              ; jg            51b3 <.literal16+0x943>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -16851,16 +16963,16 @@ ALIGN 16
   DB  128,3,62                            ; addb          $0x3e,(%rbx)
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           5173 <.literal16+0x9c3>
+  DB  118,63                              ; jbe           5233 <.literal16+0x9c3>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           5177 <.literal16+0x9c7>
+  DB  118,63                              ; jbe           5237 <.literal16+0x9c7>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           517b <.literal16+0x9cb>
+  DB  118,63                              ; jbe           523b <.literal16+0x9cb>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           517f <.literal16+0x9cf>
+  DB  118,63                              ; jbe           523f <.literal16+0x9cf>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
@@ -16872,11 +16984,11 @@ ALIGN 16
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            51bb <.literal16+0xa0b>
+  DB  127,67                              ; jg            527b <.literal16+0xa0b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            51bf <.literal16+0xa0f>
+  DB  127,67                              ; jg            527f <.literal16+0xa0f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            51c3 <.literal16+0xa13>
+  DB  127,67                              ; jg            5283 <.literal16+0xa13>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,0,0,128,63               ; addb          $0x3f,-0x7fffffc5(%rax)
@@ -16905,7 +17017,7 @@ ALIGN 16
   DB  5,255,255,255,9                     ; add           $0x9ffffff,%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 30051f0 <_sk_callback_sse41+0x3000af1>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 30052b0 <_sk_callback_sse41+0x3000af7>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -16934,13 +17046,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        5229 <.literal16+0xa79>
+  DB  224,7                               ; loopne        52e9 <.literal16+0xa79>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        522d <.literal16+0xa7d>
+  DB  224,7                               ; loopne        52ed <.literal16+0xa7d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        5231 <.literal16+0xa81>
+  DB  224,7                               ; loopne        52f1 <.literal16+0xa81>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        5235 <.literal16+0xa85>
+  DB  224,7                               ; loopne        52f5 <.literal16+0xa85>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -16986,13 +17098,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        5299 <.literal16+0xae9>
+  DB  224,7                               ; loopne        5359 <.literal16+0xae9>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        529d <.literal16+0xaed>
+  DB  224,7                               ; loopne        535d <.literal16+0xaed>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        52a1 <.literal16+0xaf1>
+  DB  224,7                               ; loopne        5361 <.literal16+0xaf1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        52a5 <.literal16+0xaf5>
+  DB  224,7                               ; loopne        5365 <.literal16+0xaf5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -17030,13 +17142,13 @@ ALIGN 16
   DB  65,0,0                              ; add           %al,(%r8)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            5336 <.literal16+0xb86>
+  DB  124,66                              ; jl            53f6 <.literal16+0xb86>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            533a <.literal16+0xb8a>
+  DB  124,66                              ; jl            53fa <.literal16+0xb8a>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            533e <.literal16+0xb8e>
+  DB  124,66                              ; jl            53fe <.literal16+0xb8e>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            5342 <.literal16+0xb92>
+  DB  124,66                              ; jl            5402 <.literal16+0xb92>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,240                               ; add           %dh,%al
@@ -17126,13 +17238,13 @@ ALIGN 16
   DB  136,136,61,137,136,136              ; mov           %cl,-0x777776c3(%rax)
   DB  61,137,136,136,61                   ; cmp           $0x3d888889,%eax
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            5445 <.literal16+0xc95>
+  DB  112,65                              ; jo            5505 <.literal16+0xc95>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            5449 <.literal16+0xc99>
+  DB  112,65                              ; jo            5509 <.literal16+0xc99>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            544d <.literal16+0xc9d>
+  DB  112,65                              ; jo            550d <.literal16+0xc9d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            5451 <.literal16+0xca1>
+  DB  112,65                              ; jo            5511 <.literal16+0xca1>
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  255,0                               ; incl          (%rax)
@@ -17147,7 +17259,7 @@ ALIGN 16
   DB  5,255,255,255,9                     ; add           $0x9ffffff,%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3005440 <_sk_callback_sse41+0x3000d41>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3005500 <_sk_callback_sse41+0x3000d47>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -17174,7 +17286,7 @@ ALIGN 16
   DB  5,255,255,255,9                     ; add           $0x9ffffff,%eax
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3005480 <_sk_callback_sse41+0x3000d81>
+  DB  255,13,255,255,255,2                ; decl          0x2ffffff(%rip)        # 3005540 <_sk_callback_sse41+0x3000d87>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
   DB  255,6                               ; incl          (%rsi)
@@ -17189,11 +17301,11 @@ ALIGN 16
   DB  255,0                               ; incl          (%rax)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            54db <.literal16+0xd2b>
+  DB  127,67                              ; jg            559b <.literal16+0xd2b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            54df <.literal16+0xd2f>
+  DB  127,67                              ; jg            559f <.literal16+0xd2f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            54e3 <.literal16+0xd33>
+  DB  127,67                              ; jg            55a3 <.literal16+0xd33>
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
@@ -17269,13 +17381,13 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  255                                 ; (bad)
-  DB  127,71                              ; jg            55ab <.literal16+0xdfb>
+  DB  127,71                              ; jg            566b <.literal16+0xdfb>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            55af <.literal16+0xdff>
+  DB  127,71                              ; jg            566f <.literal16+0xdff>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            55b3 <.literal16+0xe03>
+  DB  127,71                              ; jg            5673 <.literal16+0xe03>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            55b7 <.literal16+0xe07>
+  DB  127,71                              ; jg            5677 <.literal16+0xe07>
   DB  208                                 ; (bad)
   DB  179,89                              ; mov           $0x59,%bl
   DB  62,208                              ; ds            (bad)
@@ -17409,11 +17521,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         56d2 <.literal16+0xf22>
+  DB  62,114,28                           ; jb,pt         5792 <.literal16+0xf22>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         56d6 <.literal16+0xf26>
+  DB  62,114,28                           ; jb,pt         5796 <.literal16+0xf26>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         56da <.literal16+0xf2a>
+  DB  62,114,28                           ; jb,pt         579a <.literal16+0xf2a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -17457,7 +17569,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e565 <_sk_callback_sse41+0x3d639e66>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e625 <_sk_callback_sse41+0x3d639e6c>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -17483,7 +17595,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e5a5 <_sk_callback_sse41+0x3d639ea6>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e665 <_sk_callback_sse41+0x3d639eac>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -17492,13 +17604,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            579e <.literal16+0xfee>
+  DB  114,28                              ; jb            585e <.literal16+0xfee>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         57a2 <.literal16+0xff2>
+  DB  62,114,28                           ; jb,pt         5862 <.literal16+0xff2>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         57a6 <.literal16+0xff6>
+  DB  62,114,28                           ; jb,pt         5866 <.literal16+0xff6>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         57aa <.literal16+0xffa>
+  DB  62,114,28                           ; jb,pt         586a <.literal16+0xffa>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -17519,11 +17631,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         57e2 <.literal16+0x1032>
+  DB  62,114,28                           ; jb,pt         58a2 <.literal16+0x1032>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         57e6 <.literal16+0x1036>
+  DB  62,114,28                           ; jb,pt         58a6 <.literal16+0x1036>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         57ea <.literal16+0x103a>
+  DB  62,114,28                           ; jb,pt         58aa <.literal16+0x103a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -17567,7 +17679,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e675 <_sk_callback_sse41+0x3d639f76>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e735 <_sk_callback_sse41+0x3d639f7c>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -17593,7 +17705,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e6b5 <_sk_callback_sse41+0x3d639fb6>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e775 <_sk_callback_sse41+0x3d639fbc>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -17602,13 +17714,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            58ae <.literal16+0x10fe>
+  DB  114,28                              ; jb            596e <.literal16+0x10fe>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         58b2 <_sk_callback_sse41+0x11b3>
+  DB  62,114,28                           ; jb,pt         5972 <_sk_callback_sse41+0x11b9>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         58b6 <_sk_callback_sse41+0x11b7>
+  DB  62,114,28                           ; jb,pt         5976 <_sk_callback_sse41+0x11bd>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         58ba <_sk_callback_sse41+0x11bb>
+  DB  62,114,28                           ; jb,pt         597a <_sk_callback_sse41+0x11c1>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -17699,7 +17811,7 @@ _sk_seed_shader_sse2 LABEL PROC
   DB  102,15,110,199                      ; movd          %edi,%xmm0
   DB  102,15,112,192,0                    ; pshufd        $0x0,%xmm0,%xmm0
   DB  15,91,200                           ; cvtdq2ps      %xmm0,%xmm1
-  DB  15,40,21,17,75,0,0                  ; movaps        0x4b11(%rip),%xmm2        # 4c20 <_sk_callback_sse2+0xba>
+  DB  15,40,21,193,75,0,0                 ; movaps        0x4bc1(%rip),%xmm2        # 4cd0 <_sk_callback_sse2+0xb0>
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  15,16,2                             ; movups        (%rdx),%xmm0
   DB  15,88,193                           ; addps         %xmm1,%xmm0
@@ -17708,7 +17820,7 @@ _sk_seed_shader_sse2 LABEL PROC
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  15,88,202                           ; addps         %xmm2,%xmm1
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,21,0,75,0,0                   ; movaps        0x4b00(%rip),%xmm2        # 4c30 <_sk_callback_sse2+0xca>
+  DB  15,40,21,176,75,0,0                 ; movaps        0x4bb0(%rip),%xmm2        # 4ce0 <_sk_callback_sse2+0xc0>
   DB  15,87,219                           ; xorps         %xmm3,%xmm3
   DB  15,87,228                           ; xorps         %xmm4,%xmm4
   DB  15,87,237                           ; xorps         %xmm5,%xmm5
@@ -17729,14 +17841,14 @@ _sk_dither_sse2 LABEL PROC
   DB  102,68,15,110,1                     ; movd          (%rcx),%xmm8
   DB  102,69,15,112,192,0                 ; pshufd        $0x0,%xmm8,%xmm8
   DB  102,69,15,239,193                   ; pxor          %xmm9,%xmm8
-  DB  102,68,15,111,21,197,74,0,0         ; movdqa        0x4ac5(%rip),%xmm10        # 4c40 <_sk_callback_sse2+0xda>
+  DB  102,68,15,111,21,117,75,0,0         ; movdqa        0x4b75(%rip),%xmm10        # 4cf0 <_sk_callback_sse2+0xd0>
   DB  102,69,15,111,216                   ; movdqa        %xmm8,%xmm11
   DB  102,69,15,219,218                   ; pand          %xmm10,%xmm11
   DB  102,65,15,114,243,5                 ; pslld         $0x5,%xmm11
   DB  102,69,15,219,209                   ; pand          %xmm9,%xmm10
   DB  102,65,15,114,242,4                 ; pslld         $0x4,%xmm10
-  DB  102,68,15,111,37,177,74,0,0         ; movdqa        0x4ab1(%rip),%xmm12        # 4c50 <_sk_callback_sse2+0xea>
-  DB  102,68,15,111,45,184,74,0,0         ; movdqa        0x4ab8(%rip),%xmm13        # 4c60 <_sk_callback_sse2+0xfa>
+  DB  102,68,15,111,37,97,75,0,0          ; movdqa        0x4b61(%rip),%xmm12        # 4d00 <_sk_callback_sse2+0xe0>
+  DB  102,68,15,111,45,104,75,0,0         ; movdqa        0x4b68(%rip),%xmm13        # 4d10 <_sk_callback_sse2+0xf0>
   DB  102,69,15,111,240                   ; movdqa        %xmm8,%xmm14
   DB  102,69,15,219,245                   ; pand          %xmm13,%xmm14
   DB  102,65,15,114,246,2                 ; pslld         $0x2,%xmm14
@@ -17752,8 +17864,8 @@ _sk_dither_sse2 LABEL PROC
   DB  102,69,15,235,245                   ; por           %xmm13,%xmm14
   DB  102,69,15,235,240                   ; por           %xmm8,%xmm14
   DB  69,15,91,198                        ; cvtdq2ps      %xmm14,%xmm8
-  DB  68,15,89,5,115,74,0,0               ; mulps         0x4a73(%rip),%xmm8        # 4c70 <_sk_callback_sse2+0x10a>
-  DB  68,15,88,5,123,74,0,0               ; addps         0x4a7b(%rip),%xmm8        # 4c80 <_sk_callback_sse2+0x11a>
+  DB  68,15,89,5,35,75,0,0                ; mulps         0x4b23(%rip),%xmm8        # 4d20 <_sk_callback_sse2+0x100>
+  DB  68,15,88,5,43,75,0,0                ; addps         0x4b2b(%rip),%xmm8        # 4d30 <_sk_callback_sse2+0x110>
   DB  243,68,15,16,80,8                   ; movss         0x8(%rax),%xmm10
   DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
@@ -17820,7 +17932,7 @@ _sk_clear_sse2 LABEL PROC
 PUBLIC _sk_srcatop_sse2
 _sk_srcatop_sse2 LABEL PROC
   DB  15,89,199                           ; mulps         %xmm7,%xmm0
-  DB  68,15,40,5,212,73,0,0               ; movaps        0x49d4(%rip),%xmm8        # 4c90 <_sk_callback_sse2+0x12a>
+  DB  68,15,40,5,132,74,0,0               ; movaps        0x4a84(%rip),%xmm8        # 4d40 <_sk_callback_sse2+0x120>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -17843,7 +17955,7 @@ PUBLIC _sk_dstatop_sse2
 _sk_dstatop_sse2 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
   DB  68,15,89,196                        ; mulps         %xmm4,%xmm8
-  DB  68,15,40,13,151,73,0,0              ; movaps        0x4997(%rip),%xmm9        # 4ca0 <_sk_callback_sse2+0x13a>
+  DB  68,15,40,13,71,74,0,0               ; movaps        0x4a47(%rip),%xmm9        # 4d50 <_sk_callback_sse2+0x130>
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
@@ -17884,7 +17996,7 @@ _sk_dstin_sse2 LABEL PROC
 
 PUBLIC _sk_srcout_sse2
 _sk_srcout_sse2 LABEL PROC
-  DB  68,15,40,5,59,73,0,0                ; movaps        0x493b(%rip),%xmm8        # 4cb0 <_sk_callback_sse2+0x14a>
+  DB  68,15,40,5,235,73,0,0               ; movaps        0x49eb(%rip),%xmm8        # 4d60 <_sk_callback_sse2+0x140>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
@@ -17895,7 +18007,7 @@ _sk_srcout_sse2 LABEL PROC
 
 PUBLIC _sk_dstout_sse2
 _sk_dstout_sse2 LABEL PROC
-  DB  68,15,40,5,43,73,0,0                ; movaps        0x492b(%rip),%xmm8        # 4cc0 <_sk_callback_sse2+0x15a>
+  DB  68,15,40,5,219,73,0,0               ; movaps        0x49db(%rip),%xmm8        # 4d70 <_sk_callback_sse2+0x150>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  15,89,196                           ; mulps         %xmm4,%xmm0
@@ -17910,7 +18022,7 @@ _sk_dstout_sse2 LABEL PROC
 
 PUBLIC _sk_srcover_sse2
 _sk_srcover_sse2 LABEL PROC
-  DB  68,15,40,5,14,73,0,0                ; movaps        0x490e(%rip),%xmm8        # 4cd0 <_sk_callback_sse2+0x16a>
+  DB  68,15,40,5,190,73,0,0               ; movaps        0x49be(%rip),%xmm8        # 4d80 <_sk_callback_sse2+0x160>
   DB  68,15,92,195                        ; subps         %xmm3,%xmm8
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
@@ -17928,7 +18040,7 @@ _sk_srcover_sse2 LABEL PROC
 
 PUBLIC _sk_dstover_sse2
 _sk_dstover_sse2 LABEL PROC
-  DB  68,15,40,5,226,72,0,0               ; movaps        0x48e2(%rip),%xmm8        # 4ce0 <_sk_callback_sse2+0x17a>
+  DB  68,15,40,5,146,73,0,0               ; movaps        0x4992(%rip),%xmm8        # 4d90 <_sk_callback_sse2+0x170>
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -17952,7 +18064,7 @@ _sk_modulate_sse2 LABEL PROC
 
 PUBLIC _sk_multiply_sse2
 _sk_multiply_sse2 LABEL PROC
-  DB  68,15,40,5,182,72,0,0               ; movaps        0x48b6(%rip),%xmm8        # 4cf0 <_sk_callback_sse2+0x18a>
+  DB  68,15,40,5,102,73,0,0               ; movaps        0x4966(%rip),%xmm8        # 4da0 <_sk_callback_sse2+0x180>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
@@ -18022,7 +18134,7 @@ _sk_screen_sse2 LABEL PROC
 PUBLIC _sk_xor__sse2
 _sk_xor__sse2 LABEL PROC
   DB  68,15,40,195                        ; movaps        %xmm3,%xmm8
-  DB  15,40,29,231,71,0,0                 ; movaps        0x47e7(%rip),%xmm3        # 4d00 <_sk_callback_sse2+0x19a>
+  DB  15,40,29,151,72,0,0                 ; movaps        0x4897(%rip),%xmm3        # 4db0 <_sk_callback_sse2+0x190>
   DB  68,15,40,203                        ; movaps        %xmm3,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
@@ -18068,7 +18180,7 @@ _sk_darken_sse2 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,95,209                        ; maxps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,82,71,0,0                  ; movaps        0x4752(%rip),%xmm2        # 4d10 <_sk_callback_sse2+0x1aa>
+  DB  15,40,21,2,72,0,0                   ; movaps        0x4802(%rip),%xmm2        # 4dc0 <_sk_callback_sse2+0x1a0>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -18100,7 +18212,7 @@ _sk_lighten_sse2 LABEL PROC
   DB  68,15,89,206                        ; mulps         %xmm6,%xmm9
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,247,70,0,0                 ; movaps        0x46f7(%rip),%xmm2        # 4d20 <_sk_callback_sse2+0x1ba>
+  DB  15,40,21,167,71,0,0                 ; movaps        0x47a7(%rip),%xmm2        # 4dd0 <_sk_callback_sse2+0x1b0>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -18135,7 +18247,7 @@ _sk_difference_sse2 LABEL PROC
   DB  65,15,93,209                        ; minps         %xmm9,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,194                        ; subps         %xmm2,%xmm8
-  DB  15,40,21,145,70,0,0                 ; movaps        0x4691(%rip),%xmm2        # 4d30 <_sk_callback_sse2+0x1ca>
+  DB  15,40,21,65,71,0,0                  ; movaps        0x4741(%rip),%xmm2        # 4de0 <_sk_callback_sse2+0x1c0>
   DB  15,92,211                           ; subps         %xmm3,%xmm2
   DB  15,89,215                           ; mulps         %xmm7,%xmm2
   DB  15,88,218                           ; addps         %xmm2,%xmm3
@@ -18160,7 +18272,7 @@ _sk_exclusion_sse2 LABEL PROC
   DB  15,89,214                           ; mulps         %xmm6,%xmm2
   DB  15,88,210                           ; addps         %xmm2,%xmm2
   DB  68,15,92,202                        ; subps         %xmm2,%xmm9
-  DB  15,40,13,82,70,0,0                  ; movaps        0x4652(%rip),%xmm1        # 4d40 <_sk_callback_sse2+0x1da>
+  DB  15,40,13,2,71,0,0                   ; movaps        0x4702(%rip),%xmm1        # 4df0 <_sk_callback_sse2+0x1d0>
   DB  15,92,203                           ; subps         %xmm3,%xmm1
   DB  15,89,207                           ; mulps         %xmm7,%xmm1
   DB  15,88,217                           ; addps         %xmm1,%xmm3
@@ -18172,7 +18284,7 @@ _sk_exclusion_sse2 LABEL PROC
 PUBLIC _sk_colorburn_sse2
 _sk_colorburn_sse2 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
-  DB  68,15,40,21,65,70,0,0               ; movaps        0x4641(%rip),%xmm10        # 4d50 <_sk_callback_sse2+0x1ea>
+  DB  68,15,40,21,241,70,0,0              ; movaps        0x46f1(%rip),%xmm10        # 4e00 <_sk_callback_sse2+0x1e0>
   DB  69,15,40,202                        ; movaps        %xmm10,%xmm9
   DB  68,15,92,207                        ; subps         %xmm7,%xmm9
   DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
@@ -18264,7 +18376,7 @@ _sk_colorburn_sse2 LABEL PROC
 PUBLIC _sk_colordodge_sse2
 _sk_colordodge_sse2 LABEL PROC
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
-  DB  68,15,40,21,247,68,0,0              ; movaps        0x44f7(%rip),%xmm10        # 4d60 <_sk_callback_sse2+0x1fa>
+  DB  68,15,40,21,167,69,0,0              ; movaps        0x45a7(%rip),%xmm10        # 4e10 <_sk_callback_sse2+0x1f0>
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
   DB  68,15,92,223                        ; subps         %xmm7,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
@@ -18357,7 +18469,7 @@ _sk_hardlight_sse2 LABEL PROC
   DB  15,41,52,36                         ; movaps        %xmm6,(%rsp)
   DB  15,40,245                           ; movaps        %xmm5,%xmm6
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
-  DB  68,15,40,29,169,67,0,0              ; movaps        0x43a9(%rip),%xmm11        # 4d70 <_sk_callback_sse2+0x20a>
+  DB  68,15,40,29,89,68,0,0               ; movaps        0x4459(%rip),%xmm11        # 4e20 <_sk_callback_sse2+0x200>
   DB  69,15,40,211                        ; movaps        %xmm11,%xmm10
   DB  68,15,92,215                        ; subps         %xmm7,%xmm10
   DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
@@ -18444,7 +18556,7 @@ PUBLIC _sk_overlay_sse2
 _sk_overlay_sse2 LABEL PROC
   DB  68,15,40,193                        ; movaps        %xmm1,%xmm8
   DB  68,15,40,232                        ; movaps        %xmm0,%xmm13
-  DB  68,15,40,13,116,66,0,0              ; movaps        0x4274(%rip),%xmm9        # 4d80 <_sk_callback_sse2+0x21a>
+  DB  68,15,40,13,36,67,0,0               ; movaps        0x4324(%rip),%xmm9        # 4e30 <_sk_callback_sse2+0x210>
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
   DB  68,15,92,215                        ; subps         %xmm7,%xmm10
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
@@ -18534,7 +18646,7 @@ _sk_softlight_sse2 LABEL PROC
   DB  68,15,40,213                        ; movaps        %xmm5,%xmm10
   DB  68,15,94,215                        ; divps         %xmm7,%xmm10
   DB  69,15,84,212                        ; andps         %xmm12,%xmm10
-  DB  68,15,40,13,46,65,0,0               ; movaps        0x412e(%rip),%xmm9        # 4d90 <_sk_callback_sse2+0x22a>
+  DB  68,15,40,13,222,65,0,0              ; movaps        0x41de(%rip),%xmm9        # 4e40 <_sk_callback_sse2+0x220>
   DB  69,15,40,249                        ; movaps        %xmm9,%xmm15
   DB  69,15,92,250                        ; subps         %xmm10,%xmm15
   DB  69,15,40,218                        ; movaps        %xmm10,%xmm11
@@ -18547,10 +18659,10 @@ _sk_softlight_sse2 LABEL PROC
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  15,89,192                           ; mulps         %xmm0,%xmm0
   DB  65,15,88,194                        ; addps         %xmm10,%xmm0
-  DB  68,15,40,53,8,65,0,0                ; movaps        0x4108(%rip),%xmm14        # 4da0 <_sk_callback_sse2+0x23a>
+  DB  68,15,40,53,184,65,0,0              ; movaps        0x41b8(%rip),%xmm14        # 4e50 <_sk_callback_sse2+0x230>
   DB  69,15,88,222                        ; addps         %xmm14,%xmm11
   DB  68,15,89,216                        ; mulps         %xmm0,%xmm11
-  DB  68,15,40,21,8,65,0,0                ; movaps        0x4108(%rip),%xmm10        # 4db0 <_sk_callback_sse2+0x24a>
+  DB  68,15,40,21,184,65,0,0              ; movaps        0x41b8(%rip),%xmm10        # 4e60 <_sk_callback_sse2+0x240>
   DB  69,15,89,234                        ; mulps         %xmm10,%xmm13
   DB  69,15,88,235                        ; addps         %xmm11,%xmm13
   DB  15,88,228                           ; addps         %xmm4,%xmm4
@@ -18695,7 +18807,7 @@ _sk_hue_sse2 LABEL PROC
   DB  68,15,40,209                        ; movaps        %xmm1,%xmm10
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  68,15,89,211                        ; mulps         %xmm3,%xmm10
-  DB  68,15,40,5,68,63,0,0                ; movaps        0x3f44(%rip),%xmm8        # 4df0 <_sk_callback_sse2+0x28a>
+  DB  68,15,40,5,244,63,0,0               ; movaps        0x3ff4(%rip),%xmm8        # 4ea0 <_sk_callback_sse2+0x280>
   DB  69,15,40,216                        ; movaps        %xmm8,%xmm11
   DB  15,40,207                           ; movaps        %xmm7,%xmm1
   DB  68,15,92,217                        ; subps         %xmm1,%xmm11
@@ -18741,12 +18853,12 @@ _sk_hue_sse2 LABEL PROC
   DB  69,15,84,206                        ; andps         %xmm14,%xmm9
   DB  69,15,84,214                        ; andps         %xmm14,%xmm10
   DB  65,15,84,214                        ; andps         %xmm14,%xmm2
-  DB  68,15,40,61,88,62,0,0               ; movaps        0x3e58(%rip),%xmm15        # 4dc0 <_sk_callback_sse2+0x25a>
+  DB  68,15,40,61,8,63,0,0                ; movaps        0x3f08(%rip),%xmm15        # 4e70 <_sk_callback_sse2+0x250>
   DB  65,15,89,231                        ; mulps         %xmm15,%xmm4
-  DB  15,40,5,93,62,0,0                   ; movaps        0x3e5d(%rip),%xmm0        # 4dd0 <_sk_callback_sse2+0x26a>
+  DB  15,40,5,13,63,0,0                   ; movaps        0x3f0d(%rip),%xmm0        # 4e80 <_sk_callback_sse2+0x260>
   DB  15,89,240                           ; mulps         %xmm0,%xmm6
   DB  15,88,244                           ; addps         %xmm4,%xmm6
-  DB  68,15,40,53,95,62,0,0               ; movaps        0x3e5f(%rip),%xmm14        # 4de0 <_sk_callback_sse2+0x27a>
+  DB  68,15,40,53,15,63,0,0               ; movaps        0x3f0f(%rip),%xmm14        # 4e90 <_sk_callback_sse2+0x270>
   DB  68,15,40,239                        ; movaps        %xmm7,%xmm13
   DB  69,15,89,238                        ; mulps         %xmm14,%xmm13
   DB  68,15,88,238                        ; addps         %xmm6,%xmm13
@@ -18923,14 +19035,14 @@ _sk_saturation_sse2 LABEL PROC
   DB  68,15,84,211                        ; andps         %xmm3,%xmm10
   DB  68,15,84,203                        ; andps         %xmm3,%xmm9
   DB  15,84,195                           ; andps         %xmm3,%xmm0
-  DB  68,15,40,5,239,59,0,0               ; movaps        0x3bef(%rip),%xmm8        # 4e00 <_sk_callback_sse2+0x29a>
+  DB  68,15,40,5,159,60,0,0               ; movaps        0x3c9f(%rip),%xmm8        # 4eb0 <_sk_callback_sse2+0x290>
   DB  15,40,214                           ; movaps        %xmm6,%xmm2
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
-  DB  15,40,13,241,59,0,0                 ; movaps        0x3bf1(%rip),%xmm1        # 4e10 <_sk_callback_sse2+0x2aa>
+  DB  15,40,13,161,60,0,0                 ; movaps        0x3ca1(%rip),%xmm1        # 4ec0 <_sk_callback_sse2+0x2a0>
   DB  15,40,221                           ; movaps        %xmm5,%xmm3
   DB  15,89,217                           ; mulps         %xmm1,%xmm3
   DB  15,88,218                           ; addps         %xmm2,%xmm3
-  DB  68,15,40,37,240,59,0,0              ; movaps        0x3bf0(%rip),%xmm12        # 4e20 <_sk_callback_sse2+0x2ba>
+  DB  68,15,40,37,160,60,0,0              ; movaps        0x3ca0(%rip),%xmm12        # 4ed0 <_sk_callback_sse2+0x2b0>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
   DB  68,15,88,235                        ; addps         %xmm3,%xmm13
   DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
@@ -18975,7 +19087,7 @@ _sk_saturation_sse2 LABEL PROC
   DB  15,40,223                           ; movaps        %xmm7,%xmm3
   DB  15,40,236                           ; movaps        %xmm4,%xmm5
   DB  15,89,221                           ; mulps         %xmm5,%xmm3
-  DB  68,15,40,5,85,59,0,0                ; movaps        0x3b55(%rip),%xmm8        # 4e30 <_sk_callback_sse2+0x2ca>
+  DB  68,15,40,5,5,60,0,0                 ; movaps        0x3c05(%rip),%xmm8        # 4ee0 <_sk_callback_sse2+0x2c0>
   DB  65,15,40,224                        ; movaps        %xmm8,%xmm4
   DB  68,15,92,199                        ; subps         %xmm7,%xmm8
   DB  15,88,253                           ; addps         %xmm5,%xmm7
@@ -19076,14 +19188,14 @@ _sk_color_sse2 LABEL PROC
   DB  68,15,40,213                        ; movaps        %xmm5,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
   DB  65,15,40,208                        ; movaps        %xmm8,%xmm2
-  DB  68,15,40,45,237,57,0,0              ; movaps        0x39ed(%rip),%xmm13        # 4e40 <_sk_callback_sse2+0x2da>
+  DB  68,15,40,45,157,58,0,0              ; movaps        0x3a9d(%rip),%xmm13        # 4ef0 <_sk_callback_sse2+0x2d0>
   DB  68,15,40,198                        ; movaps        %xmm6,%xmm8
   DB  69,15,89,197                        ; mulps         %xmm13,%xmm8
-  DB  68,15,40,53,237,57,0,0              ; movaps        0x39ed(%rip),%xmm14        # 4e50 <_sk_callback_sse2+0x2ea>
+  DB  68,15,40,53,157,58,0,0              ; movaps        0x3a9d(%rip),%xmm14        # 4f00 <_sk_callback_sse2+0x2e0>
   DB  65,15,40,195                        ; movaps        %xmm11,%xmm0
   DB  65,15,89,198                        ; mulps         %xmm14,%xmm0
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,29,233,57,0,0              ; movaps        0x39e9(%rip),%xmm11        # 4e60 <_sk_callback_sse2+0x2fa>
+  DB  68,15,40,29,153,58,0,0              ; movaps        0x3a99(%rip),%xmm11        # 4f10 <_sk_callback_sse2+0x2f0>
   DB  69,15,89,227                        ; mulps         %xmm11,%xmm12
   DB  68,15,88,224                        ; addps         %xmm0,%xmm12
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
@@ -19091,7 +19203,7 @@ _sk_color_sse2 LABEL PROC
   DB  69,15,40,250                        ; movaps        %xmm10,%xmm15
   DB  69,15,89,254                        ; mulps         %xmm14,%xmm15
   DB  68,15,88,248                        ; addps         %xmm0,%xmm15
-  DB  68,15,40,5,213,57,0,0               ; movaps        0x39d5(%rip),%xmm8        # 4e70 <_sk_callback_sse2+0x30a>
+  DB  68,15,40,5,133,58,0,0               ; movaps        0x3a85(%rip),%xmm8        # 4f20 <_sk_callback_sse2+0x300>
   DB  65,15,40,224                        ; movaps        %xmm8,%xmm4
   DB  15,92,226                           ; subps         %xmm2,%xmm4
   DB  15,89,252                           ; mulps         %xmm4,%xmm7
@@ -19227,15 +19339,15 @@ _sk_luminosity_sse2 LABEL PROC
   DB  68,15,40,205                        ; movaps        %xmm5,%xmm9
   DB  68,15,89,204                        ; mulps         %xmm4,%xmm9
   DB  15,89,222                           ; mulps         %xmm6,%xmm3
-  DB  68,15,40,37,231,55,0,0              ; movaps        0x37e7(%rip),%xmm12        # 4e80 <_sk_callback_sse2+0x31a>
+  DB  68,15,40,37,151,56,0,0              ; movaps        0x3897(%rip),%xmm12        # 4f30 <_sk_callback_sse2+0x310>
   DB  68,15,40,199                        ; movaps        %xmm7,%xmm8
   DB  69,15,89,196                        ; mulps         %xmm12,%xmm8
-  DB  68,15,40,45,231,55,0,0              ; movaps        0x37e7(%rip),%xmm13        # 4e90 <_sk_callback_sse2+0x32a>
+  DB  68,15,40,45,151,56,0,0              ; movaps        0x3897(%rip),%xmm13        # 4f40 <_sk_callback_sse2+0x320>
   DB  68,15,40,241                        ; movaps        %xmm1,%xmm14
   DB  69,15,89,245                        ; mulps         %xmm13,%xmm14
   DB  69,15,88,240                        ; addps         %xmm8,%xmm14
-  DB  68,15,40,29,227,55,0,0              ; movaps        0x37e3(%rip),%xmm11        # 4ea0 <_sk_callback_sse2+0x33a>
-  DB  68,15,40,5,235,55,0,0               ; movaps        0x37eb(%rip),%xmm8        # 4eb0 <_sk_callback_sse2+0x34a>
+  DB  68,15,40,29,147,56,0,0              ; movaps        0x3893(%rip),%xmm11        # 4f50 <_sk_callback_sse2+0x330>
+  DB  68,15,40,5,155,56,0,0               ; movaps        0x389b(%rip),%xmm8        # 4f60 <_sk_callback_sse2+0x340>
   DB  69,15,40,248                        ; movaps        %xmm8,%xmm15
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  68,15,92,248                        ; subps         %xmm0,%xmm15
@@ -19377,7 +19489,7 @@ _sk_clamp_0_sse2 LABEL PROC
 
 PUBLIC _sk_clamp_1_sse2
 _sk_clamp_1_sse2 LABEL PROC
-  DB  68,15,40,5,242,53,0,0               ; movaps        0x35f2(%rip),%xmm8        # 4ec0 <_sk_callback_sse2+0x35a>
+  DB  68,15,40,5,162,54,0,0               ; movaps        0x36a2(%rip),%xmm8        # 4f70 <_sk_callback_sse2+0x350>
   DB  65,15,93,192                        ; minps         %xmm8,%xmm0
   DB  65,15,93,200                        ; minps         %xmm8,%xmm1
   DB  65,15,93,208                        ; minps         %xmm8,%xmm2
@@ -19387,7 +19499,7 @@ _sk_clamp_1_sse2 LABEL PROC
 
 PUBLIC _sk_clamp_a_sse2
 _sk_clamp_a_sse2 LABEL PROC
-  DB  15,93,29,231,53,0,0                 ; minps         0x35e7(%rip),%xmm3        # 4ed0 <_sk_callback_sse2+0x36a>
+  DB  15,93,29,151,54,0,0                 ; minps         0x3697(%rip),%xmm3        # 4f80 <_sk_callback_sse2+0x360>
   DB  15,93,195                           ; minps         %xmm3,%xmm0
   DB  15,93,203                           ; minps         %xmm3,%xmm1
   DB  15,93,211                           ; minps         %xmm3,%xmm2
@@ -19460,7 +19572,7 @@ _sk_premul_sse2 LABEL PROC
 PUBLIC _sk_unpremul_sse2
 _sk_unpremul_sse2 LABEL PROC
   DB  69,15,87,192                        ; xorps         %xmm8,%xmm8
-  DB  68,15,40,13,82,53,0,0               ; movaps        0x3552(%rip),%xmm9        # 4ee0 <_sk_callback_sse2+0x37a>
+  DB  68,15,40,13,2,54,0,0                ; movaps        0x3602(%rip),%xmm9        # 4f90 <_sk_callback_sse2+0x370>
   DB  68,15,94,203                        ; divps         %xmm3,%xmm9
   DB  68,15,194,195,4                     ; cmpneqps      %xmm3,%xmm8
   DB  69,15,84,193                        ; andps         %xmm9,%xmm8
@@ -19472,20 +19584,20 @@ _sk_unpremul_sse2 LABEL PROC
 
 PUBLIC _sk_from_srgb_sse2
 _sk_from_srgb_sse2 LABEL PROC
-  DB  68,15,40,5,61,53,0,0                ; movaps        0x353d(%rip),%xmm8        # 4ef0 <_sk_callback_sse2+0x38a>
+  DB  68,15,40,5,237,53,0,0               ; movaps        0x35ed(%rip),%xmm8        # 4fa0 <_sk_callback_sse2+0x380>
   DB  68,15,40,232                        ; movaps        %xmm0,%xmm13
   DB  69,15,89,232                        ; mulps         %xmm8,%xmm13
   DB  68,15,40,216                        ; movaps        %xmm0,%xmm11
   DB  69,15,89,219                        ; mulps         %xmm11,%xmm11
-  DB  68,15,40,13,53,53,0,0               ; movaps        0x3535(%rip),%xmm9        # 4f00 <_sk_callback_sse2+0x39a>
+  DB  68,15,40,13,229,53,0,0              ; movaps        0x35e5(%rip),%xmm9        # 4fb0 <_sk_callback_sse2+0x390>
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
   DB  69,15,89,241                        ; mulps         %xmm9,%xmm14
-  DB  68,15,40,21,53,53,0,0               ; movaps        0x3535(%rip),%xmm10        # 4f10 <_sk_callback_sse2+0x3aa>
+  DB  68,15,40,21,229,53,0,0              ; movaps        0x35e5(%rip),%xmm10        # 4fc0 <_sk_callback_sse2+0x3a0>
   DB  69,15,88,242                        ; addps         %xmm10,%xmm14
   DB  69,15,89,243                        ; mulps         %xmm11,%xmm14
-  DB  68,15,40,29,53,53,0,0               ; movaps        0x3535(%rip),%xmm11        # 4f20 <_sk_callback_sse2+0x3ba>
+  DB  68,15,40,29,229,53,0,0              ; movaps        0x35e5(%rip),%xmm11        # 4fd0 <_sk_callback_sse2+0x3b0>
   DB  69,15,88,243                        ; addps         %xmm11,%xmm14
-  DB  68,15,40,37,57,53,0,0               ; movaps        0x3539(%rip),%xmm12        # 4f30 <_sk_callback_sse2+0x3ca>
+  DB  68,15,40,37,233,53,0,0              ; movaps        0x35e9(%rip),%xmm12        # 4fe0 <_sk_callback_sse2+0x3c0>
   DB  65,15,194,196,1                     ; cmpltps       %xmm12,%xmm0
   DB  68,15,84,232                        ; andps         %xmm0,%xmm13
   DB  65,15,85,198                        ; andnps        %xmm14,%xmm0
@@ -19520,22 +19632,22 @@ _sk_from_srgb_sse2 LABEL PROC
 PUBLIC _sk_to_srgb_sse2
 _sk_to_srgb_sse2 LABEL PROC
   DB  68,15,82,232                        ; rsqrtps       %xmm0,%xmm13
-  DB  68,15,40,5,198,52,0,0               ; movaps        0x34c6(%rip),%xmm8        # 4f40 <_sk_callback_sse2+0x3da>
+  DB  68,15,40,5,118,53,0,0               ; movaps        0x3576(%rip),%xmm8        # 4ff0 <_sk_callback_sse2+0x3d0>
   DB  68,15,40,240                        ; movaps        %xmm0,%xmm14
   DB  69,15,89,240                        ; mulps         %xmm8,%xmm14
-  DB  68,15,40,13,198,52,0,0              ; movaps        0x34c6(%rip),%xmm9        # 4f50 <_sk_callback_sse2+0x3ea>
+  DB  68,15,40,13,118,53,0,0              ; movaps        0x3576(%rip),%xmm9        # 5000 <_sk_callback_sse2+0x3e0>
   DB  69,15,40,253                        ; movaps        %xmm13,%xmm15
   DB  69,15,89,249                        ; mulps         %xmm9,%xmm15
-  DB  68,15,40,21,198,52,0,0              ; movaps        0x34c6(%rip),%xmm10        # 4f60 <_sk_callback_sse2+0x3fa>
+  DB  68,15,40,21,118,53,0,0              ; movaps        0x3576(%rip),%xmm10        # 5010 <_sk_callback_sse2+0x3f0>
   DB  69,15,88,250                        ; addps         %xmm10,%xmm15
   DB  69,15,89,253                        ; mulps         %xmm13,%xmm15
-  DB  68,15,40,29,198,52,0,0              ; movaps        0x34c6(%rip),%xmm11        # 4f70 <_sk_callback_sse2+0x40a>
+  DB  68,15,40,29,118,53,0,0              ; movaps        0x3576(%rip),%xmm11        # 5020 <_sk_callback_sse2+0x400>
   DB  69,15,88,251                        ; addps         %xmm11,%xmm15
-  DB  68,15,40,37,202,52,0,0              ; movaps        0x34ca(%rip),%xmm12        # 4f80 <_sk_callback_sse2+0x41a>
+  DB  68,15,40,37,122,53,0,0              ; movaps        0x357a(%rip),%xmm12        # 5030 <_sk_callback_sse2+0x410>
   DB  69,15,88,236                        ; addps         %xmm12,%xmm13
   DB  69,15,83,237                        ; rcpps         %xmm13,%xmm13
   DB  69,15,89,239                        ; mulps         %xmm15,%xmm13
-  DB  68,15,40,61,198,52,0,0              ; movaps        0x34c6(%rip),%xmm15        # 4f90 <_sk_callback_sse2+0x42a>
+  DB  68,15,40,61,118,53,0,0              ; movaps        0x3576(%rip),%xmm15        # 5040 <_sk_callback_sse2+0x420>
   DB  65,15,194,199,1                     ; cmpltps       %xmm15,%xmm0
   DB  68,15,84,240                        ; andps         %xmm0,%xmm14
   DB  65,15,85,197                        ; andnps        %xmm13,%xmm0
@@ -19583,7 +19695,7 @@ _sk_rgb_to_hsl_sse2 LABEL PROC
   DB  68,15,93,218                        ; minps         %xmm2,%xmm11
   DB  65,15,40,202                        ; movaps        %xmm10,%xmm1
   DB  65,15,92,203                        ; subps         %xmm11,%xmm1
-  DB  68,15,40,45,31,52,0,0               ; movaps        0x341f(%rip),%xmm13        # 4fa0 <_sk_callback_sse2+0x43a>
+  DB  68,15,40,45,207,52,0,0              ; movaps        0x34cf(%rip),%xmm13        # 5050 <_sk_callback_sse2+0x430>
   DB  68,15,94,233                        ; divps         %xmm1,%xmm13
   DB  65,15,40,194                        ; movaps        %xmm10,%xmm0
   DB  65,15,194,192,0                     ; cmpeqps       %xmm8,%xmm0
@@ -19592,30 +19704,30 @@ _sk_rgb_to_hsl_sse2 LABEL PROC
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,40,241                        ; movaps        %xmm9,%xmm14
   DB  68,15,194,242,1                     ; cmpltps       %xmm2,%xmm14
-  DB  68,15,84,53,5,52,0,0                ; andps         0x3405(%rip),%xmm14        # 4fb0 <_sk_callback_sse2+0x44a>
+  DB  68,15,84,53,181,52,0,0              ; andps         0x34b5(%rip),%xmm14        # 5060 <_sk_callback_sse2+0x440>
   DB  69,15,88,244                        ; addps         %xmm12,%xmm14
   DB  69,15,40,250                        ; movaps        %xmm10,%xmm15
   DB  69,15,194,249,0                     ; cmpeqps       %xmm9,%xmm15
   DB  65,15,92,208                        ; subps         %xmm8,%xmm2
   DB  65,15,89,213                        ; mulps         %xmm13,%xmm2
-  DB  68,15,40,37,248,51,0,0              ; movaps        0x33f8(%rip),%xmm12        # 4fc0 <_sk_callback_sse2+0x45a>
+  DB  68,15,40,37,168,52,0,0              ; movaps        0x34a8(%rip),%xmm12        # 5070 <_sk_callback_sse2+0x450>
   DB  65,15,88,212                        ; addps         %xmm12,%xmm2
   DB  69,15,92,193                        ; subps         %xmm9,%xmm8
   DB  69,15,89,197                        ; mulps         %xmm13,%xmm8
-  DB  68,15,88,5,244,51,0,0               ; addps         0x33f4(%rip),%xmm8        # 4fd0 <_sk_callback_sse2+0x46a>
+  DB  68,15,88,5,164,52,0,0               ; addps         0x34a4(%rip),%xmm8        # 5080 <_sk_callback_sse2+0x460>
   DB  65,15,84,215                        ; andps         %xmm15,%xmm2
   DB  69,15,85,248                        ; andnps        %xmm8,%xmm15
   DB  68,15,86,250                        ; orps          %xmm2,%xmm15
   DB  68,15,84,240                        ; andps         %xmm0,%xmm14
   DB  65,15,85,199                        ; andnps        %xmm15,%xmm0
   DB  65,15,86,198                        ; orps          %xmm14,%xmm0
-  DB  15,89,5,229,51,0,0                  ; mulps         0x33e5(%rip),%xmm0        # 4fe0 <_sk_callback_sse2+0x47a>
+  DB  15,89,5,149,52,0,0                  ; mulps         0x3495(%rip),%xmm0        # 5090 <_sk_callback_sse2+0x470>
   DB  69,15,40,194                        ; movaps        %xmm10,%xmm8
   DB  69,15,194,195,4                     ; cmpneqps      %xmm11,%xmm8
   DB  65,15,84,192                        ; andps         %xmm8,%xmm0
   DB  69,15,92,226                        ; subps         %xmm10,%xmm12
   DB  69,15,88,211                        ; addps         %xmm11,%xmm10
-  DB  68,15,40,13,216,51,0,0              ; movaps        0x33d8(%rip),%xmm9        # 4ff0 <_sk_callback_sse2+0x48a>
+  DB  68,15,40,13,136,52,0,0              ; movaps        0x3488(%rip),%xmm9        # 50a0 <_sk_callback_sse2+0x480>
   DB  65,15,40,210                        ; movaps        %xmm10,%xmm2
   DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
   DB  68,15,194,202,1                     ; cmpltps       %xmm2,%xmm9
@@ -19638,7 +19750,7 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  15,41,92,36,32                      ; movaps        %xmm3,0x20(%rsp)
   DB  68,15,40,218                        ; movaps        %xmm2,%xmm11
   DB  15,40,240                           ; movaps        %xmm0,%xmm6
-  DB  68,15,40,13,147,51,0,0              ; movaps        0x3393(%rip),%xmm9        # 5000 <_sk_callback_sse2+0x49a>
+  DB  68,15,40,13,67,52,0,0               ; movaps        0x3443(%rip),%xmm9        # 50b0 <_sk_callback_sse2+0x490>
   DB  69,15,40,209                        ; movaps        %xmm9,%xmm10
   DB  69,15,194,211,2                     ; cmpleps       %xmm11,%xmm10
   DB  15,40,193                           ; movaps        %xmm1,%xmm0
@@ -19655,28 +19767,28 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  69,15,88,211                        ; addps         %xmm11,%xmm10
   DB  69,15,88,219                        ; addps         %xmm11,%xmm11
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
-  DB  15,40,5,93,51,0,0                   ; movaps        0x335d(%rip),%xmm0        # 5010 <_sk_callback_sse2+0x4aa>
+  DB  15,40,5,13,52,0,0                   ; movaps        0x340d(%rip),%xmm0        # 50c0 <_sk_callback_sse2+0x4a0>
   DB  15,88,198                           ; addps         %xmm6,%xmm0
   DB  243,15,91,200                       ; cvttps2dq     %xmm0,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
   DB  15,40,216                           ; movaps        %xmm0,%xmm3
   DB  15,194,217,1                        ; cmpltps       %xmm1,%xmm3
-  DB  15,84,29,85,51,0,0                  ; andps         0x3355(%rip),%xmm3        # 5020 <_sk_callback_sse2+0x4ba>
+  DB  15,84,29,5,52,0,0                   ; andps         0x3405(%rip),%xmm3        # 50d0 <_sk_callback_sse2+0x4b0>
   DB  15,92,203                           ; subps         %xmm3,%xmm1
   DB  15,92,193                           ; subps         %xmm1,%xmm0
-  DB  68,15,40,45,87,51,0,0               ; movaps        0x3357(%rip),%xmm13        # 5030 <_sk_callback_sse2+0x4ca>
+  DB  68,15,40,45,7,52,0,0                ; movaps        0x3407(%rip),%xmm13        # 50e0 <_sk_callback_sse2+0x4c0>
   DB  69,15,40,197                        ; movaps        %xmm13,%xmm8
   DB  68,15,194,192,2                     ; cmpleps       %xmm0,%xmm8
   DB  69,15,40,242                        ; movaps        %xmm10,%xmm14
   DB  69,15,92,243                        ; subps         %xmm11,%xmm14
   DB  65,15,40,217                        ; movaps        %xmm9,%xmm3
   DB  15,194,216,2                        ; cmpleps       %xmm0,%xmm3
-  DB  15,40,21,103,51,0,0                 ; movaps        0x3367(%rip),%xmm2        # 5060 <_sk_callback_sse2+0x4fa>
+  DB  15,40,21,23,52,0,0                  ; movaps        0x3417(%rip),%xmm2        # 5110 <_sk_callback_sse2+0x4f0>
   DB  68,15,40,250                        ; movaps        %xmm2,%xmm15
   DB  68,15,194,248,2                     ; cmpleps       %xmm0,%xmm15
-  DB  15,40,13,55,51,0,0                  ; movaps        0x3337(%rip),%xmm1        # 5040 <_sk_callback_sse2+0x4da>
+  DB  15,40,13,231,51,0,0                 ; movaps        0x33e7(%rip),%xmm1        # 50f0 <_sk_callback_sse2+0x4d0>
   DB  15,89,193                           ; mulps         %xmm1,%xmm0
-  DB  15,40,45,61,51,0,0                  ; movaps        0x333d(%rip),%xmm5        # 5050 <_sk_callback_sse2+0x4ea>
+  DB  15,40,45,237,51,0,0                 ; movaps        0x33ed(%rip),%xmm5        # 5100 <_sk_callback_sse2+0x4e0>
   DB  15,40,229                           ; movaps        %xmm5,%xmm4
   DB  15,92,224                           ; subps         %xmm0,%xmm4
   DB  65,15,89,230                        ; mulps         %xmm14,%xmm4
@@ -19699,7 +19811,7 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
   DB  15,40,222                           ; movaps        %xmm6,%xmm3
   DB  15,194,216,1                        ; cmpltps       %xmm0,%xmm3
-  DB  15,84,29,178,50,0,0                 ; andps         0x32b2(%rip),%xmm3        # 5020 <_sk_callback_sse2+0x4ba>
+  DB  15,84,29,98,51,0,0                  ; andps         0x3362(%rip),%xmm3        # 50d0 <_sk_callback_sse2+0x4b0>
   DB  15,92,195                           ; subps         %xmm3,%xmm0
   DB  68,15,40,230                        ; movaps        %xmm6,%xmm12
   DB  68,15,92,224                        ; subps         %xmm0,%xmm12
@@ -19729,12 +19841,12 @@ _sk_hsl_to_rgb_sse2 LABEL PROC
   DB  15,40,60,36                         ; movaps        (%rsp),%xmm7
   DB  15,40,231                           ; movaps        %xmm7,%xmm4
   DB  15,85,227                           ; andnps        %xmm3,%xmm4
-  DB  15,88,53,139,50,0,0                 ; addps         0x328b(%rip),%xmm6        # 5070 <_sk_callback_sse2+0x50a>
+  DB  15,88,53,59,51,0,0                  ; addps         0x333b(%rip),%xmm6        # 5120 <_sk_callback_sse2+0x500>
   DB  243,15,91,198                       ; cvttps2dq     %xmm6,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
   DB  15,40,222                           ; movaps        %xmm6,%xmm3
   DB  15,194,216,1                        ; cmpltps       %xmm0,%xmm3
-  DB  15,84,29,38,50,0,0                  ; andps         0x3226(%rip),%xmm3        # 5020 <_sk_callback_sse2+0x4ba>
+  DB  15,84,29,214,50,0,0                 ; andps         0x32d6(%rip),%xmm3        # 50d0 <_sk_callback_sse2+0x4b0>
   DB  15,92,195                           ; subps         %xmm3,%xmm0
   DB  15,92,240                           ; subps         %xmm0,%xmm6
   DB  15,89,206                           ; mulps         %xmm6,%xmm1
@@ -19795,7 +19907,7 @@ _sk_scale_u8_sse2 LABEL PROC
   DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
   DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,176,49,0,0               ; mulps         0x31b0(%rip),%xmm8        # 5080 <_sk_callback_sse2+0x51a>
+  DB  68,15,89,5,96,50,0,0                ; mulps         0x3260(%rip),%xmm8        # 5130 <_sk_callback_sse2+0x510>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
@@ -19832,7 +19944,7 @@ _sk_lerp_u8_sse2 LABEL PROC
   DB  102,69,15,96,193                    ; punpcklbw     %xmm9,%xmm8
   DB  102,69,15,97,193                    ; punpcklwd     %xmm9,%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,78,49,0,0                ; mulps         0x314e(%rip),%xmm8        # 5090 <_sk_callback_sse2+0x52a>
+  DB  68,15,89,5,254,49,0,0               ; mulps         0x31fe(%rip),%xmm8        # 5140 <_sk_callback_sse2+0x520>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -19855,17 +19967,17 @@ _sk_lerp_565_sse2 LABEL PROC
   DB  243,68,15,126,20,120                ; movq          (%rax,%rdi,2),%xmm10
   DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
   DB  102,69,15,97,208                    ; punpcklwd     %xmm8,%xmm10
-  DB  102,68,15,111,5,20,49,0,0           ; movdqa        0x3114(%rip),%xmm8        # 50a0 <_sk_callback_sse2+0x53a>
+  DB  102,68,15,111,5,196,49,0,0          ; movdqa        0x31c4(%rip),%xmm8        # 5150 <_sk_callback_sse2+0x530>
   DB  102,69,15,219,194                   ; pand          %xmm10,%xmm8
   DB  69,15,91,192                        ; cvtdq2ps      %xmm8,%xmm8
-  DB  68,15,89,5,19,49,0,0                ; mulps         0x3113(%rip),%xmm8        # 50b0 <_sk_callback_sse2+0x54a>
-  DB  102,68,15,111,13,26,49,0,0          ; movdqa        0x311a(%rip),%xmm9        # 50c0 <_sk_callback_sse2+0x55a>
+  DB  68,15,89,5,195,49,0,0               ; mulps         0x31c3(%rip),%xmm8        # 5160 <_sk_callback_sse2+0x540>
+  DB  102,68,15,111,13,202,49,0,0         ; movdqa        0x31ca(%rip),%xmm9        # 5170 <_sk_callback_sse2+0x550>
   DB  102,69,15,219,202                   ; pand          %xmm10,%xmm9
   DB  69,15,91,201                        ; cvtdq2ps      %xmm9,%xmm9
-  DB  68,15,89,13,25,49,0,0               ; mulps         0x3119(%rip),%xmm9        # 50d0 <_sk_callback_sse2+0x56a>
-  DB  102,68,15,219,21,32,49,0,0          ; pand          0x3120(%rip),%xmm10        # 50e0 <_sk_callback_sse2+0x57a>
+  DB  68,15,89,13,201,49,0,0              ; mulps         0x31c9(%rip),%xmm9        # 5180 <_sk_callback_sse2+0x560>
+  DB  102,68,15,219,21,208,49,0,0         ; pand          0x31d0(%rip),%xmm10        # 5190 <_sk_callback_sse2+0x570>
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
-  DB  68,15,89,21,36,49,0,0               ; mulps         0x3124(%rip),%xmm10        # 50f0 <_sk_callback_sse2+0x58a>
+  DB  68,15,89,21,212,49,0,0              ; mulps         0x31d4(%rip),%xmm10        # 51a0 <_sk_callback_sse2+0x580>
   DB  15,92,196                           ; subps         %xmm4,%xmm0
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  15,88,196                           ; addps         %xmm4,%xmm0
@@ -19894,7 +20006,7 @@ _sk_load_tables_sse2 LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  76,139,72,8                         ; mov           0x8(%rax),%r9
   DB  243,69,15,111,12,184                ; movdqu        (%r8,%rdi,4),%xmm9
-  DB  102,68,15,111,5,212,48,0,0          ; movdqa        0x30d4(%rip),%xmm8        # 5100 <_sk_callback_sse2+0x59a>
+  DB  102,68,15,111,5,132,49,0,0          ; movdqa        0x3184(%rip),%xmm8        # 51b0 <_sk_callback_sse2+0x590>
   DB  102,65,15,111,193                   ; movdqa        %xmm9,%xmm0
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
@@ -19949,7 +20061,7 @@ _sk_load_tables_sse2 LABEL PROC
   DB  65,15,20,208                        ; unpcklps      %xmm8,%xmm2
   DB  102,65,15,114,209,24                ; psrld         $0x18,%xmm9
   DB  65,15,91,217                        ; cvtdq2ps      %xmm9,%xmm3
-  DB  15,89,29,225,47,0,0                 ; mulps         0x2fe1(%rip),%xmm3        # 5110 <_sk_callback_sse2+0x5aa>
+  DB  15,89,29,145,48,0,0                 ; mulps         0x3091(%rip),%xmm3        # 51c0 <_sk_callback_sse2+0x5a0>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -19966,7 +20078,7 @@ _sk_load_tables_u16_be_sse2 LABEL PROC
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,97,200                       ; punpcklwd     %xmm0,%xmm1
   DB  102,68,15,105,200                   ; punpckhwd     %xmm0,%xmm9
-  DB  102,68,15,111,21,180,47,0,0         ; movdqa        0x2fb4(%rip),%xmm10        # 5120 <_sk_callback_sse2+0x5ba>
+  DB  102,68,15,111,21,100,48,0,0         ; movdqa        0x3064(%rip),%xmm10        # 51d0 <_sk_callback_sse2+0x5b0>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,194                   ; pand          %xmm10,%xmm0
   DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
@@ -20027,7 +20139,7 @@ _sk_load_tables_u16_be_sse2 LABEL PROC
   DB  102,65,15,235,217                   ; por           %xmm9,%xmm3
   DB  102,65,15,97,216                    ; punpcklwd     %xmm8,%xmm3
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,163,46,0,0                 ; mulps         0x2ea3(%rip),%xmm3        # 5130 <_sk_callback_sse2+0x5ca>
+  DB  15,89,29,83,47,0,0                  ; mulps         0x2f53(%rip),%xmm3        # 51e0 <_sk_callback_sse2+0x5c0>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -20047,7 +20159,7 @@ _sk_load_tables_rgb_u16_be_sse2 LABEL PROC
   DB  102,68,15,97,208                    ; punpcklwd     %xmm0,%xmm10
   DB  102,65,15,111,195                   ; movdqa        %xmm11,%xmm0
   DB  102,65,15,97,194                    ; punpcklwd     %xmm10,%xmm0
-  DB  102,68,15,111,5,99,46,0,0           ; movdqa        0x2e63(%rip),%xmm8        # 5140 <_sk_callback_sse2+0x5da>
+  DB  102,68,15,111,5,19,47,0,0           ; movdqa        0x2f13(%rip),%xmm8        # 51f0 <_sk_callback_sse2+0x5d0>
   DB  102,15,112,200,78                   ; pshufd        $0x4e,%xmm0,%xmm1
   DB  102,65,15,219,192                   ; pand          %xmm8,%xmm0
   DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
@@ -20102,7 +20214,7 @@ _sk_load_tables_rgb_u16_be_sse2 LABEL PROC
   DB  15,20,211                           ; unpcklps      %xmm3,%xmm2
   DB  65,15,20,208                        ; unpcklps      %xmm8,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,114,45,0,0                 ; movaps        0x2d72(%rip),%xmm3        # 5150 <_sk_callback_sse2+0x5ea>
+  DB  15,40,29,34,46,0,0                  ; movaps        0x2e22(%rip),%xmm3        # 5200 <_sk_callback_sse2+0x5e0>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_byte_tables_sse2
@@ -20110,7 +20222,7 @@ _sk_byte_tables_sse2 LABEL PROC
   DB  65,86                               ; push          %r14
   DB  83                                  ; push          %rbx
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,115,45,0,0               ; movaps        0x2d73(%rip),%xmm8        # 5160 <_sk_callback_sse2+0x5fa>
+  DB  68,15,40,5,35,46,0,0                ; movaps        0x2e23(%rip),%xmm8        # 5210 <_sk_callback_sse2+0x5f0>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,91,192                       ; cvtps2dq      %xmm0,%xmm0
   DB  102,72,15,126,193                   ; movq          %xmm0,%rcx
@@ -20137,7 +20249,7 @@ _sk_byte_tables_sse2 LABEL PROC
   DB  102,65,15,96,193                    ; punpcklbw     %xmm9,%xmm0
   DB  102,65,15,97,193                    ; punpcklwd     %xmm9,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,21,16,45,0,0               ; movaps        0x2d10(%rip),%xmm10        # 5170 <_sk_callback_sse2+0x60a>
+  DB  68,15,40,21,192,45,0,0              ; movaps        0x2dc0(%rip),%xmm10        # 5220 <_sk_callback_sse2+0x600>
   DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -20251,7 +20363,7 @@ _sk_byte_tables_rgb_sse2 LABEL PROC
   DB  102,65,15,96,193                    ; punpcklbw     %xmm9,%xmm0
   DB  102,65,15,97,193                    ; punpcklwd     %xmm9,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,21,99,43,0,0               ; movaps        0x2b63(%rip),%xmm10        # 5180 <_sk_callback_sse2+0x61a>
+  DB  68,15,40,21,19,44,0,0               ; movaps        0x2c13(%rip),%xmm10        # 5230 <_sk_callback_sse2+0x610>
   DB  65,15,89,194                        ; mulps         %xmm10,%xmm0
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
   DB  102,15,91,201                       ; cvtps2dq      %xmm1,%xmm1
@@ -20438,15 +20550,15 @@ _sk_parametric_r_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,162,40,0,0              ; mulps         0x28a2(%rip),%xmm9        # 5190 <_sk_callback_sse2+0x62a>
-  DB  68,15,84,21,170,40,0,0              ; andps         0x28aa(%rip),%xmm10        # 51a0 <_sk_callback_sse2+0x63a>
-  DB  68,15,86,21,178,40,0,0              ; orps          0x28b2(%rip),%xmm10        # 51b0 <_sk_callback_sse2+0x64a>
-  DB  68,15,88,13,186,40,0,0              ; addps         0x28ba(%rip),%xmm9        # 51c0 <_sk_callback_sse2+0x65a>
-  DB  68,15,40,37,194,40,0,0              ; movaps        0x28c2(%rip),%xmm12        # 51d0 <_sk_callback_sse2+0x66a>
+  DB  68,15,89,13,82,41,0,0               ; mulps         0x2952(%rip),%xmm9        # 5240 <_sk_callback_sse2+0x620>
+  DB  68,15,84,21,90,41,0,0               ; andps         0x295a(%rip),%xmm10        # 5250 <_sk_callback_sse2+0x630>
+  DB  68,15,86,21,98,41,0,0               ; orps          0x2962(%rip),%xmm10        # 5260 <_sk_callback_sse2+0x640>
+  DB  68,15,88,13,106,41,0,0              ; addps         0x296a(%rip),%xmm9        # 5270 <_sk_callback_sse2+0x650>
+  DB  68,15,40,37,114,41,0,0              ; movaps        0x2972(%rip),%xmm12        # 5280 <_sk_callback_sse2+0x660>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,194,40,0,0              ; addps         0x28c2(%rip),%xmm10        # 51e0 <_sk_callback_sse2+0x67a>
-  DB  68,15,40,37,202,40,0,0              ; movaps        0x28ca(%rip),%xmm12        # 51f0 <_sk_callback_sse2+0x68a>
+  DB  68,15,88,21,114,41,0,0              ; addps         0x2972(%rip),%xmm10        # 5290 <_sk_callback_sse2+0x670>
+  DB  68,15,40,37,122,41,0,0              ; movaps        0x297a(%rip),%xmm12        # 52a0 <_sk_callback_sse2+0x680>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -20454,22 +20566,22 @@ _sk_parametric_r_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,180,40,0,0              ; movaps        0x28b4(%rip),%xmm10        # 5200 <_sk_callback_sse2+0x69a>
+  DB  68,15,40,21,100,41,0,0              ; movaps        0x2964(%rip),%xmm10        # 52b0 <_sk_callback_sse2+0x690>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,168,40,0,0              ; addps         0x28a8(%rip),%xmm9        # 5210 <_sk_callback_sse2+0x6aa>
-  DB  68,15,40,37,176,40,0,0              ; movaps        0x28b0(%rip),%xmm12        # 5220 <_sk_callback_sse2+0x6ba>
+  DB  68,15,88,13,88,41,0,0               ; addps         0x2958(%rip),%xmm9        # 52c0 <_sk_callback_sse2+0x6a0>
+  DB  68,15,40,37,96,41,0,0               ; movaps        0x2960(%rip),%xmm12        # 52d0 <_sk_callback_sse2+0x6b0>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,176,40,0,0              ; movaps        0x28b0(%rip),%xmm12        # 5230 <_sk_callback_sse2+0x6ca>
+  DB  68,15,40,37,96,41,0,0               ; movaps        0x2960(%rip),%xmm12        # 52e0 <_sk_callback_sse2+0x6c0>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,180,40,0,0              ; movaps        0x28b4(%rip),%xmm13        # 5240 <_sk_callback_sse2+0x6da>
+  DB  68,15,40,45,100,41,0,0              ; movaps        0x2964(%rip),%xmm13        # 52f0 <_sk_callback_sse2+0x6d0>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,180,40,0,0              ; mulps         0x28b4(%rip),%xmm13        # 5250 <_sk_callback_sse2+0x6ea>
+  DB  68,15,89,45,100,41,0,0              ; mulps         0x2964(%rip),%xmm13        # 5300 <_sk_callback_sse2+0x6e0>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -20503,15 +20615,15 @@ _sk_parametric_g_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,52,40,0,0               ; mulps         0x2834(%rip),%xmm9        # 5260 <_sk_callback_sse2+0x6fa>
-  DB  68,15,84,21,60,40,0,0               ; andps         0x283c(%rip),%xmm10        # 5270 <_sk_callback_sse2+0x70a>
-  DB  68,15,86,21,68,40,0,0               ; orps          0x2844(%rip),%xmm10        # 5280 <_sk_callback_sse2+0x71a>
-  DB  68,15,88,13,76,40,0,0               ; addps         0x284c(%rip),%xmm9        # 5290 <_sk_callback_sse2+0x72a>
-  DB  68,15,40,37,84,40,0,0               ; movaps        0x2854(%rip),%xmm12        # 52a0 <_sk_callback_sse2+0x73a>
+  DB  68,15,89,13,228,40,0,0              ; mulps         0x28e4(%rip),%xmm9        # 5310 <_sk_callback_sse2+0x6f0>
+  DB  68,15,84,21,236,40,0,0              ; andps         0x28ec(%rip),%xmm10        # 5320 <_sk_callback_sse2+0x700>
+  DB  68,15,86,21,244,40,0,0              ; orps          0x28f4(%rip),%xmm10        # 5330 <_sk_callback_sse2+0x710>
+  DB  68,15,88,13,252,40,0,0              ; addps         0x28fc(%rip),%xmm9        # 5340 <_sk_callback_sse2+0x720>
+  DB  68,15,40,37,4,41,0,0                ; movaps        0x2904(%rip),%xmm12        # 5350 <_sk_callback_sse2+0x730>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,84,40,0,0               ; addps         0x2854(%rip),%xmm10        # 52b0 <_sk_callback_sse2+0x74a>
-  DB  68,15,40,37,92,40,0,0               ; movaps        0x285c(%rip),%xmm12        # 52c0 <_sk_callback_sse2+0x75a>
+  DB  68,15,88,21,4,41,0,0                ; addps         0x2904(%rip),%xmm10        # 5360 <_sk_callback_sse2+0x740>
+  DB  68,15,40,37,12,41,0,0               ; movaps        0x290c(%rip),%xmm12        # 5370 <_sk_callback_sse2+0x750>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -20519,22 +20631,22 @@ _sk_parametric_g_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,70,40,0,0               ; movaps        0x2846(%rip),%xmm10        # 52d0 <_sk_callback_sse2+0x76a>
+  DB  68,15,40,21,246,40,0,0              ; movaps        0x28f6(%rip),%xmm10        # 5380 <_sk_callback_sse2+0x760>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,58,40,0,0               ; addps         0x283a(%rip),%xmm9        # 52e0 <_sk_callback_sse2+0x77a>
-  DB  68,15,40,37,66,40,0,0               ; movaps        0x2842(%rip),%xmm12        # 52f0 <_sk_callback_sse2+0x78a>
+  DB  68,15,88,13,234,40,0,0              ; addps         0x28ea(%rip),%xmm9        # 5390 <_sk_callback_sse2+0x770>
+  DB  68,15,40,37,242,40,0,0              ; movaps        0x28f2(%rip),%xmm12        # 53a0 <_sk_callback_sse2+0x780>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,66,40,0,0               ; movaps        0x2842(%rip),%xmm12        # 5300 <_sk_callback_sse2+0x79a>
+  DB  68,15,40,37,242,40,0,0              ; movaps        0x28f2(%rip),%xmm12        # 53b0 <_sk_callback_sse2+0x790>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,70,40,0,0               ; movaps        0x2846(%rip),%xmm13        # 5310 <_sk_callback_sse2+0x7aa>
+  DB  68,15,40,45,246,40,0,0              ; movaps        0x28f6(%rip),%xmm13        # 53c0 <_sk_callback_sse2+0x7a0>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,70,40,0,0               ; mulps         0x2846(%rip),%xmm13        # 5320 <_sk_callback_sse2+0x7ba>
+  DB  68,15,89,45,246,40,0,0              ; mulps         0x28f6(%rip),%xmm13        # 53d0 <_sk_callback_sse2+0x7b0>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -20568,15 +20680,15 @@ _sk_parametric_b_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,198,39,0,0              ; mulps         0x27c6(%rip),%xmm9        # 5330 <_sk_callback_sse2+0x7ca>
-  DB  68,15,84,21,206,39,0,0              ; andps         0x27ce(%rip),%xmm10        # 5340 <_sk_callback_sse2+0x7da>
-  DB  68,15,86,21,214,39,0,0              ; orps          0x27d6(%rip),%xmm10        # 5350 <_sk_callback_sse2+0x7ea>
-  DB  68,15,88,13,222,39,0,0              ; addps         0x27de(%rip),%xmm9        # 5360 <_sk_callback_sse2+0x7fa>
-  DB  68,15,40,37,230,39,0,0              ; movaps        0x27e6(%rip),%xmm12        # 5370 <_sk_callback_sse2+0x80a>
+  DB  68,15,89,13,118,40,0,0              ; mulps         0x2876(%rip),%xmm9        # 53e0 <_sk_callback_sse2+0x7c0>
+  DB  68,15,84,21,126,40,0,0              ; andps         0x287e(%rip),%xmm10        # 53f0 <_sk_callback_sse2+0x7d0>
+  DB  68,15,86,21,134,40,0,0              ; orps          0x2886(%rip),%xmm10        # 5400 <_sk_callback_sse2+0x7e0>
+  DB  68,15,88,13,142,40,0,0              ; addps         0x288e(%rip),%xmm9        # 5410 <_sk_callback_sse2+0x7f0>
+  DB  68,15,40,37,150,40,0,0              ; movaps        0x2896(%rip),%xmm12        # 5420 <_sk_callback_sse2+0x800>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,230,39,0,0              ; addps         0x27e6(%rip),%xmm10        # 5380 <_sk_callback_sse2+0x81a>
-  DB  68,15,40,37,238,39,0,0              ; movaps        0x27ee(%rip),%xmm12        # 5390 <_sk_callback_sse2+0x82a>
+  DB  68,15,88,21,150,40,0,0              ; addps         0x2896(%rip),%xmm10        # 5430 <_sk_callback_sse2+0x810>
+  DB  68,15,40,37,158,40,0,0              ; movaps        0x289e(%rip),%xmm12        # 5440 <_sk_callback_sse2+0x820>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -20584,22 +20696,22 @@ _sk_parametric_b_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,216,39,0,0              ; movaps        0x27d8(%rip),%xmm10        # 53a0 <_sk_callback_sse2+0x83a>
+  DB  68,15,40,21,136,40,0,0              ; movaps        0x2888(%rip),%xmm10        # 5450 <_sk_callback_sse2+0x830>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,204,39,0,0              ; addps         0x27cc(%rip),%xmm9        # 53b0 <_sk_callback_sse2+0x84a>
-  DB  68,15,40,37,212,39,0,0              ; movaps        0x27d4(%rip),%xmm12        # 53c0 <_sk_callback_sse2+0x85a>
+  DB  68,15,88,13,124,40,0,0              ; addps         0x287c(%rip),%xmm9        # 5460 <_sk_callback_sse2+0x840>
+  DB  68,15,40,37,132,40,0,0              ; movaps        0x2884(%rip),%xmm12        # 5470 <_sk_callback_sse2+0x850>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,212,39,0,0              ; movaps        0x27d4(%rip),%xmm12        # 53d0 <_sk_callback_sse2+0x86a>
+  DB  68,15,40,37,132,40,0,0              ; movaps        0x2884(%rip),%xmm12        # 5480 <_sk_callback_sse2+0x860>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,216,39,0,0              ; movaps        0x27d8(%rip),%xmm13        # 53e0 <_sk_callback_sse2+0x87a>
+  DB  68,15,40,45,136,40,0,0              ; movaps        0x2888(%rip),%xmm13        # 5490 <_sk_callback_sse2+0x870>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,216,39,0,0              ; mulps         0x27d8(%rip),%xmm13        # 53f0 <_sk_callback_sse2+0x88a>
+  DB  68,15,89,45,136,40,0,0              ; mulps         0x2888(%rip),%xmm13        # 54a0 <_sk_callback_sse2+0x880>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -20633,15 +20745,15 @@ _sk_parametric_a_sse2 LABEL PROC
   DB  69,15,88,209                        ; addps         %xmm9,%xmm10
   DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
   DB  69,15,91,202                        ; cvtdq2ps      %xmm10,%xmm9
-  DB  68,15,89,13,88,39,0,0               ; mulps         0x2758(%rip),%xmm9        # 5400 <_sk_callback_sse2+0x89a>
-  DB  68,15,84,21,96,39,0,0               ; andps         0x2760(%rip),%xmm10        # 5410 <_sk_callback_sse2+0x8aa>
-  DB  68,15,86,21,104,39,0,0              ; orps          0x2768(%rip),%xmm10        # 5420 <_sk_callback_sse2+0x8ba>
-  DB  68,15,88,13,112,39,0,0              ; addps         0x2770(%rip),%xmm9        # 5430 <_sk_callback_sse2+0x8ca>
-  DB  68,15,40,37,120,39,0,0              ; movaps        0x2778(%rip),%xmm12        # 5440 <_sk_callback_sse2+0x8da>
+  DB  68,15,89,13,8,40,0,0                ; mulps         0x2808(%rip),%xmm9        # 54b0 <_sk_callback_sse2+0x890>
+  DB  68,15,84,21,16,40,0,0               ; andps         0x2810(%rip),%xmm10        # 54c0 <_sk_callback_sse2+0x8a0>
+  DB  68,15,86,21,24,40,0,0               ; orps          0x2818(%rip),%xmm10        # 54d0 <_sk_callback_sse2+0x8b0>
+  DB  68,15,88,13,32,40,0,0               ; addps         0x2820(%rip),%xmm9        # 54e0 <_sk_callback_sse2+0x8c0>
+  DB  68,15,40,37,40,40,0,0               ; movaps        0x2828(%rip),%xmm12        # 54f0 <_sk_callback_sse2+0x8d0>
   DB  69,15,89,226                        ; mulps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,88,21,120,39,0,0              ; addps         0x2778(%rip),%xmm10        # 5450 <_sk_callback_sse2+0x8ea>
-  DB  68,15,40,37,128,39,0,0              ; movaps        0x2780(%rip),%xmm12        # 5460 <_sk_callback_sse2+0x8fa>
+  DB  68,15,88,21,40,40,0,0               ; addps         0x2828(%rip),%xmm10        # 5500 <_sk_callback_sse2+0x8e0>
+  DB  68,15,40,37,48,40,0,0               ; movaps        0x2830(%rip),%xmm12        # 5510 <_sk_callback_sse2+0x8f0>
   DB  69,15,94,226                        ; divps         %xmm10,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
   DB  69,15,89,203                        ; mulps         %xmm11,%xmm9
@@ -20649,22 +20761,22 @@ _sk_parametric_a_sse2 LABEL PROC
   DB  69,15,91,226                        ; cvtdq2ps      %xmm10,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,194,236,1                     ; cmpltps       %xmm12,%xmm13
-  DB  68,15,40,21,106,39,0,0              ; movaps        0x276a(%rip),%xmm10        # 5470 <_sk_callback_sse2+0x90a>
+  DB  68,15,40,21,26,40,0,0               ; movaps        0x281a(%rip),%xmm10        # 5520 <_sk_callback_sse2+0x900>
   DB  69,15,84,234                        ; andps         %xmm10,%xmm13
   DB  69,15,87,219                        ; xorps         %xmm11,%xmm11
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
   DB  69,15,40,233                        ; movaps        %xmm9,%xmm13
   DB  69,15,92,236                        ; subps         %xmm12,%xmm13
-  DB  68,15,88,13,94,39,0,0               ; addps         0x275e(%rip),%xmm9        # 5480 <_sk_callback_sse2+0x91a>
-  DB  68,15,40,37,102,39,0,0              ; movaps        0x2766(%rip),%xmm12        # 5490 <_sk_callback_sse2+0x92a>
+  DB  68,15,88,13,14,40,0,0               ; addps         0x280e(%rip),%xmm9        # 5530 <_sk_callback_sse2+0x910>
+  DB  68,15,40,37,22,40,0,0               ; movaps        0x2816(%rip),%xmm12        # 5540 <_sk_callback_sse2+0x920>
   DB  69,15,89,229                        ; mulps         %xmm13,%xmm12
   DB  69,15,92,204                        ; subps         %xmm12,%xmm9
-  DB  68,15,40,37,102,39,0,0              ; movaps        0x2766(%rip),%xmm12        # 54a0 <_sk_callback_sse2+0x93a>
+  DB  68,15,40,37,22,40,0,0               ; movaps        0x2816(%rip),%xmm12        # 5550 <_sk_callback_sse2+0x930>
   DB  69,15,92,229                        ; subps         %xmm13,%xmm12
-  DB  68,15,40,45,106,39,0,0              ; movaps        0x276a(%rip),%xmm13        # 54b0 <_sk_callback_sse2+0x94a>
+  DB  68,15,40,45,26,40,0,0               ; movaps        0x281a(%rip),%xmm13        # 5560 <_sk_callback_sse2+0x940>
   DB  69,15,94,236                        ; divps         %xmm12,%xmm13
   DB  69,15,88,233                        ; addps         %xmm9,%xmm13
-  DB  68,15,89,45,106,39,0,0              ; mulps         0x276a(%rip),%xmm13        # 54c0 <_sk_callback_sse2+0x95a>
+  DB  68,15,89,45,26,40,0,0               ; mulps         0x281a(%rip),%xmm13        # 5570 <_sk_callback_sse2+0x950>
   DB  102,69,15,91,205                    ; cvtps2dq      %xmm13,%xmm9
   DB  243,68,15,16,96,20                  ; movss         0x14(%rax),%xmm12
   DB  69,15,198,228,0                     ; shufps        $0x0,%xmm12,%xmm12
@@ -20679,29 +20791,29 @@ _sk_parametric_a_sse2 LABEL PROC
 
 PUBLIC _sk_lab_to_xyz_sse2
 _sk_lab_to_xyz_sse2 LABEL PROC
-  DB  15,89,5,71,39,0,0                   ; mulps         0x2747(%rip),%xmm0        # 54d0 <_sk_callback_sse2+0x96a>
-  DB  68,15,40,5,79,39,0,0                ; movaps        0x274f(%rip),%xmm8        # 54e0 <_sk_callback_sse2+0x97a>
+  DB  15,89,5,247,39,0,0                  ; mulps         0x27f7(%rip),%xmm0        # 5580 <_sk_callback_sse2+0x960>
+  DB  68,15,40,5,255,39,0,0               ; movaps        0x27ff(%rip),%xmm8        # 5590 <_sk_callback_sse2+0x970>
   DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
-  DB  68,15,40,13,83,39,0,0               ; movaps        0x2753(%rip),%xmm9        # 54f0 <_sk_callback_sse2+0x98a>
+  DB  68,15,40,13,3,40,0,0                ; movaps        0x2803(%rip),%xmm9        # 55a0 <_sk_callback_sse2+0x980>
   DB  65,15,88,201                        ; addps         %xmm9,%xmm1
   DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
   DB  65,15,88,209                        ; addps         %xmm9,%xmm2
-  DB  15,88,5,80,39,0,0                   ; addps         0x2750(%rip),%xmm0        # 5500 <_sk_callback_sse2+0x99a>
-  DB  15,89,5,89,39,0,0                   ; mulps         0x2759(%rip),%xmm0        # 5510 <_sk_callback_sse2+0x9aa>
-  DB  15,89,13,98,39,0,0                  ; mulps         0x2762(%rip),%xmm1        # 5520 <_sk_callback_sse2+0x9ba>
+  DB  15,88,5,0,40,0,0                    ; addps         0x2800(%rip),%xmm0        # 55b0 <_sk_callback_sse2+0x990>
+  DB  15,89,5,9,40,0,0                    ; mulps         0x2809(%rip),%xmm0        # 55c0 <_sk_callback_sse2+0x9a0>
+  DB  15,89,13,18,40,0,0                  ; mulps         0x2812(%rip),%xmm1        # 55d0 <_sk_callback_sse2+0x9b0>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
-  DB  15,89,21,104,39,0,0                 ; mulps         0x2768(%rip),%xmm2        # 5530 <_sk_callback_sse2+0x9ca>
+  DB  15,89,21,24,40,0,0                  ; mulps         0x2818(%rip),%xmm2        # 55e0 <_sk_callback_sse2+0x9c0>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  68,15,92,202                        ; subps         %xmm2,%xmm9
   DB  68,15,40,225                        ; movaps        %xmm1,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
   DB  68,15,89,225                        ; mulps         %xmm1,%xmm12
-  DB  15,40,21,93,39,0,0                  ; movaps        0x275d(%rip),%xmm2        # 5540 <_sk_callback_sse2+0x9da>
+  DB  15,40,21,13,40,0,0                  ; movaps        0x280d(%rip),%xmm2        # 55f0 <_sk_callback_sse2+0x9d0>
   DB  68,15,40,194                        ; movaps        %xmm2,%xmm8
   DB  69,15,194,196,1                     ; cmpltps       %xmm12,%xmm8
-  DB  68,15,40,21,92,39,0,0               ; movaps        0x275c(%rip),%xmm10        # 5550 <_sk_callback_sse2+0x9ea>
+  DB  68,15,40,21,12,40,0,0               ; movaps        0x280c(%rip),%xmm10        # 5600 <_sk_callback_sse2+0x9e0>
   DB  65,15,88,202                        ; addps         %xmm10,%xmm1
-  DB  68,15,40,29,96,39,0,0               ; movaps        0x2760(%rip),%xmm11        # 5560 <_sk_callback_sse2+0x9fa>
+  DB  68,15,40,29,16,40,0,0               ; movaps        0x2810(%rip),%xmm11        # 5610 <_sk_callback_sse2+0x9f0>
   DB  65,15,89,203                        ; mulps         %xmm11,%xmm1
   DB  69,15,84,224                        ; andps         %xmm8,%xmm12
   DB  68,15,85,193                        ; andnps        %xmm1,%xmm8
@@ -20725,8 +20837,8 @@ _sk_lab_to_xyz_sse2 LABEL PROC
   DB  15,84,194                           ; andps         %xmm2,%xmm0
   DB  65,15,85,209                        ; andnps        %xmm9,%xmm2
   DB  15,86,208                           ; orps          %xmm0,%xmm2
-  DB  68,15,89,5,16,39,0,0                ; mulps         0x2710(%rip),%xmm8        # 5570 <_sk_callback_sse2+0xa0a>
-  DB  15,89,21,25,39,0,0                  ; mulps         0x2719(%rip),%xmm2        # 5580 <_sk_callback_sse2+0xa1a>
+  DB  68,15,89,5,192,39,0,0               ; mulps         0x27c0(%rip),%xmm8        # 5620 <_sk_callback_sse2+0xa00>
+  DB  15,89,21,201,39,0,0                 ; mulps         0x27c9(%rip),%xmm2        # 5630 <_sk_callback_sse2+0xa10>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  65,15,40,192                        ; movaps        %xmm8,%xmm0
   DB  255,224                             ; jmpq          *%rax
@@ -20740,7 +20852,7 @@ _sk_load_a8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,1,39,0,0                   ; mulps         0x2701(%rip),%xmm3        # 5590 <_sk_callback_sse2+0xa2a>
+  DB  15,89,29,177,39,0,0                 ; mulps         0x27b1(%rip),%xmm3        # 5640 <_sk_callback_sse2+0xa20>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
@@ -20783,7 +20895,7 @@ _sk_gather_a8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,216                           ; cvtdq2ps      %xmm0,%xmm3
-  DB  15,89,29,112,38,0,0                 ; mulps         0x2670(%rip),%xmm3        # 55a0 <_sk_callback_sse2+0xa3a>
+  DB  15,89,29,32,39,0,0                  ; mulps         0x2720(%rip),%xmm3        # 5650 <_sk_callback_sse2+0xa30>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
@@ -20794,7 +20906,7 @@ PUBLIC _sk_store_a8_sse2
 _sk_store_a8_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,100,38,0,0               ; movaps        0x2664(%rip),%xmm8        # 55b0 <_sk_callback_sse2+0xa4a>
+  DB  68,15,40,5,20,39,0,0                ; movaps        0x2714(%rip),%xmm8        # 5660 <_sk_callback_sse2+0xa40>
   DB  68,15,89,195                        ; mulps         %xmm3,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
   DB  102,65,15,114,240,16                ; pslld         $0x10,%xmm8
@@ -20814,9 +20926,9 @@ _sk_load_g8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,43,38,0,0                   ; mulps         0x262b(%rip),%xmm0        # 55c0 <_sk_callback_sse2+0xa5a>
+  DB  15,89,5,219,38,0,0                  ; mulps         0x26db(%rip),%xmm0        # 5670 <_sk_callback_sse2+0xa50>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,50,38,0,0                  ; movaps        0x2632(%rip),%xmm3        # 55d0 <_sk_callback_sse2+0xa6a>
+  DB  15,40,29,226,38,0,0                 ; movaps        0x26e2(%rip),%xmm3        # 5680 <_sk_callback_sse2+0xa60>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -20857,9 +20969,9 @@ _sk_gather_g8_sse2 LABEL PROC
   DB  102,15,96,193                       ; punpcklbw     %xmm1,%xmm0
   DB  102,15,97,193                       ; punpcklwd     %xmm1,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,167,37,0,0                  ; mulps         0x25a7(%rip),%xmm0        # 55e0 <_sk_callback_sse2+0xa7a>
+  DB  15,89,5,87,38,0,0                   ; mulps         0x2657(%rip),%xmm0        # 5690 <_sk_callback_sse2+0xa70>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,174,37,0,0                 ; movaps        0x25ae(%rip),%xmm3        # 55f0 <_sk_callback_sse2+0xa8a>
+  DB  15,40,29,94,38,0,0                  ; movaps        0x265e(%rip),%xmm3        # 56a0 <_sk_callback_sse2+0xa80>
   DB  15,40,200                           ; movaps        %xmm0,%xmm1
   DB  15,40,208                           ; movaps        %xmm0,%xmm2
   DB  255,224                             ; jmpq          *%rax
@@ -20920,11 +21032,11 @@ _sk_gather_i8_sse2 LABEL PROC
   DB  102,67,15,110,12,136                ; movd          (%r8,%r9,4),%xmm1
   DB  102,68,15,98,201                    ; punpckldq     %xmm1,%xmm9
   DB  102,68,15,98,200                    ; punpckldq     %xmm0,%xmm9
-  DB  102,15,111,21,205,36,0,0            ; movdqa        0x24cd(%rip),%xmm2        # 5600 <_sk_callback_sse2+0xa9a>
+  DB  102,15,111,21,125,37,0,0            ; movdqa        0x257d(%rip),%xmm2        # 56b0 <_sk_callback_sse2+0xa90>
   DB  102,65,15,111,193                   ; movdqa        %xmm9,%xmm0
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,201,36,0,0               ; movaps        0x24c9(%rip),%xmm8        # 5610 <_sk_callback_sse2+0xaaa>
+  DB  68,15,40,5,121,37,0,0               ; movaps        0x2579(%rip),%xmm8        # 56c0 <_sk_callback_sse2+0xaa0>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
@@ -20949,19 +21061,19 @@ _sk_load_565_sse2 LABEL PROC
   DB  243,15,126,20,120                   ; movq          (%rax,%rdi,2),%xmm2
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
-  DB  102,15,111,5,127,36,0,0             ; movdqa        0x247f(%rip),%xmm0        # 5620 <_sk_callback_sse2+0xaba>
+  DB  102,15,111,5,47,37,0,0              ; movdqa        0x252f(%rip),%xmm0        # 56d0 <_sk_callback_sse2+0xab0>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,129,36,0,0                  ; mulps         0x2481(%rip),%xmm0        # 5630 <_sk_callback_sse2+0xaca>
-  DB  102,15,111,13,137,36,0,0            ; movdqa        0x2489(%rip),%xmm1        # 5640 <_sk_callback_sse2+0xada>
+  DB  15,89,5,49,37,0,0                   ; mulps         0x2531(%rip),%xmm0        # 56e0 <_sk_callback_sse2+0xac0>
+  DB  102,15,111,13,57,37,0,0             ; movdqa        0x2539(%rip),%xmm1        # 56f0 <_sk_callback_sse2+0xad0>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,139,36,0,0                 ; mulps         0x248b(%rip),%xmm1        # 5650 <_sk_callback_sse2+0xaea>
-  DB  102,15,219,21,147,36,0,0            ; pand          0x2493(%rip),%xmm2        # 5660 <_sk_callback_sse2+0xafa>
+  DB  15,89,13,59,37,0,0                  ; mulps         0x253b(%rip),%xmm1        # 5700 <_sk_callback_sse2+0xae0>
+  DB  102,15,219,21,67,37,0,0             ; pand          0x2543(%rip),%xmm2        # 5710 <_sk_callback_sse2+0xaf0>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,153,36,0,0                 ; mulps         0x2499(%rip),%xmm2        # 5670 <_sk_callback_sse2+0xb0a>
+  DB  15,89,21,73,37,0,0                  ; mulps         0x2549(%rip),%xmm2        # 5720 <_sk_callback_sse2+0xb00>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,160,36,0,0                 ; movaps        0x24a0(%rip),%xmm3        # 5680 <_sk_callback_sse2+0xb1a>
+  DB  15,40,29,80,37,0,0                  ; movaps        0x2550(%rip),%xmm3        # 5730 <_sk_callback_sse2+0xb10>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_gather_565_sse2
@@ -20994,31 +21106,31 @@ _sk_gather_565_sse2 LABEL PROC
   DB  102,15,196,208,3                    ; pinsrw        $0x3,%eax,%xmm2
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,208                       ; punpcklwd     %xmm0,%xmm2
-  DB  102,15,111,5,41,36,0,0              ; movdqa        0x2429(%rip),%xmm0        # 5690 <_sk_callback_sse2+0xb2a>
+  DB  102,15,111,5,217,36,0,0             ; movdqa        0x24d9(%rip),%xmm0        # 5740 <_sk_callback_sse2+0xb20>
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,43,36,0,0                   ; mulps         0x242b(%rip),%xmm0        # 56a0 <_sk_callback_sse2+0xb3a>
-  DB  102,15,111,13,51,36,0,0             ; movdqa        0x2433(%rip),%xmm1        # 56b0 <_sk_callback_sse2+0xb4a>
+  DB  15,89,5,219,36,0,0                  ; mulps         0x24db(%rip),%xmm0        # 5750 <_sk_callback_sse2+0xb30>
+  DB  102,15,111,13,227,36,0,0            ; movdqa        0x24e3(%rip),%xmm1        # 5760 <_sk_callback_sse2+0xb40>
   DB  102,15,219,202                      ; pand          %xmm2,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,53,36,0,0                  ; mulps         0x2435(%rip),%xmm1        # 56c0 <_sk_callback_sse2+0xb5a>
-  DB  102,15,219,21,61,36,0,0             ; pand          0x243d(%rip),%xmm2        # 56d0 <_sk_callback_sse2+0xb6a>
+  DB  15,89,13,229,36,0,0                 ; mulps         0x24e5(%rip),%xmm1        # 5770 <_sk_callback_sse2+0xb50>
+  DB  102,15,219,21,237,36,0,0            ; pand          0x24ed(%rip),%xmm2        # 5780 <_sk_callback_sse2+0xb60>
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,67,36,0,0                  ; mulps         0x2443(%rip),%xmm2        # 56e0 <_sk_callback_sse2+0xb7a>
+  DB  15,89,21,243,36,0,0                 ; mulps         0x24f3(%rip),%xmm2        # 5790 <_sk_callback_sse2+0xb70>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,74,36,0,0                  ; movaps        0x244a(%rip),%xmm3        # 56f0 <_sk_callback_sse2+0xb8a>
+  DB  15,40,29,250,36,0,0                 ; movaps        0x24fa(%rip),%xmm3        # 57a0 <_sk_callback_sse2+0xb80>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_565_sse2
 _sk_store_565_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,75,36,0,0                ; movaps        0x244b(%rip),%xmm8        # 5700 <_sk_callback_sse2+0xb9a>
+  DB  68,15,40,5,251,36,0,0               ; movaps        0x24fb(%rip),%xmm8        # 57b0 <_sk_callback_sse2+0xb90>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
   DB  102,65,15,114,241,11                ; pslld         $0xb,%xmm9
-  DB  68,15,40,21,64,36,0,0               ; movaps        0x2440(%rip),%xmm10        # 5710 <_sk_callback_sse2+0xbaa>
+  DB  68,15,40,21,240,36,0,0              ; movaps        0x24f0(%rip),%xmm10        # 57c0 <_sk_callback_sse2+0xba0>
   DB  68,15,89,209                        ; mulps         %xmm1,%xmm10
   DB  102,69,15,91,210                    ; cvtps2dq      %xmm10,%xmm10
   DB  102,65,15,114,242,5                 ; pslld         $0x5,%xmm10
@@ -21040,21 +21152,21 @@ _sk_load_4444_sse2 LABEL PROC
   DB  243,15,126,28,120                   ; movq          (%rax,%rdi,2),%xmm3
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,216                       ; punpcklwd     %xmm0,%xmm3
-  DB  102,15,111,5,249,35,0,0             ; movdqa        0x23f9(%rip),%xmm0        # 5720 <_sk_callback_sse2+0xbba>
+  DB  102,15,111,5,169,36,0,0             ; movdqa        0x24a9(%rip),%xmm0        # 57d0 <_sk_callback_sse2+0xbb0>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,251,35,0,0                  ; mulps         0x23fb(%rip),%xmm0        # 5730 <_sk_callback_sse2+0xbca>
-  DB  102,15,111,13,3,36,0,0              ; movdqa        0x2403(%rip),%xmm1        # 5740 <_sk_callback_sse2+0xbda>
+  DB  15,89,5,171,36,0,0                  ; mulps         0x24ab(%rip),%xmm0        # 57e0 <_sk_callback_sse2+0xbc0>
+  DB  102,15,111,13,179,36,0,0            ; movdqa        0x24b3(%rip),%xmm1        # 57f0 <_sk_callback_sse2+0xbd0>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,5,36,0,0                   ; mulps         0x2405(%rip),%xmm1        # 5750 <_sk_callback_sse2+0xbea>
-  DB  102,15,111,21,13,36,0,0             ; movdqa        0x240d(%rip),%xmm2        # 5760 <_sk_callback_sse2+0xbfa>
+  DB  15,89,13,181,36,0,0                 ; mulps         0x24b5(%rip),%xmm1        # 5800 <_sk_callback_sse2+0xbe0>
+  DB  102,15,111,21,189,36,0,0            ; movdqa        0x24bd(%rip),%xmm2        # 5810 <_sk_callback_sse2+0xbf0>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,15,36,0,0                  ; mulps         0x240f(%rip),%xmm2        # 5770 <_sk_callback_sse2+0xc0a>
-  DB  102,15,219,29,23,36,0,0             ; pand          0x2417(%rip),%xmm3        # 5780 <_sk_callback_sse2+0xc1a>
+  DB  15,89,21,191,36,0,0                 ; mulps         0x24bf(%rip),%xmm2        # 5820 <_sk_callback_sse2+0xc00>
+  DB  102,15,219,29,199,36,0,0            ; pand          0x24c7(%rip),%xmm3        # 5830 <_sk_callback_sse2+0xc10>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,29,36,0,0                  ; mulps         0x241d(%rip),%xmm3        # 5790 <_sk_callback_sse2+0xc2a>
+  DB  15,89,29,205,36,0,0                 ; mulps         0x24cd(%rip),%xmm3        # 5840 <_sk_callback_sse2+0xc20>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -21088,21 +21200,21 @@ _sk_gather_4444_sse2 LABEL PROC
   DB  102,15,196,216,3                    ; pinsrw        $0x3,%eax,%xmm3
   DB  102,15,239,192                      ; pxor          %xmm0,%xmm0
   DB  102,15,97,216                       ; punpcklwd     %xmm0,%xmm3
-  DB  102,15,111,5,164,35,0,0             ; movdqa        0x23a4(%rip),%xmm0        # 57a0 <_sk_callback_sse2+0xc3a>
+  DB  102,15,111,5,84,36,0,0              ; movdqa        0x2454(%rip),%xmm0        # 5850 <_sk_callback_sse2+0xc30>
   DB  102,15,219,195                      ; pand          %xmm3,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  15,89,5,166,35,0,0                  ; mulps         0x23a6(%rip),%xmm0        # 57b0 <_sk_callback_sse2+0xc4a>
-  DB  102,15,111,13,174,35,0,0            ; movdqa        0x23ae(%rip),%xmm1        # 57c0 <_sk_callback_sse2+0xc5a>
+  DB  15,89,5,86,36,0,0                   ; mulps         0x2456(%rip),%xmm0        # 5860 <_sk_callback_sse2+0xc40>
+  DB  102,15,111,13,94,36,0,0             ; movdqa        0x245e(%rip),%xmm1        # 5870 <_sk_callback_sse2+0xc50>
   DB  102,15,219,203                      ; pand          %xmm3,%xmm1
   DB  15,91,201                           ; cvtdq2ps      %xmm1,%xmm1
-  DB  15,89,13,176,35,0,0                 ; mulps         0x23b0(%rip),%xmm1        # 57d0 <_sk_callback_sse2+0xc6a>
-  DB  102,15,111,21,184,35,0,0            ; movdqa        0x23b8(%rip),%xmm2        # 57e0 <_sk_callback_sse2+0xc7a>
+  DB  15,89,13,96,36,0,0                  ; mulps         0x2460(%rip),%xmm1        # 5880 <_sk_callback_sse2+0xc60>
+  DB  102,15,111,21,104,36,0,0            ; movdqa        0x2468(%rip),%xmm2        # 5890 <_sk_callback_sse2+0xc70>
   DB  102,15,219,211                      ; pand          %xmm3,%xmm2
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
-  DB  15,89,21,186,35,0,0                 ; mulps         0x23ba(%rip),%xmm2        # 57f0 <_sk_callback_sse2+0xc8a>
-  DB  102,15,219,29,194,35,0,0            ; pand          0x23c2(%rip),%xmm3        # 5800 <_sk_callback_sse2+0xc9a>
+  DB  15,89,21,106,36,0,0                 ; mulps         0x246a(%rip),%xmm2        # 58a0 <_sk_callback_sse2+0xc80>
+  DB  102,15,219,29,114,36,0,0            ; pand          0x2472(%rip),%xmm3        # 58b0 <_sk_callback_sse2+0xc90>
   DB  15,91,219                           ; cvtdq2ps      %xmm3,%xmm3
-  DB  15,89,29,200,35,0,0                 ; mulps         0x23c8(%rip),%xmm3        # 5810 <_sk_callback_sse2+0xcaa>
+  DB  15,89,29,120,36,0,0                 ; mulps         0x2478(%rip),%xmm3        # 58c0 <_sk_callback_sse2+0xca0>
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
 
@@ -21110,7 +21222,7 @@ PUBLIC _sk_store_4444_sse2
 _sk_store_4444_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,199,35,0,0               ; movaps        0x23c7(%rip),%xmm8        # 5820 <_sk_callback_sse2+0xcba>
+  DB  68,15,40,5,119,36,0,0               ; movaps        0x2477(%rip),%xmm8        # 58d0 <_sk_callback_sse2+0xcb0>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -21140,11 +21252,11 @@ _sk_load_8888_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
   DB  68,15,16,12,184                     ; movups        (%rax,%rdi,4),%xmm9
-  DB  15,40,21,90,35,0,0                  ; movaps        0x235a(%rip),%xmm2        # 5830 <_sk_callback_sse2+0xcca>
+  DB  15,40,21,10,36,0,0                  ; movaps        0x240a(%rip),%xmm2        # 58e0 <_sk_callback_sse2+0xcc0>
   DB  65,15,40,193                        ; movaps        %xmm9,%xmm0
   DB  15,84,194                           ; andps         %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,88,35,0,0                ; movaps        0x2358(%rip),%xmm8        # 5840 <_sk_callback_sse2+0xcda>
+  DB  68,15,40,5,8,36,0,0                 ; movaps        0x2408(%rip),%xmm8        # 58f0 <_sk_callback_sse2+0xcd0>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  65,15,40,201                        ; movaps        %xmm9,%xmm1
   DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
@@ -21191,11 +21303,11 @@ _sk_gather_8888_sse2 LABEL PROC
   DB  102,67,15,110,12,129                ; movd          (%r9,%r8,4),%xmm1
   DB  102,68,15,98,201                    ; punpckldq     %xmm1,%xmm9
   DB  102,68,15,98,200                    ; punpckldq     %xmm0,%xmm9
-  DB  102,15,111,21,169,34,0,0            ; movdqa        0x22a9(%rip),%xmm2        # 5850 <_sk_callback_sse2+0xcea>
+  DB  102,15,111,21,89,35,0,0             ; movdqa        0x2359(%rip),%xmm2        # 5900 <_sk_callback_sse2+0xce0>
   DB  102,65,15,111,193                   ; movdqa        %xmm9,%xmm0
   DB  102,15,219,194                      ; pand          %xmm2,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,5,165,34,0,0               ; movaps        0x22a5(%rip),%xmm8        # 5860 <_sk_callback_sse2+0xcfa>
+  DB  68,15,40,5,85,35,0,0                ; movaps        0x2355(%rip),%xmm8        # 5910 <_sk_callback_sse2+0xcf0>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,65,15,111,201                   ; movdqa        %xmm9,%xmm1
   DB  102,15,114,209,8                    ; psrld         $0x8,%xmm1
@@ -21217,7 +21329,7 @@ PUBLIC _sk_store_8888_sse2
 _sk_store_8888_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,5,104,34,0,0               ; movaps        0x2268(%rip),%xmm8        # 5870 <_sk_callback_sse2+0xd0a>
+  DB  68,15,40,5,24,35,0,0                ; movaps        0x2318(%rip),%xmm8        # 5920 <_sk_callback_sse2+0xd00>
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  102,69,15,91,201                    ; cvtps2dq      %xmm9,%xmm9
@@ -21254,7 +21366,7 @@ _sk_load_f16_sse2 LABEL PROC
   DB  102,69,15,239,210                   ; pxor          %xmm10,%xmm10
   DB  102,65,15,111,206                   ; movdqa        %xmm14,%xmm1
   DB  102,65,15,97,202                    ; punpcklwd     %xmm10,%xmm1
-  DB  102,68,15,111,13,216,33,0,0         ; movdqa        0x21d8(%rip),%xmm9        # 5880 <_sk_callback_sse2+0xd1a>
+  DB  102,68,15,111,13,136,34,0,0         ; movdqa        0x2288(%rip),%xmm9        # 5930 <_sk_callback_sse2+0xd10>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
   DB  102,15,239,200                      ; pxor          %xmm0,%xmm1
@@ -21262,11 +21374,11 @@ _sk_load_f16_sse2 LABEL PROC
   DB  102,68,15,111,233                   ; movdqa        %xmm1,%xmm13
   DB  102,65,15,114,245,13                ; pslld         $0xd,%xmm13
   DB  102,68,15,235,232                   ; por           %xmm0,%xmm13
-  DB  102,68,15,111,29,189,33,0,0         ; movdqa        0x21bd(%rip),%xmm11        # 5890 <_sk_callback_sse2+0xd2a>
+  DB  102,68,15,111,29,109,34,0,0         ; movdqa        0x226d(%rip),%xmm11        # 5940 <_sk_callback_sse2+0xd20>
   DB  102,69,15,254,235                   ; paddd         %xmm11,%xmm13
-  DB  102,68,15,111,37,191,33,0,0         ; movdqa        0x21bf(%rip),%xmm12        # 58a0 <_sk_callback_sse2+0xd3a>
+  DB  102,68,15,111,37,111,34,0,0         ; movdqa        0x226f(%rip),%xmm12        # 5950 <_sk_callback_sse2+0xd30>
   DB  102,65,15,239,204                   ; pxor          %xmm12,%xmm1
-  DB  102,15,111,29,194,33,0,0            ; movdqa        0x21c2(%rip),%xmm3        # 58b0 <_sk_callback_sse2+0xd4a>
+  DB  102,15,111,29,114,34,0,0            ; movdqa        0x2272(%rip),%xmm3        # 5960 <_sk_callback_sse2+0xd40>
   DB  102,15,111,195                      ; movdqa        %xmm3,%xmm0
   DB  102,15,102,193                      ; pcmpgtd       %xmm1,%xmm0
   DB  102,65,15,223,197                   ; pandn         %xmm13,%xmm0
@@ -21350,7 +21462,7 @@ _sk_gather_f16_sse2 LABEL PROC
   DB  102,69,15,239,210                   ; pxor          %xmm10,%xmm10
   DB  102,65,15,111,206                   ; movdqa        %xmm14,%xmm1
   DB  102,65,15,97,202                    ; punpcklwd     %xmm10,%xmm1
-  DB  102,68,15,111,13,80,32,0,0          ; movdqa        0x2050(%rip),%xmm9        # 58c0 <_sk_callback_sse2+0xd5a>
+  DB  102,68,15,111,13,0,33,0,0           ; movdqa        0x2100(%rip),%xmm9        # 5970 <_sk_callback_sse2+0xd50>
   DB  102,15,111,193                      ; movdqa        %xmm1,%xmm0
   DB  102,65,15,219,193                   ; pand          %xmm9,%xmm0
   DB  102,15,239,200                      ; pxor          %xmm0,%xmm1
@@ -21358,11 +21470,11 @@ _sk_gather_f16_sse2 LABEL PROC
   DB  102,68,15,111,233                   ; movdqa        %xmm1,%xmm13
   DB  102,65,15,114,245,13                ; pslld         $0xd,%xmm13
   DB  102,68,15,235,232                   ; por           %xmm0,%xmm13
-  DB  102,68,15,111,29,53,32,0,0          ; movdqa        0x2035(%rip),%xmm11        # 58d0 <_sk_callback_sse2+0xd6a>
+  DB  102,68,15,111,29,229,32,0,0         ; movdqa        0x20e5(%rip),%xmm11        # 5980 <_sk_callback_sse2+0xd60>
   DB  102,69,15,254,235                   ; paddd         %xmm11,%xmm13
-  DB  102,68,15,111,37,55,32,0,0          ; movdqa        0x2037(%rip),%xmm12        # 58e0 <_sk_callback_sse2+0xd7a>
+  DB  102,68,15,111,37,231,32,0,0         ; movdqa        0x20e7(%rip),%xmm12        # 5990 <_sk_callback_sse2+0xd70>
   DB  102,65,15,239,204                   ; pxor          %xmm12,%xmm1
-  DB  102,15,111,29,58,32,0,0             ; movdqa        0x203a(%rip),%xmm3        # 58f0 <_sk_callback_sse2+0xd8a>
+  DB  102,15,111,29,234,32,0,0            ; movdqa        0x20ea(%rip),%xmm3        # 59a0 <_sk_callback_sse2+0xd80>
   DB  102,15,111,195                      ; movdqa        %xmm3,%xmm0
   DB  102,15,102,193                      ; pcmpgtd       %xmm1,%xmm0
   DB  102,65,15,223,197                   ; pandn         %xmm13,%xmm0
@@ -21413,17 +21525,17 @@ PUBLIC _sk_store_f16_sse2
 _sk_store_f16_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  102,68,15,111,21,98,31,0,0          ; movdqa        0x1f62(%rip),%xmm10        # 5900 <_sk_callback_sse2+0xd9a>
+  DB  102,68,15,111,21,18,32,0,0          ; movdqa        0x2012(%rip),%xmm10        # 59b0 <_sk_callback_sse2+0xd90>
   DB  102,68,15,111,224                   ; movdqa        %xmm0,%xmm12
   DB  102,68,15,111,232                   ; movdqa        %xmm0,%xmm13
   DB  102,69,15,219,234                   ; pand          %xmm10,%xmm13
   DB  102,69,15,239,229                   ; pxor          %xmm13,%xmm12
-  DB  102,68,15,111,13,85,31,0,0          ; movdqa        0x1f55(%rip),%xmm9        # 5910 <_sk_callback_sse2+0xdaa>
+  DB  102,68,15,111,13,5,32,0,0           ; movdqa        0x2005(%rip),%xmm9        # 59c0 <_sk_callback_sse2+0xda0>
   DB  102,65,15,114,213,16                ; psrld         $0x10,%xmm13
   DB  102,69,15,111,193                   ; movdqa        %xmm9,%xmm8
   DB  102,69,15,102,196                   ; pcmpgtd       %xmm12,%xmm8
   DB  102,65,15,114,212,13                ; psrld         $0xd,%xmm12
-  DB  102,68,15,111,29,70,31,0,0          ; movdqa        0x1f46(%rip),%xmm11        # 5920 <_sk_callback_sse2+0xdba>
+  DB  102,68,15,111,29,246,31,0,0         ; movdqa        0x1ff6(%rip),%xmm11        # 59d0 <_sk_callback_sse2+0xdb0>
   DB  102,69,15,235,235                   ; por           %xmm11,%xmm13
   DB  102,69,15,254,236                   ; paddd         %xmm12,%xmm13
   DB  102,65,15,114,245,16                ; pslld         $0x10,%xmm13
@@ -21500,7 +21612,7 @@ _sk_load_u16_be_sse2 LABEL PROC
   DB  102,69,15,239,201                   ; pxor          %xmm9,%xmm9
   DB  102,65,15,97,201                    ; punpcklwd     %xmm9,%xmm1
   DB  15,91,193                           ; cvtdq2ps      %xmm1,%xmm0
-  DB  68,15,40,5,228,29,0,0               ; movaps        0x1de4(%rip),%xmm8        # 5930 <_sk_callback_sse2+0xdca>
+  DB  68,15,40,5,148,30,0,0               ; movaps        0x1e94(%rip),%xmm8        # 59e0 <_sk_callback_sse2+0xdc0>
   DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -21551,7 +21663,7 @@ _sk_load_rgb_u16_be_sse2 LABEL PROC
   DB  102,69,15,239,192                   ; pxor          %xmm8,%xmm8
   DB  102,65,15,97,192                    ; punpcklwd     %xmm8,%xmm0
   DB  15,91,192                           ; cvtdq2ps      %xmm0,%xmm0
-  DB  68,15,40,13,32,29,0,0               ; movaps        0x1d20(%rip),%xmm9        # 5940 <_sk_callback_sse2+0xdda>
+  DB  68,15,40,13,208,29,0,0              ; movaps        0x1dd0(%rip),%xmm9        # 59f0 <_sk_callback_sse2+0xdd0>
   DB  65,15,89,193                        ; mulps         %xmm9,%xmm0
   DB  102,15,111,203                      ; movdqa        %xmm3,%xmm1
   DB  102,15,113,241,8                    ; psllw         $0x8,%xmm1
@@ -21568,14 +21680,14 @@ _sk_load_rgb_u16_be_sse2 LABEL PROC
   DB  15,91,210                           ; cvtdq2ps      %xmm2,%xmm2
   DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  15,40,29,231,28,0,0                 ; movaps        0x1ce7(%rip),%xmm3        # 5950 <_sk_callback_sse2+0xdea>
+  DB  15,40,29,151,29,0,0                 ; movaps        0x1d97(%rip),%xmm3        # 5a00 <_sk_callback_sse2+0xde0>
   DB  255,224                             ; jmpq          *%rax
 
 PUBLIC _sk_store_u16_be_sse2
 _sk_store_u16_be_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  72,139,0                            ; mov           (%rax),%rax
-  DB  68,15,40,13,232,28,0,0              ; movaps        0x1ce8(%rip),%xmm9        # 5960 <_sk_callback_sse2+0xdfa>
+  DB  68,15,40,13,152,29,0,0              ; movaps        0x1d98(%rip),%xmm9        # 5a10 <_sk_callback_sse2+0xdf0>
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  102,69,15,91,192                    ; cvtps2dq      %xmm8,%xmm8
@@ -21711,7 +21823,7 @@ _sk_repeat_x_sse2 LABEL PROC
   DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,84,13,232,26,0,0              ; andps         0x1ae8(%rip),%xmm9        # 5970 <_sk_callback_sse2+0xe0a>
+  DB  68,15,84,13,152,27,0,0              ; andps         0x1b98(%rip),%xmm9        # 5a20 <_sk_callback_sse2+0xe00>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
   DB  65,15,92,194                        ; subps         %xmm10,%xmm0
@@ -21729,7 +21841,7 @@ _sk_repeat_y_sse2 LABEL PROC
   DB  243,69,15,91,209                    ; cvttps2dq     %xmm9,%xmm10
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,84,13,186,26,0,0              ; andps         0x1aba(%rip),%xmm9        # 5980 <_sk_callback_sse2+0xe1a>
+  DB  68,15,84,13,106,27,0,0              ; andps         0x1b6a(%rip),%xmm9        # 5a30 <_sk_callback_sse2+0xe10>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,89,208                        ; mulps         %xmm8,%xmm10
   DB  65,15,92,202                        ; subps         %xmm10,%xmm1
@@ -21751,7 +21863,7 @@ _sk_mirror_x_sse2 LABEL PROC
   DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
   DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
   DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
-  DB  68,15,84,21,122,26,0,0              ; andps         0x1a7a(%rip),%xmm10        # 5990 <_sk_callback_sse2+0xe2a>
+  DB  68,15,84,21,42,27,0,0               ; andps         0x1b2a(%rip),%xmm10        # 5a40 <_sk_callback_sse2+0xe20>
   DB  69,15,87,228                        ; xorps         %xmm12,%xmm12
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
@@ -21777,7 +21889,7 @@ _sk_mirror_y_sse2 LABEL PROC
   DB  243,69,15,91,218                    ; cvttps2dq     %xmm10,%xmm11
   DB  69,15,91,219                        ; cvtdq2ps      %xmm11,%xmm11
   DB  69,15,194,211,1                     ; cmpltps       %xmm11,%xmm10
-  DB  68,15,84,21,42,26,0,0               ; andps         0x1a2a(%rip),%xmm10        # 59a0 <_sk_callback_sse2+0xe3a>
+  DB  68,15,84,21,218,26,0,0              ; andps         0x1ada(%rip),%xmm10        # 5a50 <_sk_callback_sse2+0xe30>
   DB  69,15,87,228                        ; xorps         %xmm12,%xmm12
   DB  69,15,92,218                        ; subps         %xmm10,%xmm11
   DB  69,15,89,216                        ; mulps         %xmm8,%xmm11
@@ -21792,10 +21904,10 @@ _sk_mirror_y_sse2 LABEL PROC
 PUBLIC _sk_luminance_to_alpha_sse2
 _sk_luminance_to_alpha_sse2 LABEL PROC
   DB  15,40,218                           ; movaps        %xmm2,%xmm3
-  DB  15,89,5,12,26,0,0                   ; mulps         0x1a0c(%rip),%xmm0        # 59b0 <_sk_callback_sse2+0xe4a>
-  DB  15,89,13,21,26,0,0                  ; mulps         0x1a15(%rip),%xmm1        # 59c0 <_sk_callback_sse2+0xe5a>
+  DB  15,89,5,188,26,0,0                  ; mulps         0x1abc(%rip),%xmm0        # 5a60 <_sk_callback_sse2+0xe40>
+  DB  15,89,13,197,26,0,0                 ; mulps         0x1ac5(%rip),%xmm1        # 5a70 <_sk_callback_sse2+0xe50>
   DB  15,88,200                           ; addps         %xmm0,%xmm1
-  DB  15,89,29,27,26,0,0                  ; mulps         0x1a1b(%rip),%xmm3        # 59d0 <_sk_callback_sse2+0xe6a>
+  DB  15,89,29,203,26,0,0                 ; mulps         0x1acb(%rip),%xmm3        # 5a80 <_sk_callback_sse2+0xe60>
   DB  15,88,217                           ; addps         %xmm1,%xmm3
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,87,192                           ; xorps         %xmm0,%xmm0
@@ -21964,6 +22076,54 @@ _sk_matrix_4x5_sse2 LABEL PROC
   DB  65,15,40,219                        ; movaps        %xmm11,%xmm3
   DB  255,224                             ; jmpq          *%rax
 
+PUBLIC _sk_matrix_4x3_sse2
+_sk_matrix_4x3_sse2 LABEL PROC
+  DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
+  DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  243,15,16,0                         ; movss         (%rax),%xmm0
+  DB  243,15,16,72,4                      ; movss         0x4(%rax),%xmm1
+  DB  15,198,192,0                        ; shufps        $0x0,%xmm0,%xmm0
+  DB  243,15,16,80,16                     ; movss         0x10(%rax),%xmm2
+  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
+  DB  243,15,16,88,32                     ; movss         0x20(%rax),%xmm3
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
+  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
+  DB  15,88,211                           ; addps         %xmm3,%xmm2
+  DB  65,15,89,192                        ; mulps         %xmm8,%xmm0
+  DB  15,88,194                           ; addps         %xmm2,%xmm0
+  DB  15,198,201,0                        ; shufps        $0x0,%xmm1,%xmm1
+  DB  243,15,16,80,20                     ; movss         0x14(%rax),%xmm2
+  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
+  DB  243,15,16,88,36                     ; movss         0x24(%rax),%xmm3
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
+  DB  65,15,89,209                        ; mulps         %xmm9,%xmm2
+  DB  15,88,211                           ; addps         %xmm3,%xmm2
+  DB  65,15,89,200                        ; mulps         %xmm8,%xmm1
+  DB  15,88,202                           ; addps         %xmm2,%xmm1
+  DB  243,15,16,80,8                      ; movss         0x8(%rax),%xmm2
+  DB  15,198,210,0                        ; shufps        $0x0,%xmm2,%xmm2
+  DB  243,15,16,88,24                     ; movss         0x18(%rax),%xmm3
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
+  DB  243,68,15,16,80,40                  ; movss         0x28(%rax),%xmm10
+  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
+  DB  65,15,89,217                        ; mulps         %xmm9,%xmm3
+  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
+  DB  65,15,89,208                        ; mulps         %xmm8,%xmm2
+  DB  15,88,211                           ; addps         %xmm3,%xmm2
+  DB  243,15,16,88,12                     ; movss         0xc(%rax),%xmm3
+  DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
+  DB  243,68,15,16,80,28                  ; movss         0x1c(%rax),%xmm10
+  DB  69,15,198,210,0                     ; shufps        $0x0,%xmm10,%xmm10
+  DB  243,68,15,16,88,44                  ; movss         0x2c(%rax),%xmm11
+  DB  69,15,198,219,0                     ; shufps        $0x0,%xmm11,%xmm11
+  DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
+  DB  69,15,88,211                        ; addps         %xmm11,%xmm10
+  DB  65,15,89,216                        ; mulps         %xmm8,%xmm3
+  DB  65,15,88,218                        ; addps         %xmm10,%xmm3
+  DB  72,173                              ; lods          %ds:(%rsi),%rax
+  DB  255,224                             ; jmpq          *%rax
+
 PUBLIC _sk_matrix_perspective_sse2
 _sk_matrix_perspective_sse2 LABEL PROC
   DB  68,15,40,192                        ; movaps        %xmm0,%xmm8
@@ -22011,9 +22171,9 @@ _sk_evenly_spaced_gradient_sse2 LABEL PROC
   DB  72,139,8                            ; mov           (%rax),%rcx
   DB  76,139,88,8                         ; mov           0x8(%rax),%r11
   DB  72,255,201                          ; dec           %rcx
-  DB  120,7                               ; js            4334 <_sk_evenly_spaced_gradient_sse2+0x15>
+  DB  120,7                               ; js            43ee <_sk_evenly_spaced_gradient_sse2+0x15>
   DB  243,72,15,42,201                    ; cvtsi2ss      %rcx,%xmm1
-  DB  235,21                              ; jmp           4349 <_sk_evenly_spaced_gradient_sse2+0x2a>
+  DB  235,21                              ; jmp           4403 <_sk_evenly_spaced_gradient_sse2+0x2a>
   DB  73,137,200                          ; mov           %rcx,%r8
   DB  73,209,232                          ; shr           %r8
   DB  131,225,1                           ; and           $0x1,%ecx
@@ -22111,12 +22271,12 @@ _sk_gradient_sse2 LABEL PROC
   DB  76,139,0                            ; mov           (%rax),%r8
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
   DB  73,131,248,2                        ; cmp           $0x2,%r8
-  DB  114,50                              ; jb            450c <_sk_gradient_sse2+0x41>
+  DB  114,50                              ; jb            45c6 <_sk_gradient_sse2+0x41>
   DB  72,139,72,72                        ; mov           0x48(%rax),%rcx
   DB  73,255,200                          ; dec           %r8
   DB  72,131,193,4                        ; add           $0x4,%rcx
   DB  102,15,239,201                      ; pxor          %xmm1,%xmm1
-  DB  15,40,21,240,20,0,0                 ; movaps        0x14f0(%rip),%xmm2        # 59e0 <_sk_callback_sse2+0xe7a>
+  DB  15,40,21,230,20,0,0                 ; movaps        0x14e6(%rip),%xmm2        # 5a90 <_sk_callback_sse2+0xe70>
   DB  243,15,16,25                        ; movss         (%rcx),%xmm3
   DB  15,198,219,0                        ; shufps        $0x0,%xmm3,%xmm3
   DB  15,194,216,2                        ; cmpleps       %xmm0,%xmm3
@@ -22124,7 +22284,7 @@ _sk_gradient_sse2 LABEL PROC
   DB  102,15,254,203                      ; paddd         %xmm3,%xmm1
   DB  72,131,193,4                        ; add           $0x4,%rcx
   DB  73,255,200                          ; dec           %r8
-  DB  117,228                             ; jne           44f0 <_sk_gradient_sse2+0x25>
+  DB  117,228                             ; jne           45aa <_sk_gradient_sse2+0x25>
   DB  65,86                               ; push          %r14
   DB  83                                  ; push          %rbx
   DB  102,15,112,209,78                   ; pshufd        $0x4e,%xmm1,%xmm2
@@ -22260,29 +22420,29 @@ _sk_xy_to_unit_angle_sse2 LABEL PROC
   DB  69,15,94,220                        ; divps         %xmm12,%xmm11
   DB  69,15,40,227                        ; movaps        %xmm11,%xmm12
   DB  69,15,89,228                        ; mulps         %xmm12,%xmm12
-  DB  68,15,40,45,178,18,0,0              ; movaps        0x12b2(%rip),%xmm13        # 59f0 <_sk_callback_sse2+0xe8a>
+  DB  68,15,40,45,168,18,0,0              ; movaps        0x12a8(%rip),%xmm13        # 5aa0 <_sk_callback_sse2+0xe80>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,182,18,0,0              ; addps         0x12b6(%rip),%xmm13        # 5a00 <_sk_callback_sse2+0xe9a>
+  DB  68,15,88,45,172,18,0,0              ; addps         0x12ac(%rip),%xmm13        # 5ab0 <_sk_callback_sse2+0xe90>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,186,18,0,0              ; addps         0x12ba(%rip),%xmm13        # 5a10 <_sk_callback_sse2+0xeaa>
+  DB  68,15,88,45,176,18,0,0              ; addps         0x12b0(%rip),%xmm13        # 5ac0 <_sk_callback_sse2+0xea0>
   DB  69,15,89,236                        ; mulps         %xmm12,%xmm13
-  DB  68,15,88,45,190,18,0,0              ; addps         0x12be(%rip),%xmm13        # 5a20 <_sk_callback_sse2+0xeba>
+  DB  68,15,88,45,180,18,0,0              ; addps         0x12b4(%rip),%xmm13        # 5ad0 <_sk_callback_sse2+0xeb0>
   DB  69,15,89,235                        ; mulps         %xmm11,%xmm13
   DB  69,15,194,202,1                     ; cmpltps       %xmm10,%xmm9
-  DB  68,15,40,21,189,18,0,0              ; movaps        0x12bd(%rip),%xmm10        # 5a30 <_sk_callback_sse2+0xeca>
+  DB  68,15,40,21,179,18,0,0              ; movaps        0x12b3(%rip),%xmm10        # 5ae0 <_sk_callback_sse2+0xec0>
   DB  69,15,92,213                        ; subps         %xmm13,%xmm10
   DB  69,15,84,209                        ; andps         %xmm9,%xmm10
   DB  69,15,85,205                        ; andnps        %xmm13,%xmm9
   DB  69,15,86,202                        ; orps          %xmm10,%xmm9
   DB  68,15,194,192,1                     ; cmpltps       %xmm0,%xmm8
-  DB  68,15,40,21,176,18,0,0              ; movaps        0x12b0(%rip),%xmm10        # 5a40 <_sk_callback_sse2+0xeda>
+  DB  68,15,40,21,166,18,0,0              ; movaps        0x12a6(%rip),%xmm10        # 5af0 <_sk_callback_sse2+0xed0>
   DB  69,15,92,209                        ; subps         %xmm9,%xmm10
   DB  69,15,84,208                        ; andps         %xmm8,%xmm10
   DB  69,15,85,193                        ; andnps        %xmm9,%xmm8
   DB  69,15,86,194                        ; orps          %xmm10,%xmm8
   DB  68,15,40,201                        ; movaps        %xmm1,%xmm9
   DB  68,15,194,200,1                     ; cmpltps       %xmm0,%xmm9
-  DB  68,15,40,21,159,18,0,0              ; movaps        0x129f(%rip),%xmm10        # 5a50 <_sk_callback_sse2+0xeea>
+  DB  68,15,40,21,149,18,0,0              ; movaps        0x1295(%rip),%xmm10        # 5b00 <_sk_callback_sse2+0xee0>
   DB  69,15,92,208                        ; subps         %xmm8,%xmm10
   DB  69,15,84,209                        ; andps         %xmm9,%xmm10
   DB  69,15,85,200                        ; andnps        %xmm8,%xmm9
@@ -22305,7 +22465,7 @@ _sk_xy_to_radius_sse2 LABEL PROC
 PUBLIC _sk_save_xy_sse2
 _sk_save_xy_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,113,18,0,0               ; movaps        0x1271(%rip),%xmm8        # 5a60 <_sk_callback_sse2+0xefa>
+  DB  68,15,40,5,103,18,0,0               ; movaps        0x1267(%rip),%xmm8        # 5b10 <_sk_callback_sse2+0xef0>
   DB  15,17,0                             ; movups        %xmm0,(%rax)
   DB  68,15,40,200                        ; movaps        %xmm0,%xmm9
   DB  69,15,88,200                        ; addps         %xmm8,%xmm9
@@ -22313,7 +22473,7 @@ _sk_save_xy_sse2 LABEL PROC
   DB  69,15,91,210                        ; cvtdq2ps      %xmm10,%xmm10
   DB  69,15,40,217                        ; movaps        %xmm9,%xmm11
   DB  69,15,194,218,1                     ; cmpltps       %xmm10,%xmm11
-  DB  68,15,40,37,92,18,0,0               ; movaps        0x125c(%rip),%xmm12        # 5a70 <_sk_callback_sse2+0xf0a>
+  DB  68,15,40,37,82,18,0,0               ; movaps        0x1252(%rip),%xmm12        # 5b20 <_sk_callback_sse2+0xf00>
   DB  69,15,84,220                        ; andps         %xmm12,%xmm11
   DB  69,15,92,211                        ; subps         %xmm11,%xmm10
   DB  69,15,92,202                        ; subps         %xmm10,%xmm9
@@ -22356,8 +22516,8 @@ _sk_bilinear_nx_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,213,17,0,0                  ; addps         0x11d5(%rip),%xmm0        # 5a80 <_sk_callback_sse2+0xf1a>
-  DB  68,15,40,13,221,17,0,0              ; movaps        0x11dd(%rip),%xmm9        # 5a90 <_sk_callback_sse2+0xf2a>
+  DB  15,88,5,203,17,0,0                  ; addps         0x11cb(%rip),%xmm0        # 5b30 <_sk_callback_sse2+0xf10>
+  DB  68,15,40,13,211,17,0,0              ; movaps        0x11d3(%rip),%xmm9        # 5b40 <_sk_callback_sse2+0xf20>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -22368,7 +22528,7 @@ _sk_bilinear_px_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,204,17,0,0                  ; addps         0x11cc(%rip),%xmm0        # 5aa0 <_sk_callback_sse2+0xf3a>
+  DB  15,88,5,194,17,0,0                  ; addps         0x11c2(%rip),%xmm0        # 5b50 <_sk_callback_sse2+0xf30>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -22378,8 +22538,8 @@ _sk_bilinear_ny_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,190,17,0,0                 ; addps         0x11be(%rip),%xmm1        # 5ab0 <_sk_callback_sse2+0xf4a>
-  DB  68,15,40,13,198,17,0,0              ; movaps        0x11c6(%rip),%xmm9        # 5ac0 <_sk_callback_sse2+0xf5a>
+  DB  15,88,13,180,17,0,0                 ; addps         0x11b4(%rip),%xmm1        # 5b60 <_sk_callback_sse2+0xf40>
+  DB  68,15,40,13,188,17,0,0              ; movaps        0x11bc(%rip),%xmm9        # 5b70 <_sk_callback_sse2+0xf50>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -22390,7 +22550,7 @@ _sk_bilinear_py_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,180,17,0,0                 ; addps         0x11b4(%rip),%xmm1        # 5ad0 <_sk_callback_sse2+0xf6a>
+  DB  15,88,13,170,17,0,0                 ; addps         0x11aa(%rip),%xmm1        # 5b80 <_sk_callback_sse2+0xf60>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -22400,13 +22560,13 @@ _sk_bicubic_n3x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,167,17,0,0                  ; addps         0x11a7(%rip),%xmm0        # 5ae0 <_sk_callback_sse2+0xf7a>
-  DB  68,15,40,13,175,17,0,0              ; movaps        0x11af(%rip),%xmm9        # 5af0 <_sk_callback_sse2+0xf8a>
+  DB  15,88,5,157,17,0,0                  ; addps         0x119d(%rip),%xmm0        # 5b90 <_sk_callback_sse2+0xf70>
+  DB  68,15,40,13,165,17,0,0              ; movaps        0x11a5(%rip),%xmm9        # 5ba0 <_sk_callback_sse2+0xf80>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,171,17,0,0              ; mulps         0x11ab(%rip),%xmm9        # 5b00 <_sk_callback_sse2+0xf9a>
-  DB  68,15,88,13,179,17,0,0              ; addps         0x11b3(%rip),%xmm9        # 5b10 <_sk_callback_sse2+0xfaa>
+  DB  68,15,89,13,161,17,0,0              ; mulps         0x11a1(%rip),%xmm9        # 5bb0 <_sk_callback_sse2+0xf90>
+  DB  68,15,88,13,169,17,0,0              ; addps         0x11a9(%rip),%xmm9        # 5bc0 <_sk_callback_sse2+0xfa0>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,128,0,0,0              ; movups        %xmm9,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -22417,16 +22577,16 @@ _sk_bicubic_n1x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,162,17,0,0                  ; addps         0x11a2(%rip),%xmm0        # 5b20 <_sk_callback_sse2+0xfba>
-  DB  68,15,40,13,170,17,0,0              ; movaps        0x11aa(%rip),%xmm9        # 5b30 <_sk_callback_sse2+0xfca>
+  DB  15,88,5,152,17,0,0                  ; addps         0x1198(%rip),%xmm0        # 5bd0 <_sk_callback_sse2+0xfb0>
+  DB  68,15,40,13,160,17,0,0              ; movaps        0x11a0(%rip),%xmm9        # 5be0 <_sk_callback_sse2+0xfc0>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,174,17,0,0               ; movaps        0x11ae(%rip),%xmm8        # 5b40 <_sk_callback_sse2+0xfda>
+  DB  68,15,40,5,164,17,0,0               ; movaps        0x11a4(%rip),%xmm8        # 5bf0 <_sk_callback_sse2+0xfd0>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,178,17,0,0               ; addps         0x11b2(%rip),%xmm8        # 5b50 <_sk_callback_sse2+0xfea>
+  DB  68,15,88,5,168,17,0,0               ; addps         0x11a8(%rip),%xmm8        # 5c00 <_sk_callback_sse2+0xfe0>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,182,17,0,0               ; addps         0x11b6(%rip),%xmm8        # 5b60 <_sk_callback_sse2+0xffa>
+  DB  68,15,88,5,172,17,0,0               ; addps         0x11ac(%rip),%xmm8        # 5c10 <_sk_callback_sse2+0xff0>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,186,17,0,0               ; addps         0x11ba(%rip),%xmm8        # 5b70 <_sk_callback_sse2+0x100a>
+  DB  68,15,88,5,176,17,0,0               ; addps         0x11b0(%rip),%xmm8        # 5c20 <_sk_callback_sse2+0x1000>
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -22434,17 +22594,17 @@ _sk_bicubic_n1x_sse2 LABEL PROC
 PUBLIC _sk_bicubic_p1x_sse2
 _sk_bicubic_p1x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,180,17,0,0               ; movaps        0x11b4(%rip),%xmm8        # 5b80 <_sk_callback_sse2+0x101a>
+  DB  68,15,40,5,170,17,0,0               ; movaps        0x11aa(%rip),%xmm8        # 5c30 <_sk_callback_sse2+0x1010>
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,72,64                      ; movups        0x40(%rax),%xmm9
   DB  65,15,88,192                        ; addps         %xmm8,%xmm0
-  DB  68,15,40,21,176,17,0,0              ; movaps        0x11b0(%rip),%xmm10        # 5b90 <_sk_callback_sse2+0x102a>
+  DB  68,15,40,21,166,17,0,0              ; movaps        0x11a6(%rip),%xmm10        # 5c40 <_sk_callback_sse2+0x1020>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,180,17,0,0              ; addps         0x11b4(%rip),%xmm10        # 5ba0 <_sk_callback_sse2+0x103a>
+  DB  68,15,88,21,170,17,0,0              ; addps         0x11aa(%rip),%xmm10        # 5c50 <_sk_callback_sse2+0x1030>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,176,17,0,0              ; addps         0x11b0(%rip),%xmm10        # 5bb0 <_sk_callback_sse2+0x104a>
+  DB  68,15,88,21,166,17,0,0              ; addps         0x11a6(%rip),%xmm10        # 5c60 <_sk_callback_sse2+0x1040>
   DB  68,15,17,144,128,0,0,0              ; movups        %xmm10,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -22454,11 +22614,11 @@ _sk_bicubic_p3x_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,0                             ; movups        (%rax),%xmm0
   DB  68,15,16,64,64                      ; movups        0x40(%rax),%xmm8
-  DB  15,88,5,163,17,0,0                  ; addps         0x11a3(%rip),%xmm0        # 5bc0 <_sk_callback_sse2+0x105a>
+  DB  15,88,5,153,17,0,0                  ; addps         0x1199(%rip),%xmm0        # 5c70 <_sk_callback_sse2+0x1050>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,163,17,0,0               ; mulps         0x11a3(%rip),%xmm8        # 5bd0 <_sk_callback_sse2+0x106a>
-  DB  68,15,88,5,171,17,0,0               ; addps         0x11ab(%rip),%xmm8        # 5be0 <_sk_callback_sse2+0x107a>
+  DB  68,15,89,5,153,17,0,0               ; mulps         0x1199(%rip),%xmm8        # 5c80 <_sk_callback_sse2+0x1060>
+  DB  68,15,88,5,161,17,0,0               ; addps         0x11a1(%rip),%xmm8        # 5c90 <_sk_callback_sse2+0x1070>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,128,0,0,0              ; movups        %xmm8,0x80(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -22469,13 +22629,13 @@ _sk_bicubic_n3y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,153,17,0,0                 ; addps         0x1199(%rip),%xmm1        # 5bf0 <_sk_callback_sse2+0x108a>
-  DB  68,15,40,13,161,17,0,0              ; movaps        0x11a1(%rip),%xmm9        # 5c00 <_sk_callback_sse2+0x109a>
+  DB  15,88,13,143,17,0,0                 ; addps         0x118f(%rip),%xmm1        # 5ca0 <_sk_callback_sse2+0x1080>
+  DB  68,15,40,13,151,17,0,0              ; movaps        0x1197(%rip),%xmm9        # 5cb0 <_sk_callback_sse2+0x1090>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
   DB  69,15,40,193                        ; movaps        %xmm9,%xmm8
   DB  69,15,89,192                        ; mulps         %xmm8,%xmm8
-  DB  68,15,89,13,157,17,0,0              ; mulps         0x119d(%rip),%xmm9        # 5c10 <_sk_callback_sse2+0x10aa>
-  DB  68,15,88,13,165,17,0,0              ; addps         0x11a5(%rip),%xmm9        # 5c20 <_sk_callback_sse2+0x10ba>
+  DB  68,15,89,13,147,17,0,0              ; mulps         0x1193(%rip),%xmm9        # 5cc0 <_sk_callback_sse2+0x10a0>
+  DB  68,15,88,13,155,17,0,0              ; addps         0x119b(%rip),%xmm9        # 5cd0 <_sk_callback_sse2+0x10b0>
   DB  69,15,89,200                        ; mulps         %xmm8,%xmm9
   DB  68,15,17,136,160,0,0,0              ; movups        %xmm9,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -22486,16 +22646,16 @@ _sk_bicubic_n1y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,147,17,0,0                 ; addps         0x1193(%rip),%xmm1        # 5c30 <_sk_callback_sse2+0x10ca>
-  DB  68,15,40,13,155,17,0,0              ; movaps        0x119b(%rip),%xmm9        # 5c40 <_sk_callback_sse2+0x10da>
+  DB  15,88,13,137,17,0,0                 ; addps         0x1189(%rip),%xmm1        # 5ce0 <_sk_callback_sse2+0x10c0>
+  DB  68,15,40,13,145,17,0,0              ; movaps        0x1191(%rip),%xmm9        # 5cf0 <_sk_callback_sse2+0x10d0>
   DB  69,15,92,200                        ; subps         %xmm8,%xmm9
-  DB  68,15,40,5,159,17,0,0               ; movaps        0x119f(%rip),%xmm8        # 5c50 <_sk_callback_sse2+0x10ea>
+  DB  68,15,40,5,149,17,0,0               ; movaps        0x1195(%rip),%xmm8        # 5d00 <_sk_callback_sse2+0x10e0>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,163,17,0,0               ; addps         0x11a3(%rip),%xmm8        # 5c60 <_sk_callback_sse2+0x10fa>
+  DB  68,15,88,5,153,17,0,0               ; addps         0x1199(%rip),%xmm8        # 5d10 <_sk_callback_sse2+0x10f0>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,167,17,0,0               ; addps         0x11a7(%rip),%xmm8        # 5c70 <_sk_callback_sse2+0x110a>
+  DB  68,15,88,5,157,17,0,0               ; addps         0x119d(%rip),%xmm8        # 5d20 <_sk_callback_sse2+0x1100>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
-  DB  68,15,88,5,171,17,0,0               ; addps         0x11ab(%rip),%xmm8        # 5c80 <_sk_callback_sse2+0x111a>
+  DB  68,15,88,5,161,17,0,0               ; addps         0x11a1(%rip),%xmm8        # 5d30 <_sk_callback_sse2+0x1110>
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -22503,17 +22663,17 @@ _sk_bicubic_n1y_sse2 LABEL PROC
 PUBLIC _sk_bicubic_p1y_sse2
 _sk_bicubic_p1y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
-  DB  68,15,40,5,165,17,0,0               ; movaps        0x11a5(%rip),%xmm8        # 5c90 <_sk_callback_sse2+0x112a>
+  DB  68,15,40,5,155,17,0,0               ; movaps        0x119b(%rip),%xmm8        # 5d40 <_sk_callback_sse2+0x1120>
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,72,96                      ; movups        0x60(%rax),%xmm9
   DB  65,15,88,200                        ; addps         %xmm8,%xmm1
-  DB  68,15,40,21,160,17,0,0              ; movaps        0x11a0(%rip),%xmm10        # 5ca0 <_sk_callback_sse2+0x113a>
+  DB  68,15,40,21,150,17,0,0              ; movaps        0x1196(%rip),%xmm10        # 5d50 <_sk_callback_sse2+0x1130>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,164,17,0,0              ; addps         0x11a4(%rip),%xmm10        # 5cb0 <_sk_callback_sse2+0x114a>
+  DB  68,15,88,21,154,17,0,0              ; addps         0x119a(%rip),%xmm10        # 5d60 <_sk_callback_sse2+0x1140>
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
   DB  69,15,88,208                        ; addps         %xmm8,%xmm10
   DB  69,15,89,209                        ; mulps         %xmm9,%xmm10
-  DB  68,15,88,21,160,17,0,0              ; addps         0x11a0(%rip),%xmm10        # 5cc0 <_sk_callback_sse2+0x115a>
+  DB  68,15,88,21,150,17,0,0              ; addps         0x1196(%rip),%xmm10        # 5d70 <_sk_callback_sse2+0x1150>
   DB  68,15,17,144,160,0,0,0              ; movups        %xmm10,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  255,224                             ; jmpq          *%rax
@@ -22523,11 +22683,11 @@ _sk_bicubic_p3y_sse2 LABEL PROC
   DB  72,173                              ; lods          %ds:(%rsi),%rax
   DB  15,16,72,32                         ; movups        0x20(%rax),%xmm1
   DB  68,15,16,64,96                      ; movups        0x60(%rax),%xmm8
-  DB  15,88,13,146,17,0,0                 ; addps         0x1192(%rip),%xmm1        # 5cd0 <_sk_callback_sse2+0x116a>
+  DB  15,88,13,136,17,0,0                 ; addps         0x1188(%rip),%xmm1        # 5d80 <_sk_callback_sse2+0x1160>
   DB  69,15,40,200                        ; movaps        %xmm8,%xmm9
   DB  69,15,89,201                        ; mulps         %xmm9,%xmm9
-  DB  68,15,89,5,146,17,0,0               ; mulps         0x1192(%rip),%xmm8        # 5ce0 <_sk_callback_sse2+0x117a>
-  DB  68,15,88,5,154,17,0,0               ; addps         0x119a(%rip),%xmm8        # 5cf0 <_sk_callback_sse2+0x118a>
+  DB  68,15,89,5,136,17,0,0               ; mulps         0x1188(%rip),%xmm8        # 5d90 <_sk_callback_sse2+0x1170>
+  DB  68,15,88,5,144,17,0,0               ; addps         0x1190(%rip),%xmm8        # 5da0 <_sk_callback_sse2+0x1180>
   DB  69,15,89,193                        ; mulps         %xmm9,%xmm8
   DB  68,15,17,128,160,0,0,0              ; movups        %xmm8,0xa0(%rax)
   DB  72,173                              ; lods          %ds:(%rsi),%rax
@@ -22732,11 +22892,11 @@ ALIGN 16
   DB  128,191,0,0,128,191,0               ; cmpb          $0x0,-0x40800000(%rdi)
   DB  0,224                               ; add           %ah,%al
   DB  64,0,0                              ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4df8 <.literal16+0x1d8>
+  DB  224,64                              ; loopne        4ea8 <.literal16+0x1d8>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4dfc <.literal16+0x1dc>
+  DB  224,64                              ; loopne        4eac <.literal16+0x1dc>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,64                              ; loopne        4e00 <.literal16+0x1e0>
+  DB  224,64                              ; loopne        4eb0 <.literal16+0x1e0>
   DB  154                                 ; (bad)
   DB  153                                 ; cltd
   DB  153                                 ; cltd
@@ -22756,13 +22916,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e21 <.literal16+0x201>
+  DB  71,225,61                           ; rex.RXB       loope 4ed1 <.literal16+0x201>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e25 <.literal16+0x205>
+  DB  71,225,61                           ; rex.RXB       loope 4ed5 <.literal16+0x205>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e29 <.literal16+0x209>
+  DB  71,225,61                           ; rex.RXB       loope 4ed9 <.literal16+0x209>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e2d <.literal16+0x20d>
+  DB  71,225,61                           ; rex.RXB       loope 4edd <.literal16+0x20d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -22787,13 +22947,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e61 <.literal16+0x241>
+  DB  71,225,61                           ; rex.RXB       loope 4f11 <.literal16+0x241>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e65 <.literal16+0x245>
+  DB  71,225,61                           ; rex.RXB       loope 4f15 <.literal16+0x245>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e69 <.literal16+0x249>
+  DB  71,225,61                           ; rex.RXB       loope 4f19 <.literal16+0x249>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4e6d <.literal16+0x24d>
+  DB  71,225,61                           ; rex.RXB       loope 4f1d <.literal16+0x24d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -22818,13 +22978,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ea1 <.literal16+0x281>
+  DB  71,225,61                           ; rex.RXB       loope 4f51 <.literal16+0x281>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ea5 <.literal16+0x285>
+  DB  71,225,61                           ; rex.RXB       loope 4f55 <.literal16+0x285>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ea9 <.literal16+0x289>
+  DB  71,225,61                           ; rex.RXB       loope 4f59 <.literal16+0x289>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ead <.literal16+0x28d>
+  DB  71,225,61                           ; rex.RXB       loope 4f5d <.literal16+0x28d>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -22849,13 +23009,13 @@ ALIGN 16
   DB  10,23                               ; or            (%rdi),%dl
   DB  63                                  ; (bad)
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ee1 <.literal16+0x2c1>
+  DB  71,225,61                           ; rex.RXB       loope 4f91 <.literal16+0x2c1>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ee5 <.literal16+0x2c5>
+  DB  71,225,61                           ; rex.RXB       loope 4f95 <.literal16+0x2c5>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4ee9 <.literal16+0x2c9>
+  DB  71,225,61                           ; rex.RXB       loope 4f99 <.literal16+0x2c9>
   DB  174                                 ; scas          %es:(%rdi),%al
-  DB  71,225,61                           ; rex.RXB       loope 4eed <.literal16+0x2cd>
+  DB  71,225,61                           ; rex.RXB       loope 4f9d <.literal16+0x2cd>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -23076,13 +23236,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        50c9 <.literal16+0x4a9>
+  DB  224,7                               ; loopne        5179 <.literal16+0x4a9>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        50cd <.literal16+0x4ad>
+  DB  224,7                               ; loopne        517d <.literal16+0x4ad>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        50d1 <.literal16+0x4b1>
+  DB  224,7                               ; loopne        5181 <.literal16+0x4b1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        50d5 <.literal16+0x4b5>
+  DB  224,7                               ; loopne        5185 <.literal16+0x4b5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -23147,11 +23307,11 @@ ALIGN 16
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            51ab <.literal16+0x58b>
+  DB  127,67                              ; jg            525b <.literal16+0x58b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            51af <.literal16+0x58f>
+  DB  127,67                              ; jg            525f <.literal16+0x58f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            51b3 <.literal16+0x593>
+  DB  127,67                              ; jg            5263 <.literal16+0x593>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,129,128,128,59           ; addb          $0x3b,-0x7f7f7ec5(%rax)
@@ -23166,16 +23326,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            51a4 <.literal16+0x584>
+  DB  127,0                               ; jg            5254 <.literal16+0x584>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            51a8 <.literal16+0x588>
+  DB  127,0                               ; jg            5258 <.literal16+0x588>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            51ac <.literal16+0x58c>
+  DB  127,0                               ; jg            525c <.literal16+0x58c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            51b0 <.literal16+0x590>
+  DB  127,0                               ; jg            5260 <.literal16+0x590>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -23184,7 +23344,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            5235 <.literal16+0x615>
+  DB  119,115                             ; ja            52e5 <.literal16+0x615>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -23195,7 +23355,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           5199 <.literal16+0x579>
+  DB  117,191                             ; jne           5249 <.literal16+0x579>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -23207,7 +23367,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a391da <_sk_callback_sse2+0xffffffffe9a34674>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3928a <_sk_callback_sse2+0xffffffffe9a3466a>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -23261,16 +23421,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5274 <.literal16+0x654>
+  DB  127,0                               ; jg            5324 <.literal16+0x654>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5278 <.literal16+0x658>
+  DB  127,0                               ; jg            5328 <.literal16+0x658>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            527c <.literal16+0x65c>
+  DB  127,0                               ; jg            532c <.literal16+0x65c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5280 <.literal16+0x660>
+  DB  127,0                               ; jg            5330 <.literal16+0x660>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -23279,7 +23439,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            5305 <.literal16+0x6e5>
+  DB  119,115                             ; ja            53b5 <.literal16+0x6e5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -23290,7 +23450,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           5269 <.literal16+0x649>
+  DB  117,191                             ; jne           5319 <.literal16+0x649>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -23302,7 +23462,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a392aa <_sk_callback_sse2+0xffffffffe9a34744>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3935a <_sk_callback_sse2+0xffffffffe9a3473a>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -23356,16 +23516,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5344 <.literal16+0x724>
+  DB  127,0                               ; jg            53f4 <.literal16+0x724>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5348 <.literal16+0x728>
+  DB  127,0                               ; jg            53f8 <.literal16+0x728>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            534c <.literal16+0x72c>
+  DB  127,0                               ; jg            53fc <.literal16+0x72c>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5350 <.literal16+0x730>
+  DB  127,0                               ; jg            5400 <.literal16+0x730>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -23374,7 +23534,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            53d5 <.literal16+0x7b5>
+  DB  119,115                             ; ja            5485 <.literal16+0x7b5>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -23385,7 +23545,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           5339 <.literal16+0x719>
+  DB  117,191                             ; jne           53e9 <.literal16+0x719>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -23397,7 +23557,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3937a <_sk_callback_sse2+0xffffffffe9a34814>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3942a <_sk_callback_sse2+0xffffffffe9a3480a>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -23451,16 +23611,16 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  52,255                              ; xor           $0xff,%al
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5414 <.literal16+0x7f4>
+  DB  127,0                               ; jg            54c4 <.literal16+0x7f4>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5418 <.literal16+0x7f8>
+  DB  127,0                               ; jg            54c8 <.literal16+0x7f8>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            541c <.literal16+0x7fc>
+  DB  127,0                               ; jg            54cc <.literal16+0x7fc>
   DB  255                                 ; (bad)
   DB  255                                 ; (bad)
-  DB  127,0                               ; jg            5420 <.literal16+0x800>
+  DB  127,0                               ; jg            54d0 <.literal16+0x800>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -23469,7 +23629,7 @@ ALIGN 16
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
-  DB  119,115                             ; ja            54a5 <.literal16+0x885>
+  DB  119,115                             ; ja            5555 <.literal16+0x885>
   DB  248                                 ; clc
   DB  194,119,115                         ; retq          $0x7377
   DB  248                                 ; clc
@@ -23480,7 +23640,7 @@ ALIGN 16
   DB  194,117,191                         ; retq          $0xbf75
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
-  DB  117,191                             ; jne           5409 <.literal16+0x7e9>
+  DB  117,191                             ; jne           54b9 <.literal16+0x7e9>
   DB  191,63,117,191,191                  ; mov           $0xbfbf753f,%edi
   DB  63                                  ; (bad)
   DB  249                                 ; stc
@@ -23492,7 +23652,7 @@ ALIGN 16
   DB  249                                 ; stc
   DB  68,180,62                           ; rex.R         mov $0x3e,%spl
   DB  163,233,220,63,163,233,220,63,163   ; movabs        %eax,0xa33fdce9a33fdce9
-  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a3944a <_sk_callback_sse2+0xffffffffe9a348e4>
+  DB  233,220,63,163,233                  ; jmpq          ffffffffe9a394fa <_sk_callback_sse2+0xffffffffe9a348da>
   DB  220,63                              ; fdivrl        (%rdi)
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
@@ -23542,13 +23702,13 @@ ALIGN 16
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
   DB  200,66,0,0                          ; enterq        $0x42,$0x0
-  DB  127,67                              ; jg            5527 <.literal16+0x907>
+  DB  127,67                              ; jg            55d7 <.literal16+0x907>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            552b <.literal16+0x90b>
+  DB  127,67                              ; jg            55db <.literal16+0x90b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            552f <.literal16+0x90f>
+  DB  127,67                              ; jg            55df <.literal16+0x90f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            5533 <.literal16+0x913>
+  DB  127,67                              ; jg            55e3 <.literal16+0x913>
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,195                               ; add           %al,%bl
   DB  0,0                                 ; add           %al,(%rax)
@@ -23595,16 +23755,16 @@ ALIGN 16
   DB  128,3,62                            ; addb          $0x3e,(%rbx)
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           55b3 <.literal16+0x993>
+  DB  118,63                              ; jbe           5663 <.literal16+0x993>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           55b7 <.literal16+0x997>
+  DB  118,63                              ; jbe           5667 <.literal16+0x997>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           55bb <.literal16+0x99b>
+  DB  118,63                              ; jbe           566b <.literal16+0x99b>
   DB  31                                  ; (bad)
   DB  215                                 ; xlat          %ds:(%rbx)
-  DB  118,63                              ; jbe           55bf <.literal16+0x99f>
+  DB  118,63                              ; jbe           566f <.literal16+0x99f>
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
   DB  246,64,83,63                        ; testb         $0x3f,0x53(%rax)
@@ -23616,11 +23776,11 @@ ALIGN 16
   DB  128,59,0                            ; cmpb          $0x0,(%rbx)
   DB  0,127,67                            ; add           %bh,0x43(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            55fb <.literal16+0x9db>
+  DB  127,67                              ; jg            56ab <.literal16+0x9db>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            55ff <.literal16+0x9df>
+  DB  127,67                              ; jg            56af <.literal16+0x9df>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            5603 <.literal16+0x9e3>
+  DB  127,67                              ; jg            56b3 <.literal16+0x9e3>
   DB  129,128,128,59,129,128,128,59,129,128; addl          $0x80813b80,-0x7f7ec480(%rax)
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,0,0,128,63               ; addb          $0x3f,-0x7fffffc5(%rax)
@@ -23660,13 +23820,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        5649 <.literal16+0xa29>
+  DB  224,7                               ; loopne        56f9 <.literal16+0xa29>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        564d <.literal16+0xa2d>
+  DB  224,7                               ; loopne        56fd <.literal16+0xa2d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        5651 <.literal16+0xa31>
+  DB  224,7                               ; loopne        5701 <.literal16+0xa31>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        5655 <.literal16+0xa35>
+  DB  224,7                               ; loopne        5705 <.literal16+0xa35>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -23712,13 +23872,13 @@ ALIGN 16
   DB  132,55                              ; test          %dh,(%rdi)
   DB  8,33                                ; or            %ah,(%rcx)
   DB  132,55                              ; test          %dh,(%rdi)
-  DB  224,7                               ; loopne        56b9 <.literal16+0xa99>
+  DB  224,7                               ; loopne        5769 <.literal16+0xa99>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        56bd <.literal16+0xa9d>
+  DB  224,7                               ; loopne        576d <.literal16+0xa9d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        56c1 <.literal16+0xaa1>
+  DB  224,7                               ; loopne        5771 <.literal16+0xaa1>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  224,7                               ; loopne        56c5 <.literal16+0xaa5>
+  DB  224,7                               ; loopne        5775 <.literal16+0xaa5>
   DB  0,0                                 ; add           %al,(%rax)
   DB  33,8                                ; and           %ecx,(%rax)
   DB  2,58                                ; add           (%rdx),%bh
@@ -23756,13 +23916,13 @@ ALIGN 16
   DB  65,0,0                              ; add           %al,(%r8)
   DB  248                                 ; clc
   DB  65,0,0                              ; add           %al,(%r8)
-  DB  124,66                              ; jl            5756 <.literal16+0xb36>
+  DB  124,66                              ; jl            5806 <.literal16+0xb36>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            575a <.literal16+0xb3a>
+  DB  124,66                              ; jl            580a <.literal16+0xb3a>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            575e <.literal16+0xb3e>
+  DB  124,66                              ; jl            580e <.literal16+0xb3e>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  124,66                              ; jl            5762 <.literal16+0xb42>
+  DB  124,66                              ; jl            5812 <.literal16+0xb42>
   DB  0,240                               ; add           %dh,%al
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,240                               ; add           %dh,%al
@@ -23852,13 +24012,13 @@ ALIGN 16
   DB  136,136,61,137,136,136              ; mov           %cl,-0x777776c3(%rax)
   DB  61,137,136,136,61                   ; cmp           $0x3d888889,%eax
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            5865 <.literal16+0xc45>
+  DB  112,65                              ; jo            5915 <.literal16+0xc45>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            5869 <.literal16+0xc49>
+  DB  112,65                              ; jo            5919 <.literal16+0xc49>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            586d <.literal16+0xc4d>
+  DB  112,65                              ; jo            591d <.literal16+0xc4d>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  112,65                              ; jo            5871 <.literal16+0xc51>
+  DB  112,65                              ; jo            5921 <.literal16+0xc51>
   DB  255,0                               ; incl          (%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  255,0                               ; incl          (%rax)
@@ -23880,11 +24040,11 @@ ALIGN 16
   DB  128,59,129                          ; cmpb          $0x81,(%rbx)
   DB  128,128,59,0,0,127,67               ; addb          $0x43,0x7f00003b(%rax)
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            58bb <.literal16+0xc9b>
+  DB  127,67                              ; jg            596b <.literal16+0xc9b>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            58bf <.literal16+0xc9f>
+  DB  127,67                              ; jg            596f <.literal16+0xc9f>
   DB  0,0                                 ; add           %al,(%rax)
-  DB  127,67                              ; jg            58c3 <.literal16+0xca3>
+  DB  127,67                              ; jg            5973 <.literal16+0xca3>
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,128,0,0,0,128                     ; add           %al,-0x80000000(%rax)
@@ -23960,13 +24120,13 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  255                                 ; (bad)
-  DB  127,71                              ; jg            59ab <.literal16+0xd8b>
+  DB  127,71                              ; jg            5a5b <.literal16+0xd8b>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            59af <.literal16+0xd8f>
+  DB  127,71                              ; jg            5a5f <.literal16+0xd8f>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            59b3 <.literal16+0xd93>
+  DB  127,71                              ; jg            5a63 <.literal16+0xd93>
   DB  0,255                               ; add           %bh,%bh
-  DB  127,71                              ; jg            59b7 <.literal16+0xd97>
+  DB  127,71                              ; jg            5a67 <.literal16+0xd97>
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,0                            ; cmpb          $0x0,(%rdi)
   DB  0,128,63,0,0,128                    ; add           %al,-0x7fffffc1(%rax)
@@ -24127,11 +24287,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         5b22 <.literal16+0xf02>
+  DB  62,114,28                           ; jb,pt         5bd2 <.literal16+0xf02>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5b26 <.literal16+0xf06>
+  DB  62,114,28                           ; jb,pt         5bd6 <.literal16+0xf06>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5b2a <.literal16+0xf0a>
+  DB  62,114,28                           ; jb,pt         5bda <.literal16+0xf0a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -24175,7 +24335,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e9b5 <_sk_callback_sse2+0x3d639e4f>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63ea65 <_sk_callback_sse2+0x3d639e45>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -24201,7 +24361,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63e9f5 <_sk_callback_sse2+0x3d639e8f>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63eaa5 <_sk_callback_sse2+0x3d639e85>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -24210,13 +24370,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            5bee <.literal16+0xfce>
+  DB  114,28                              ; jb            5c9e <.literal16+0xfce>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5bf2 <.literal16+0xfd2>
+  DB  62,114,28                           ; jb,pt         5ca2 <.literal16+0xfd2>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5bf6 <.literal16+0xfd6>
+  DB  62,114,28                           ; jb,pt         5ca6 <.literal16+0xfd6>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5bfa <.literal16+0xfda>
+  DB  62,114,28                           ; jb,pt         5caa <.literal16+0xfda>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -24237,11 +24397,11 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  128,63,114                          ; cmpb          $0x72,(%rdi)
   DB  28,199                              ; sbb           $0xc7,%al
-  DB  62,114,28                           ; jb,pt         5c32 <.literal16+0x1012>
+  DB  62,114,28                           ; jb,pt         5ce2 <.literal16+0x1012>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5c36 <.literal16+0x1016>
+  DB  62,114,28                           ; jb,pt         5ce6 <.literal16+0x1016>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5c3a <.literal16+0x101a>
+  DB  62,114,28                           ; jb,pt         5cea <.literal16+0x101a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
@@ -24285,7 +24445,7 @@ ALIGN 16
   DB  0,0                                 ; add           %al,(%rax)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63eac5 <_sk_callback_sse2+0x3d639f5f>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63eb75 <_sk_callback_sse2+0x3d639f55>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  0,63                                ; add           %bh,(%rdi)
   DB  0,0                                 ; add           %al,(%rax)
@@ -24311,7 +24471,7 @@ ALIGN 16
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
   DB  57,142,99,61,57,142                 ; cmp           %ecx,-0x71c6c29d(%rsi)
-  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63eb05 <_sk_callback_sse2+0x3d639f9f>
+  DB  99,61,57,142,99,61                  ; movslq        0x3d638e39(%rip),%edi        # 3d63ebb5 <_sk_callback_sse2+0x3d639f95>
   DB  57,142,99,61,0,0                    ; cmp           %ecx,0x3d63(%rsi)
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
@@ -24320,13 +24480,13 @@ ALIGN 16
   DB  192,63,0                            ; sarb          $0x0,(%rdi)
   DB  0,192                               ; add           %al,%al
   DB  63                                  ; (bad)
-  DB  114,28                              ; jb            5cfe <.literal16+0x10de>
+  DB  114,28                              ; jb            5dae <.literal16+0x10de>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5d02 <_sk_callback_sse2+0x119c>
+  DB  62,114,28                           ; jb,pt         5db2 <_sk_callback_sse2+0x1192>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5d06 <_sk_callback_sse2+0x11a0>
+  DB  62,114,28                           ; jb,pt         5db6 <_sk_callback_sse2+0x1196>
   DB  199                                 ; (bad)
-  DB  62,114,28                           ; jb,pt         5d0a <_sk_callback_sse2+0x11a4>
+  DB  62,114,28                           ; jb,pt         5dba <_sk_callback_sse2+0x119a>
   DB  199                                 ; (bad)
   DB  62,171                              ; ds            stos %eax,%es:(%rdi)
   DB  170                                 ; stos          %al,%es:(%rdi)
index 33e6764..4d84cb4 100644 (file)
@@ -1047,6 +1047,16 @@ STAGE(matrix_4x5) {
     b = B;
     a = A;
 }
+// matrix_4x3: expands the 2-component input held in (r,g) to all four
+// channels using the 12 floats that ctx points at.  The floats are laid out
+// column-major: [0..3] scale the first input, [4..7] scale the second input,
+// and [8..11] are the per-channel constant (translate) terms.
+STAGE(matrix_4x3) {
+    const float* mat  = (const float*)ctx;
+    const float* col0 = mat;        // coefficients applied to the incoming r
+    const float* col1 = mat + 4;    // coefficients applied to the incoming g
+    const float* col2 = mat + 8;    // constant terms
+
+    // r and g are overwritten below, so capture the inputs first.
+    auto srcX = r;
+    auto srcY = g;
+
+    r = mad(srcX, col0[0], mad(srcY, col1[0], col2[0]));
+    g = mad(srcX, col0[1], mad(srcY, col1[1], col2[1]));
+    b = mad(srcX, col0[2], mad(srcY, col1[2], col2[2]));
+    a = mad(srcX, col0[3], mad(srcY, col1[3], col2[3]));
+}
 STAGE(matrix_perspective) {
     // N.B. Unlike the other matrix_ stages, this matrix is row-major.
     auto m = (const float*)ctx;