gst/videoscale/vs_lanczos.c

   1 /*
   2  * Image Scaling Functions
   3  * Copyright (c) 2011 David A. Schleef <ds@schleef.org>
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25  * POSSIBILITY OF SUCH DAMAGE.
  26  */
  27 /*
  28  *
  29  * Modified Lanczos scaling algorithm
  30  * ==================================
  31  *
  32  * This algorithm was developed by the author.  The primary goals of
  33  * the algorithm are high-quality video downscaling for medium scale
  34  * factors (in the range of 1.3x to 5.0x) using methods that can be
  35  * converted to SIMD code.  Concerns with existing algorithms were
  36  * mainly related to either over-soft filtering (Lanczos) or aliasing
  37  * (bilinear or any other method with inadequate sampling).
  38  *
  39  * The problems with bilinear scaling are apparent when downscaling
  40  * more than a factor of 2.  For example, when downscaling by a factor
  41  * of 3, only two-thirds of the input pixels contribute to the output
  42  * pixels.  This is only considering scaling in one direction; after
  43  * scaling both vertically and horizontally in a 2-D image, fewer than
  44  * half of the input pixels contribute to the output, so it should not
  45  * be surprising that the output is suboptimal.
  46  *
  47  * The problems with Lanczos scaling are more subtle.  From a theoretical
  48  * perspective, Lanczos is an optimal algorithm for resampling equally-
  49  * spaced values.  This theoretical perspective is based on analysis
  50  * done in frequency space, thus, Lanczos works very well for audio
  51  * resampling, since the ear hears primarily in frequency space.  The
  52  * human visual system is sensitive primarily in the spatial domain,
  53  * therefore any resampling algorithm should take this into account.
  54  * This difference is immediately clear in the size of resampling
  55  * window or envelope that is chosen for resampling: for audio, an
  56  * envelope of a=64 is typical, in image scaling, the envelope is
  57  * usually a=2 or a=3.
  58  *
  59  * One result of the HVS being sensitive in the spatial domain (and
  60  * also probably due to oversampling capabilities of the retina and
  61  * visual cortex) is that it is less sensitive to the exact magnitude
  62  * of high-frequency visual signals than to the appropriate amount of
  63  * energy in the nearby frequency band.  A Lanczos kernel with a=2
  64  * or a=3 strongly decreases the amount of energy in the high frequency
  65  * bands.  The energy in this area can be increased by increasing a,
  66  * which brings in energy from different areas of the image (bad for
  67  * reasons mentioned above), or by oversampling the input data.  We
  68  * have chosen two methods for doing the latter.  Firstly, there is
  69  * a sharpness parameter, which increases the cutoff frequency of the
  70  * filter, aliasing higher frequency noise into the passband.  And
  71  * secondly, there is the sharpen parameter, which increases the
  72  * contribution of high-frequency (but in-band) components.
  73  *
  74  * An alternate explanation of the usefulness of a sharpening filter
  75  * is that many natural images have a roughly 1/f spectrum.  In order
  76  * for a downsampled image to look more "natural" when high frequencies
  77  * are removed, the frequencies in the pass band near the cutoff
  78  * frequency are amplified, causing the spectrum to be more roughly
  79  * 1/f.  I said "roughly", not "literally".
  80  *
  81  * This alternate explanation is useful for understanding the author's
  82  * secondary motivation for developing this algorithm, namely, as a
  83  * method of video compression.  Several recent techniques (such as
  84  * HTTP Live Streaming and SVC) use image scaling as a method to get
  85  * increased compression out of nominally non-scalable codecs such as
  86  * H.264.  For optimal quality, it is thusly important to consider
  87  * the scaler and encoder as a combined unit.  Tuning of the sharpness
  88  * and sharpen parameters was performed using the Toro encoder tuner,
  89  * where scaled and encoded video was compared to unscaled and encoded
  90  * video.  This tuning suggested values that were very close to the
  91  * values chosen by manual inspection of scaled images and video.
  92  *
  93  * The optimal values of sharpen and sharpness were slightly different
  94  * depending whether the comparison was still images or video.  Video
  95  * comparisons were more sensitive to aliasing, since the aliasing
  96  * artifacts tended to move or "crawl" around the video.  The default
  97  * values are for video; image scaling may prefer higher values.
  98  *
  99  * A number of related techniques were rejected for various reasons.
 100  * An early technique of selecting the sharpness factor locally based
 101  * on edge detection (in order to use a higher sharpness values without
 102  * the corresponding aliasing on edges) worked very well for still
 103  * images, but caused too much "crawling" on textures in video.  Also,
 104  * this method is slow, as it does not parallelize well.
 105  *
 106  * Non-separable techniques were rejected because the fastest would
 107  * have been at least 4x slower.
 108  *
 109  * It is infrequently appreciated that image scaling should ideally be
 110  * done in linear light space.  Converting to linear light space has
 111  * a similar effect to a sharpening filter.  This approach was not
 112  * taken because the added benefit is minor compared to the additional
 113  * computational cost.  Morever, the benefit is decreased by increasing
 114  * the strength of the sharpening filter.
 115  *
 116  */
 117 #include <string.h>
 118
 119 #include "vs_scanline.h"
 120 #include "vs_image.h"
 121
 122 #include "gstvideoscaleorc.h"
 123 #include <gst/gst.h>
 124 #include <math.h>
 125
 126 #define NEED_CLAMP(x,a,b) ((x) < (a) || (x) > (b))
 127
 128 #define ROUND_UP_2(x)  (((x)+1)&~1)
 129 #define ROUND_UP_4(x)  (((x)+3)&~3)
 130 #define ROUND_UP_8(x)  (((x)+7)&~7)
 131
 132 #define SRC_LINE(i) (scale->src->pixels + scale->src->stride * (i))
 133
 134 #define TMP_LINE_S16(i) ((gint16 *)scale->tmpdata + (i)*(scale->dest->width))
 135 #define TMP_LINE_S32(i) ((gint32 *)scale->tmpdata + (i)*(scale->dest->width))
 136 #define TMP_LINE_FLOAT(i) ((float *)scale->tmpdata + (i)*(scale->dest->width))
 137 #define TMP_LINE_DOUBLE(i) ((double *)scale->tmpdata + (i)*(scale->dest->width))
 138 #define TMP_LINE_S16_AYUV(i) ((gint16 *)scale->tmpdata + (i)*4*(scale->dest->width))
 139 #define TMP_LINE_S32_AYUV(i) ((gint32 *)scale->tmpdata + (i)*4*(scale->dest->width))
 140 #define TMP_LINE_FLOAT_AYUV(i) ((float *)scale->tmpdata + (i)*4*(scale->dest->width))
 141 #define TMP_LINE_DOUBLE_AYUV(i) ((double *)scale->tmpdata + (i)*4*(scale->dest->width))
 142
 143 #define PTR_OFFSET(a,b) ((void *)((char *)(a) + (b)))
 144
 145 typedef void (*HorizResampleFunc) (void *dest, const gint32 * offsets,
 146     const void *taps, const void *src, int n_taps, int shift, int n);
 147
 148 typedef struct _Scale1D Scale1D;
 149 struct _Scale1D
 150 {
 151   int n;
 152   double offset;
 153   double scale;
 154
 155   double fx;
 156   double ex;
 157   int dx;
 158
 159   int n_taps;
 160   gint32 *offsets;
 161   void *taps;
 162 };
 163
 164 typedef struct _Scale Scale;
 165 struct _Scale
 166 {
 167   const VSImage *dest;
 168   const VSImage *src;
 169
 170   double sharpness;
 171   gboolean dither;
 172
 173   void *tmpdata;
 174
 175   HorizResampleFunc horiz_resample_func;
 176
 177   Scale1D x_scale1d;
 178   Scale1D y_scale1d;
 179 };
 180
 181 static void
 182 vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src,
 183     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
 184     double sharpen);
 185 static void vs_image_scale_lanczos_Y_int32 (const VSImage * dest,
 186     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 187     double a, double sharpen);
 188 static void vs_image_scale_lanczos_Y_float (const VSImage * dest,
 189     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 190     double a, double sharpen);
 191 static void vs_image_scale_lanczos_Y_double (const VSImage * dest,
 192     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 193     double a, double sharpen);
 194 static void
 195 vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src,
 196     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
 197     double sharpen);
 198 static void vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest,
 199     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 200     double a, double sharpen);
 201 static void vs_image_scale_lanczos_AYUV_float (const VSImage * dest,
 202     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 203     double a, double sharpen);
 204 static void vs_image_scale_lanczos_AYUV_double (const VSImage * dest,
 205     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 206     double a, double sharpen);
 207 static void vs_image_scale_lanczos_AYUV64_double (const VSImage * dest,
 208     const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
 209     double a, double sharpen);
 210
 211 static double
 212 sinc (double x)
 213 {
 214   if (x == 0)
 215     return 1;
 216   return sin (G_PI * x) / (G_PI * x);
 217 }
 218
 219 static double
 220 envelope (double x)
 221 {
 222   if (x <= -1 || x >= 1)
 223     return 0;
 224   return sinc (x);
 225 }
 226
 227 static int
 228 scale1d_get_n_taps (int src_size, int dest_size, double a, double sharpness)
 229 {
 230   double scale;
 231   double fx;
 232   int dx;
 233
 234   scale = src_size / (double) dest_size;
 235   if (scale > 1.0) {
 236     fx = (1.0 / scale) * sharpness;
 237   } else {
 238     fx = (1.0) * sharpness;
 239   }
 240   dx = ceil (a / fx);
 241
 242   return 2 * dx;
 243 }
 244
 245 static void
 246 scale1d_cleanup (Scale1D * scale)
 247 {
 248   g_free (scale->taps);
 249   g_free (scale->offsets);
 250 }
 251
 252 /*
 253  * Calculates a set of taps for each destination element in double
 254  * format.  Each set of taps sums to 1.0.
 255  *
 256  */
 257 static void
 258 scale1d_calculate_taps (Scale1D * scale, int src_size, int dest_size,
 259     int n_taps, double a, double sharpness, double sharpen)
 260 {
 261   int j;
 262   double *tap_array;
 263   gint32 *offsets;
 264   double scale_offset;
 265   double scale_increment;
 266   int dx;
 267   double fx;
 268   double ex;
 269
 270   scale->scale = src_size / (double) dest_size;
 271   scale->offset = scale->scale / 2 - 0.5;
 272
 273   if (scale->scale > 1.0) {
 274     scale->fx = (1.0 / scale->scale) * sharpness;
 275   } else {
 276     scale->fx = (1.0) * sharpness;
 277   }
 278   scale->ex = scale->fx / a;
 279   scale->dx = ceil (a / scale->fx);
 280
 281   g_assert (n_taps >= 2 * scale->dx);
 282   scale->n_taps = n_taps;
 283
 284   scale->taps = g_malloc (sizeof (double) * scale->n_taps * dest_size);
 285   scale->offsets = g_malloc (sizeof (gint32) * dest_size);
 286   tap_array = scale->taps;
 287   offsets = scale->offsets;
 288
 289   scale_offset = scale->offset;
 290   scale_increment = scale->scale;
 291   dx = scale->dx;
 292   fx = scale->fx;
 293   ex = scale->ex;
 294
 295   for (j = 0; j < dest_size; j++) {
 296     double x;
 297     int xi;
 298     int l;
 299     double weight;
 300     double *taps;
 301
 302     x = scale_offset + scale_increment * j;
 303     x = CLAMP (x, 0, src_size);
 304     xi = ceil (x) - dx;
 305
 306     offsets[j] = xi;
 307     weight = 0;
 308     taps = tap_array + j * n_taps;
 309
 310     for (l = 0; l < n_taps; l++) {
 311       int xl = xi + l;
 312       taps[l] = sinc ((x - xl) * fx) * envelope ((x - xl) * ex);
 313       taps[l] -= sharpen * envelope ((x - xl) * ex);
 314       weight += taps[l];
 315     }
 316     g_assert (envelope ((x - (xi - 1)) * ex) == 0);
 317     g_assert (envelope ((x - (xi + n_taps)) * ex) == 0);
 318     for (l = 0; l < n_taps; l++) {
 319       taps[l] /= weight;
 320     }
 321
 322     if (xi < 0) {
 323       int shift = -xi;
 324
 325       for (l = 0; l < shift; l++) {
 326         taps[shift] += taps[l];
 327       }
 328       for (l = 0; l < n_taps - shift; l++) {
 329         taps[l] = taps[shift + l];
 330       }
 331       for (; l < n_taps; l++) {
 332         taps[l] = 0;
 333       }
 334       offsets[j] += shift;
 335     }
 336
 337     if (xi > src_size - n_taps) {
 338       int shift = xi - (src_size - n_taps);
 339
 340       for (l = 0; l < shift; l++) {
 341         taps[n_taps - shift - 1] += taps[n_taps - shift + l];
 342       }
 343       for (l = 0; l < n_taps - shift; l++) {
 344         taps[n_taps - 1 - l] = taps[n_taps - 1 - shift - l];
 345       }
 346       for (l = 0; l < shift; l++) {
 347         taps[l] = 0;
 348       }
 349       offsets[j] -= shift;
 350     }
 351   }
 352 }
 353
 354 /*
 355  * Calculates a set of taps for each destination element in float
 356  * format.  Each set of taps sums to 1.0.
 357  */
 358 static void
 359 scale1d_calculate_taps_float (Scale1D * scale, int src_size, int dest_size,
 360     int n_taps, double a, double sharpness, double sharpen)
 361 {
 362   double *taps_d;
 363   float *taps_f;
 364   int j;
 365
 366   scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
 367       sharpen);
 368
 369   taps_d = scale->taps;
 370   taps_f = g_malloc (sizeof (float) * scale->n_taps * dest_size);
 371
 372   for (j = 0; j < dest_size * n_taps; j++) {
 373     taps_f[j] = taps_d[j];
 374   }
 375
 376   g_free (taps_d);
 377   scale->taps = taps_f;
 378 }
 379
 380 /*
 381  * Calculates a set of taps for each destination element in gint32
 382  * format.  Each set of taps sums to (very nearly) (1<<shift).  A
 383  * typical value for shift is 10 to 15, so that applying the taps to
 384  * uint8 values and summing will fit in a (signed) int32.
 385  */
 386 static void
 387 scale1d_calculate_taps_int32 (Scale1D * scale, int src_size, int dest_size,
 388     int n_taps, double a, double sharpness, double sharpen, int shift)
 389 {
 390   double *taps_d;
 391   gint32 *taps_i;
 392   int i;
 393   int j;
 394   double multiplier;
 395
 396   scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
 397       sharpen);
 398
 399   taps_d = scale->taps;
 400   taps_i = g_malloc (sizeof (gint32) * scale->n_taps * dest_size);
 401
 402   multiplier = (1 << shift);
 403
 404   for (j = 0; j < dest_size; j++) {
 405     for (i = 0; i < n_taps; i++) {
 406       taps_i[j * n_taps + i] =
 407           floor (0.5 + taps_d[j * n_taps + i] * multiplier);
 408     }
 409   }
 410
 411   g_free (taps_d);
 412   scale->taps = taps_i;
 413 }
 414
 415 /*
 416  * Calculates a set of taps for each destination element in gint16
 417  * format.  Each set of taps sums to (1<<shift).  A typical value
 418  * for shift is 7, so that applying the taps to uint8 values and
 419  * summing will fit in a (signed) int16.
 420  */
 421 static void
 422 scale1d_calculate_taps_int16 (Scale1D * scale, int src_size, int dest_size,
 423     int n_taps, double a, double sharpness, double sharpen, int shift)
 424 {
 425   double *taps_d;
 426   gint16 *taps_i;
 427   int i;
 428   int j;
 429   double multiplier;
 430
 431   scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
 432       sharpen);
 433
 434   taps_d = scale->taps;
 435   taps_i = g_malloc (sizeof (gint16) * scale->n_taps * dest_size);
 436
 437   multiplier = (1 << shift);
 438
 439   /* Various methods for converting floating point taps to integer.
 440    * The dB values are the SSIM value between scaling an image via
 441    * the floating point pathway vs. the integer pathway using the
 442    * given code to generate the taps.  Only one image was tested,
 443    * scaling from 1920x1080 to 640x360.  Several variations of the
 444    * methods were also tested, with nothing appearing useful.  */
 445 #if 0
 446   /* Standard round to integer.  This causes bad DC errors. */
 447   /* 44.588 dB */
 448   for (j = 0; j < dest_size; j++) {
 449     for (i = 0; i < n_taps; i++) {
 450       taps_i[j * n_taps + i] =
 451           floor (0.5 + taps_d[j * n_taps + i] * multiplier);
 452     }
 453   }
 454 #endif
 455 #if 0
 456   /* Dithering via error propogation.  Works pretty well, but
 457    * really we want to propogate errors across rows, which would
 458    * mean having several sets of tap arrays.  Possible, but more work,
 459    * and it may not even be better. */
 460   /* 57.0961 dB */
 461   {
 462     double err = 0;
 463     for (j = 0; j < dest_size; j++) {
 464       for (i = 0; i < n_taps; i++) {
 465         err += taps_d[j * n_taps + i] * multiplier;
 466         taps_i[j * n_taps + i] = floor (err);
 467         err -= floor (err);
 468       }
 469     }
 470   }
 471 #endif
 472 #if 1
 473   /* Round to integer, but with an adjustable bias that we use to
 474    * eliminate the DC error.  This search method is a bit crude, and
 475    * could perhaps be improved somewhat. */
 476   /* 60.4851 dB */
 477   for (j = 0; j < dest_size; j++) {
 478     int k;
 479     for (k = 0; k < 100; k++) {
 480       int sum = 0;
 481       double offset;
 482
 483       offset = k * 0.01;
 484       for (i = 0; i < n_taps; i++) {
 485         taps_i[j * n_taps + i] =
 486             floor (offset + taps_d[j * n_taps + i] * multiplier);
 487         sum += taps_i[j * n_taps + i];
 488       }
 489
 490       if (sum >= (1 << shift))
 491         break;
 492     }
 493   }
 494 #endif
 495 #if 0
 496   /* Round to integer, but adjust the multiplier.  The search method is
 497    * wrong a lot, but was sufficient enough to calculate dB error. */
 498   /* 58.6517 dB */
 499   for (j = 0; j < dest_size; j++) {
 500     int k;
 501     int sum = 0;
 502     for (k = 0; k < 200; k++) {
 503       sum = 0;
 504
 505       multiplier = (1 << shift) - 1.0 + k * 0.01;
 506       for (i = 0; i < n_taps; i++) {
 507         taps_i[j * n_taps + i] =
 508             floor (0.5 + taps_d[j * n_taps + i] * multiplier);
 509         sum += taps_i[j * n_taps + i];
 510       }
 511
 512       if (sum >= (1 << shift))
 513         break;
 514     }
 515     if (sum != (1 << shift)) {
 516       GST_ERROR ("%g %d", multiplier, sum);
 517     }
 518   }
 519 #endif
 520 #if 0
 521   /* Round to integer, but subtract the error from the largest tap */
 522   /* 58.3677 dB */
 523   for (j = 0; j < dest_size; j++) {
 524     int err = -multiplier;
 525     for (i = 0; i < n_taps; i++) {
 526       taps_i[j * n_taps + i] =
 527           floor (0.5 + taps_d[j * n_taps + i] * multiplier);
 528       err += taps_i[j * n_taps + i];
 529     }
 530     if (taps_i[j * n_taps + (n_taps / 2 - 1)] >
 531         taps_i[j * n_taps + (n_taps / 2)]) {
 532       taps_i[j * n_taps + (n_taps / 2 - 1)] -= err;
 533     } else {
 534       taps_i[j * n_taps + (n_taps / 2)] -= err;
 535     }
 536   }
 537 #endif
 538
 539   g_free (taps_d);
 540   scale->taps = taps_i;
 541 }
 542
 543
 544 void
 545 vs_image_scale_lanczos_Y (const VSImage * dest, const VSImage * src,
 546     uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
 547     double a, double sharpen)
 548 {
 549   switch (submethod) {
 550     case 0:
 551     default:
 552       vs_image_scale_lanczos_Y_int16 (dest, src, tmpbuf, sharpness, dither, a,
 553           sharpen);
 554       break;
 555     case 1:
 556       vs_image_scale_lanczos_Y_int32 (dest, src, tmpbuf, sharpness, dither, a,
 557           sharpen);
 558       break;
 559     case 2:
 560       vs_image_scale_lanczos_Y_float (dest, src, tmpbuf, sharpness, dither, a,
 561           sharpen);
 562       break;
 563     case 3:
 564       vs_image_scale_lanczos_Y_double (dest, src, tmpbuf, sharpness, dither, a,
 565           sharpen);
 566       break;
 567   }
 568 }
 569
 570 void
 571 vs_image_scale_lanczos_AYUV (const VSImage * dest, const VSImage * src,
 572     uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
 573     double a, double sharpen)
 574 {
 575   switch (submethod) {
 576     case 0:
 577     default:
 578       vs_image_scale_lanczos_AYUV_int16 (dest, src, tmpbuf, sharpness, dither,
 579           a, sharpen);
 580       break;
 581     case 1:
 582       vs_image_scale_lanczos_AYUV_int32 (dest, src, tmpbuf, sharpness, dither,
 583           a, sharpen);
 584       break;
 585     case 2:
 586       vs_image_scale_lanczos_AYUV_float (dest, src, tmpbuf, sharpness, dither,
 587           a, sharpen);
 588       break;
 589     case 3:
 590       vs_image_scale_lanczos_AYUV_double (dest, src, tmpbuf, sharpness, dither,
 591           a, sharpen);
 592       break;
 593   }
 594 }
 595
 596 void
 597 vs_image_scale_lanczos_AYUV64 (const VSImage * dest, const VSImage * src,
 598     uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
 599     double a, double sharpen)
 600 {
 601   vs_image_scale_lanczos_AYUV64_double (dest, src, tmpbuf, sharpness, dither,
 602       a, sharpen);
 603 }
 604
 605
 606
 607 #define RESAMPLE_HORIZ_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \
 608 static void \
 609 function (dest_type *dest, const gint32 *offsets, \
 610     const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
 611 { \
 612   int i; \
 613   int k; \
 614   dest_type sum; \
 615   const src_type *srcline; \
 616   const tap_type *tapsline; \
 617   for (i = 0; i < n; i++) { \
 618     srcline = src + offsets[i]; \
 619     tapsline = taps + i * _n_taps; \
 620     sum = 0; \
 621     for (k = 0; k < _n_taps; k++) { \
 622       sum += srcline[k] * tapsline[k]; \
 623     } \
 624     dest[i] = sum; \
 625   } \
 626 }
 627
 628 #define RESAMPLE_HORIZ(function, dest_type, tap_type, src_type, _n_taps, _shift) \
 629 static void \
 630 function (dest_type *dest, const gint32 *offsets, \
 631     const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
 632 { \
 633   int i; \
 634   int k; \
 635   dest_type sum; \
 636   const src_type *srcline; \
 637   const tap_type *tapsline; \
 638   int offset; \
 639   if (_shift > 0) offset = (1<<_shift)>>1; \
 640   else offset = 0; \
 641   for (i = 0; i < n; i++) { \
 642     srcline = src + offsets[i]; \
 643     tapsline = taps + i * _n_taps; \
 644     sum = 0; \
 645     for (k = 0; k < _n_taps; k++) { \
 646       sum += srcline[k] * tapsline[k]; \
 647     } \
 648     dest[i] = (sum + offset) >> _shift; \
 649   } \
 650 }
 651
 652 #define RESAMPLE_HORIZ_AYUV_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \
 653 static void \
 654 function (dest_type *dest, const gint32 *offsets, \
 655     const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
 656 { \
 657   int i; \
 658   int k; \
 659   dest_type sum1; \
 660   dest_type sum2; \
 661   dest_type sum3; \
 662   dest_type sum4; \
 663   const src_type *srcline; \
 664   const tap_type *tapsline; \
 665   for (i = 0; i < n; i++) { \
 666     srcline = src + 4*offsets[i]; \
 667     tapsline = taps + i * _n_taps; \
 668     sum1 = 0; \
 669     sum2 = 0; \
 670     sum3 = 0; \
 671     sum4 = 0; \
 672     for (k = 0; k < _n_taps; k++) { \
 673       sum1 += srcline[k*4+0] * tapsline[k]; \
 674       sum2 += srcline[k*4+1] * tapsline[k]; \
 675       sum3 += srcline[k*4+2] * tapsline[k]; \
 676       sum4 += srcline[k*4+3] * tapsline[k]; \
 677     } \
 678     dest[i*4+0] = sum1; \
 679     dest[i*4+1] = sum2; \
 680     dest[i*4+2] = sum3; \
 681     dest[i*4+3] = sum4; \
 682   } \
 683 }
 684
 685 #define RESAMPLE_HORIZ_AYUV(function, dest_type, tap_type, src_type, _n_taps, _shift) \
 686 static void \
 687 function (dest_type *dest, const gint32 *offsets, \
 688     const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
 689 { \
 690   int i; \
 691   int k; \
 692   dest_type sum1; \
 693   dest_type sum2; \
 694   dest_type sum3; \
 695   dest_type sum4; \
 696   const src_type *srcline; \
 697   const tap_type *tapsline; \
 698   int offset; \
 699   if (_shift > 0) offset = (1<<_shift)>>1; \
 700   else offset = 0; \
 701   for (i = 0; i < n; i++) { \
 702     srcline = src + 4*offsets[i]; \
 703     tapsline = taps + i * _n_taps; \
 704     sum1 = 0; \
 705     sum2 = 0; \
 706     sum3 = 0; \
 707     sum4 = 0; \
 708     for (k = 0; k < _n_taps; k++) { \
 709       sum1 += srcline[k*4+0] * tapsline[k]; \
 710       sum2 += srcline[k*4+1] * tapsline[k]; \
 711       sum3 += srcline[k*4+2] * tapsline[k]; \
 712       sum4 += srcline[k*4+3] * tapsline[k]; \
 713     } \
 714     dest[i*4+0] = (sum1 + offset) >> _shift; \
 715     dest[i*4+1] = (sum2 + offset) >> _shift; \
 716     dest[i*4+2] = (sum3 + offset) >> _shift; \
 717     dest[i*4+3] = (sum4 + offset) >> _shift; \
 718   } \
 719 }
 720
 721 /* *INDENT-OFF* */
 722 RESAMPLE_HORIZ_FLOAT (resample_horiz_double_u8_generic, double, double,
 723     guint8, n_taps)
 724 RESAMPLE_HORIZ_FLOAT (resample_horiz_float_u8_generic, float, float,
 725     guint8, n_taps)
 726 RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic, double, double,
 727     guint8, n_taps)
 728 RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_float_ayuv_generic, float, float,
 729     guint8, n_taps)
 730
 731 RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic_s16, double, double,
 732     guint16, n_taps)
 733
 734 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_generic, gint32, gint32,
 735     guint8, n_taps, shift)
 736 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_generic, gint16, gint16,
 737     guint8, n_taps, shift)
 738 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_generic, gint32, gint32,
 739     guint8, n_taps, shift)
 740 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_generic, gint16, gint16,
 741     guint8, n_taps, shift)
 742
 743 /* Candidates for orcification */
 744 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps16_shift0, gint32, gint32,
 745     guint8, 16, 0)
 746 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps12_shift0, gint32, gint32,
 747     guint8, 12, 0)
 748 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps8_shift0, gint32, gint32,
 749     guint8, 8, 0)
 750 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps4_shift0, gint32, gint32,
 751     guint8, 4, 0)
 752 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps16_shift0, gint16, gint16,
 753     guint8, 16, 0)
 754 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps12_shift0, gint16, gint16,
 755     guint8, 12, 0)
 756 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps8_shift0, gint16, gint16,
 757     guint8, 8, 0)
 758 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps4_shift0, gint16, gint16,
 759     guint8, 4, 0)
 760
 761 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps16_shift0, gint32, gint32,
 762     guint8, 16, 0)
 763 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps12_shift0, gint32, gint32,
 764     guint8, 12, 0)
 765 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps8_shift0, gint32, gint32,
 766     guint8, 8, 0)
 767 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps4_shift0, gint32, gint32,
 768     guint8, 4, 0)
 769 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps16_shift0, gint16, gint16,
 770     guint8, 16, 0)
 771 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps12_shift0, gint16, gint16,
 772     guint8, 12, 0)
 773 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps8_shift0, gint16, gint16,
 774     guint8, 8, 0)
 775 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps4_shift0, gint16, gint16,
 776     guint8, 4, 0)
 777 /* *INDENT-ON* */
 778
 779 #define RESAMPLE_VERT(function, tap_type, src_type, _n_taps, _shift) \
 780 static void \
 781 function (guint8 *dest, \
 782     const tap_type *taps, const src_type *src, int stride, int n_taps, \
 783     int shift, int n) \
 784 { \
 785   int i; \
 786   int l; \
 787   gint32 sum_y; \
 788   gint32 offset = (1<<_shift) >> 1; \
 789   for (i = 0; i < n; i++) { \
 790     sum_y = 0; \
 791     for (l = 0; l < n_taps; l++) { \
 792       const src_type *line = PTR_OFFSET(src, stride * l); \
 793       sum_y += line[i] * taps[l]; \
 794     } \
 795     dest[i] = CLAMP ((sum_y + offset) >> _shift, 0, 255); \
 796   } \
 797 }
 798
 799 #define RESAMPLE_VERT_DITHER(function, tap_type, src_type, _n_taps, _shift) \
 800 static void \
 801 function (guint8 *dest, \
 802     const tap_type *taps, const src_type *src, int stride, int n_taps, \
 803     int shift, int n) \
 804 { \
 805   int i; \
 806   int l; \
 807   gint32 sum_y; \
 808   gint32 err_y = 0; \
 809   gint32 mask = (1<<_shift) - 1; \
 810   for (i = 0; i < n; i++) { \
 811     sum_y = 0; \
 812     for (l = 0; l < n_taps; l++) { \
 813       const src_type *line = PTR_OFFSET(src, stride * l); \
 814       sum_y += line[i] * taps[l]; \
 815     } \
 816     err_y += sum_y; \
 817     dest[i] = CLAMP (err_y >> _shift, 0, 255); \
 818     err_y &= mask; \
 819   } \
 820 }
 821
 822 /* *INDENT-OFF* */
 823 RESAMPLE_VERT (resample_vert_int32_generic, gint32, gint32, n_taps, shift)
 824 RESAMPLE_VERT_DITHER (resample_vert_dither_int32_generic, gint32, gint32,
 825     n_taps, shift)
 826 RESAMPLE_VERT (resample_vert_int16_generic, gint16, gint16, n_taps, shift);
 827 RESAMPLE_VERT_DITHER (resample_vert_dither_int16_generic, gint16, gint16,
 828     n_taps, shift)
 829 /* *INDENT-ON* */
 830
 831 #define RESAMPLE_VERT_FLOAT(function, dest_type, clamp, tap_type, src_type, _n_taps, _shift) \
 832 static void \
 833 function (dest_type *dest, \
 834     const tap_type *taps, const src_type *src, int stride, int n_taps, \
 835     int shift, int n) \
 836 { \
 837   int i; \
 838   int l; \
 839   src_type sum_y; \
 840   for (i = 0; i < n; i++) { \
 841     sum_y = 0; \
 842     for (l = 0; l < n_taps; l++) { \
 843       const src_type *line = PTR_OFFSET(src, stride * l); \
 844       sum_y += line[i] * taps[l]; \
 845     } \
 846     dest[i] = CLAMP (floor(0.5 + sum_y), 0, clamp); \
 847   } \
 848 }
 849
 850 #define RESAMPLE_VERT_FLOAT_DITHER(function, dest_type, clamp, tap_type, src_type, _n_taps, _shift) \
 851 static void \
 852 function (dest_type *dest, \
 853     const tap_type *taps, const src_type *src, int stride, int n_taps, \
 854     int shift, int n) \
 855 { \
 856   int i; \
 857   int l; \
 858   src_type sum_y; \
 859   src_type err_y = 0; \
 860   for (i = 0; i < n; i++) { \
 861     sum_y = 0; \
 862     for (l = 0; l < n_taps; l++) { \
 863       const src_type *line = PTR_OFFSET(src, stride * l); \
 864       sum_y += line[i] * taps[l]; \
 865     } \
 866     err_y += sum_y; \
 867     dest[i] = CLAMP (floor (err_y), 0, clamp); \
 868     err_y -= floor (err_y); \
 869   } \
 870 }
 871
 872 /* *INDENT-OFF* */
 873 RESAMPLE_VERT_FLOAT (resample_vert_double_generic, guint8, 255, double, double, n_taps,
 874     shift)
 875 RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic, guint8, 255, double, double,
 876     n_taps, shift)
 877
 878 RESAMPLE_VERT_FLOAT (resample_vert_double_generic_u16, guint16, 65535, double, double, n_taps,
 879     shift)
 880 RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic_u16, guint16, 65535, double, double,
 881     n_taps, shift)
 882
 883 RESAMPLE_VERT_FLOAT (resample_vert_float_generic, guint8, 255, float, float, n_taps, shift)
 884 RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_float_generic, guint8, 255, float, float,
 885     n_taps, shift)
 886 /* *INDENT-ON* */
 887
 888 #define S16_SHIFT1 7
 889 #define S16_SHIFT2 7
 890 #define S16_MIDSHIFT 0
 891 #define S16_POSTSHIFT (S16_SHIFT1+S16_SHIFT2-S16_MIDSHIFT)
 892
 893 static void
 894 vs_scale_lanczos_Y_int16 (Scale * scale)
 895 {
 896   int j;
 897   int yi;
 898   int tmp_yi;
 899
 900   tmp_yi = 0;
 901
 902   for (j = 0; j < scale->dest->height; j++) {
 903     guint8 *destline;
 904     gint16 *taps;
 905
 906     destline = scale->dest->pixels + scale->dest->stride * j;
 907
 908     yi = scale->y_scale1d.offsets[j];
 909
 910     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
 911       scale->horiz_resample_func (TMP_LINE_S16 (tmp_yi),
 912           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
 913           scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width);
 914       tmp_yi++;
 915     }
 916
 917     taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
 918     if (scale->dither) {
 919       resample_vert_dither_int16_generic (destline,
 920           taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]),
 921           sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps,
 922           S16_POSTSHIFT, scale->dest->width);
 923     } else {
 924       resample_vert_int16_generic (destline,
 925           taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]),
 926           sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps,
 927           S16_POSTSHIFT, scale->dest->width);
 928     }
 929   }
 930 }
 931
 932 void
 933 vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src,
 934     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
 935     double sharpen)
 936 {
 937   Scale s = { 0 };
 938   Scale *scale = &s;
 939   int n_taps;
 940
 941   scale->dest = dest;
 942   scale->src = src;
 943
 944   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
 945   n_taps = ROUND_UP_4 (n_taps);
 946   scale1d_calculate_taps_int16 (&scale->x_scale1d,
 947       src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1);
 948
 949   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
 950   scale1d_calculate_taps_int16 (&scale->y_scale1d,
 951       src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2);
 952
 953   scale->dither = dither;
 954
 955   switch (scale->x_scale1d.n_taps) {
 956     case 4:
 957       scale->horiz_resample_func =
 958           (HorizResampleFunc) resample_horiz_int16_int16_u8_taps4_shift0;
 959       break;
 960     case 8:
 961       scale->horiz_resample_func =
 962           (HorizResampleFunc) resample_horiz_int16_int16_u8_taps8_shift0;
 963       break;
 964     case 12:
 965       scale->horiz_resample_func =
 966           (HorizResampleFunc) resample_horiz_int16_int16_u8_taps12_shift0;
 967       break;
 968     case 16:
 969       scale->horiz_resample_func =
 970           (HorizResampleFunc) resample_horiz_int16_int16_u8_taps16_shift0;
 971       break;
 972     default:
 973       scale->horiz_resample_func =
 974           (HorizResampleFunc) resample_horiz_int16_int16_u8_generic;
 975       break;
 976   }
 977
 978   scale->tmpdata =
 979       g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height);
 980
 981   vs_scale_lanczos_Y_int16 (scale);
 982
 983   scale1d_cleanup (&scale->x_scale1d);
 984   scale1d_cleanup (&scale->y_scale1d);
 985   g_free (scale->tmpdata);
 986 }
 987
 988
 989 #define S32_SHIFT1 11
 990 #define S32_SHIFT2 11
 991 #define S32_MIDSHIFT 0
 992 #define S32_POSTSHIFT (S32_SHIFT1+S32_SHIFT2-S32_MIDSHIFT)
 993
 994 static void
 995 vs_scale_lanczos_Y_int32 (Scale * scale)
 996 {
 997   int j;
 998   int yi;
 999   int tmp_yi;
1000
1001   tmp_yi = 0;
1002
1003   for (j = 0; j < scale->dest->height; j++) {
1004     guint8 *destline;
1005     gint32 *taps;
1006
1007     destline = scale->dest->pixels + scale->dest->stride * j;
1008
1009     yi = scale->y_scale1d.offsets[j];
1010
1011     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1012       scale->horiz_resample_func (TMP_LINE_S32 (tmp_yi),
1013           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1014           scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width);
1015       tmp_yi++;
1016     }
1017
1018     taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1019     if (scale->dither) {
1020       resample_vert_dither_int32_generic (destline,
1021           taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]),
1022           sizeof (gint32) * scale->dest->width,
1023           scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width);
1024     } else {
1025       resample_vert_int32_generic (destline,
1026           taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]),
1027           sizeof (gint32) * scale->dest->width,
1028           scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width);
1029     }
1030   }
1031 }
1032
1033 void
1034 vs_image_scale_lanczos_Y_int32 (const VSImage * dest, const VSImage * src,
1035     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1036     double sharpen)
1037 {
1038   Scale s = { 0 };
1039   Scale *scale = &s;
1040   int n_taps;
1041
1042   scale->dest = dest;
1043   scale->src = src;
1044
1045   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1046   n_taps = ROUND_UP_4 (n_taps);
1047   scale1d_calculate_taps_int32 (&scale->x_scale1d,
1048       src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1);
1049
1050   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1051   scale1d_calculate_taps_int32 (&scale->y_scale1d,
1052       src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2);
1053
1054   scale->dither = dither;
1055
1056   switch (scale->x_scale1d.n_taps) {
1057     case 4:
1058       scale->horiz_resample_func =
1059           (HorizResampleFunc) resample_horiz_int32_int32_u8_taps4_shift0;
1060       break;
1061     case 8:
1062       scale->horiz_resample_func =
1063           (HorizResampleFunc) resample_horiz_int32_int32_u8_taps8_shift0;
1064       break;
1065     case 12:
1066       scale->horiz_resample_func =
1067           (HorizResampleFunc) resample_horiz_int32_int32_u8_taps12_shift0;
1068       break;
1069     case 16:
1070       scale->horiz_resample_func =
1071           (HorizResampleFunc) resample_horiz_int32_int32_u8_taps16_shift0;
1072       break;
1073     default:
1074       scale->horiz_resample_func =
1075           (HorizResampleFunc) resample_horiz_int32_int32_u8_generic;
1076       break;
1077   }
1078
1079   scale->tmpdata =
1080       g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height);
1081
1082   vs_scale_lanczos_Y_int32 (scale);
1083
1084   scale1d_cleanup (&scale->x_scale1d);
1085   scale1d_cleanup (&scale->y_scale1d);
1086   g_free (scale->tmpdata);
1087 }
1088
1089 static void
1090 vs_scale_lanczos_Y_double (Scale * scale)
1091 {
1092   int j;
1093   int yi;
1094   int tmp_yi;
1095
1096   tmp_yi = 0;
1097
1098   for (j = 0; j < scale->dest->height; j++) {
1099     guint8 *destline;
1100     double *taps;
1101
1102     destline = scale->dest->pixels + scale->dest->stride * j;
1103
1104     yi = scale->y_scale1d.offsets[j];
1105
1106     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1107       scale->horiz_resample_func (TMP_LINE_DOUBLE (tmp_yi),
1108           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1109           scale->x_scale1d.n_taps, 0, scale->dest->width);
1110       tmp_yi++;
1111     }
1112
1113     taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1114     if (scale->dither) {
1115       resample_vert_dither_double_generic (destline,
1116           taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]),
1117           sizeof (double) * scale->dest->width,
1118           scale->y_scale1d.n_taps, 0, scale->dest->width);
1119     } else {
1120       resample_vert_double_generic (destline,
1121           taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]),
1122           sizeof (double) * scale->dest->width,
1123           scale->y_scale1d.n_taps, 0, scale->dest->width);
1124     }
1125   }
1126 }
1127
1128 void
1129 vs_image_scale_lanczos_Y_double (const VSImage * dest, const VSImage * src,
1130     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1131     double sharpen)
1132 {
1133   Scale s = { 0 };
1134   Scale *scale = &s;
1135   int n_taps;
1136
1137   scale->dest = dest;
1138   scale->src = src;
1139
1140   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1141   scale1d_calculate_taps (&scale->x_scale1d,
1142       src->width, dest->width, n_taps, a, sharpness, sharpen);
1143
1144   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1145   scale1d_calculate_taps (&scale->y_scale1d,
1146       src->height, dest->height, n_taps, a, sharpness, sharpen);
1147
1148   scale->dither = dither;
1149
1150   scale->horiz_resample_func =
1151       (HorizResampleFunc) resample_horiz_double_u8_generic;
1152
1153   scale->tmpdata =
1154       g_malloc (sizeof (double) * scale->dest->width * scale->src->height);
1155
1156   vs_scale_lanczos_Y_double (scale);
1157
1158   scale1d_cleanup (&scale->x_scale1d);
1159   scale1d_cleanup (&scale->y_scale1d);
1160   g_free (scale->tmpdata);
1161 }
1162
1163 static void
1164 vs_scale_lanczos_Y_float (Scale * scale)
1165 {
1166   int j;
1167   int yi;
1168   int tmp_yi;
1169
1170   tmp_yi = 0;
1171
1172   for (j = 0; j < scale->dest->height; j++) {
1173     guint8 *destline;
1174     float *taps;
1175
1176     destline = scale->dest->pixels + scale->dest->stride * j;
1177
1178     yi = scale->y_scale1d.offsets[j];
1179
1180     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1181       scale->horiz_resample_func (TMP_LINE_FLOAT (tmp_yi),
1182           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1183           scale->x_scale1d.n_taps, 0, scale->dest->width);
1184       tmp_yi++;
1185     }
1186
1187     taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1188     if (scale->dither) {
1189       resample_vert_dither_float_generic (destline,
1190           taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]),
1191           sizeof (float) * scale->dest->width,
1192           scale->y_scale1d.n_taps, 0, scale->dest->width);
1193     } else {
1194       resample_vert_float_generic (destline,
1195           taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]),
1196           sizeof (float) * scale->dest->width,
1197           scale->y_scale1d.n_taps, 0, scale->dest->width);
1198     }
1199   }
1200 }
1201
1202 void
1203 vs_image_scale_lanczos_Y_float (const VSImage * dest, const VSImage * src,
1204     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1205     double sharpen)
1206 {
1207   Scale s = { 0 };
1208   Scale *scale = &s;
1209   int n_taps;
1210
1211   scale->dest = dest;
1212   scale->src = src;
1213
1214   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1215   scale1d_calculate_taps_float (&scale->x_scale1d,
1216       src->width, dest->width, n_taps, a, sharpness, sharpen);
1217
1218   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1219   scale1d_calculate_taps_float (&scale->y_scale1d,
1220       src->height, dest->height, n_taps, a, sharpness, sharpen);
1221
1222   scale->dither = dither;
1223
1224   scale->horiz_resample_func =
1225       (HorizResampleFunc) resample_horiz_float_u8_generic;
1226
1227   scale->tmpdata =
1228       g_malloc (sizeof (float) * scale->dest->width * scale->src->height);
1229
1230   vs_scale_lanczos_Y_float (scale);
1231
1232   scale1d_cleanup (&scale->x_scale1d);
1233   scale1d_cleanup (&scale->y_scale1d);
1234   g_free (scale->tmpdata);
1235 }
1236
1237
1238
1239
1240
1241 static void
1242 vs_scale_lanczos_AYUV_int16 (Scale * scale)
1243 {
1244   int j;
1245   int yi;
1246   int tmp_yi;
1247
1248   tmp_yi = 0;
1249
1250   for (j = 0; j < scale->dest->height; j++) {
1251     guint8 *destline;
1252     gint16 *taps;
1253
1254     destline = scale->dest->pixels + scale->dest->stride * j;
1255
1256     yi = scale->y_scale1d.offsets[j];
1257
1258     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1259       scale->horiz_resample_func (TMP_LINE_S16_AYUV (tmp_yi),
1260           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1261           scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width);
1262       tmp_yi++;
1263     }
1264
1265     taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1266     if (scale->dither) {
1267       resample_vert_dither_int16_generic (destline,
1268           taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]),
1269           sizeof (gint16) * 4 * scale->dest->width,
1270           scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4);
1271     } else {
1272       resample_vert_int16_generic (destline,
1273           taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]),
1274           sizeof (gint16) * 4 * scale->dest->width,
1275           scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4);
1276     }
1277   }
1278 }
1279
1280 void
1281 vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src,
1282     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1283     double sharpen)
1284 {
1285   Scale s = { 0 };
1286   Scale *scale = &s;
1287   int n_taps;
1288
1289   scale->dest = dest;
1290   scale->src = src;
1291
1292   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1293   n_taps = ROUND_UP_4 (n_taps);
1294   scale1d_calculate_taps_int16 (&scale->x_scale1d,
1295       src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1);
1296
1297   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1298   scale1d_calculate_taps_int16 (&scale->y_scale1d,
1299       src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2);
1300
1301   scale->dither = dither;
1302
1303   switch (scale->x_scale1d.n_taps) {
1304     case 4:
1305       scale->horiz_resample_func =
1306           (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps4_shift0;
1307       break;
1308     case 8:
1309       scale->horiz_resample_func =
1310           (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps8_shift0;
1311       break;
1312     case 12:
1313       scale->horiz_resample_func =
1314           (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps12_shift0;
1315       break;
1316     case 16:
1317       scale->horiz_resample_func =
1318           (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps16_shift0;
1319       break;
1320     default:
1321       scale->horiz_resample_func =
1322           (HorizResampleFunc) resample_horiz_int16_int16_ayuv_generic;
1323       break;
1324   }
1325
1326   scale->tmpdata =
1327       g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height * 4);
1328
1329   vs_scale_lanczos_AYUV_int16 (scale);
1330
1331   scale1d_cleanup (&scale->x_scale1d);
1332   scale1d_cleanup (&scale->y_scale1d);
1333   g_free (scale->tmpdata);
1334 }
1335
1336
1337 static void
1338 vs_scale_lanczos_AYUV_int32 (Scale * scale)
1339 {
1340   int j;
1341   int yi;
1342   int tmp_yi;
1343
1344   tmp_yi = 0;
1345
1346   for (j = 0; j < scale->dest->height; j++) {
1347     guint8 *destline;
1348     gint32 *taps;
1349
1350     destline = scale->dest->pixels + scale->dest->stride * j;
1351
1352     yi = scale->y_scale1d.offsets[j];
1353
1354     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1355       scale->horiz_resample_func (TMP_LINE_S32_AYUV (tmp_yi),
1356           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1357           scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width);
1358       tmp_yi++;
1359     }
1360
1361     taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1362     if (scale->dither) {
1363       resample_vert_dither_int32_generic (destline,
1364           taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]),
1365           sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps,
1366           S32_POSTSHIFT, scale->dest->width * 4);
1367     } else {
1368       resample_vert_int32_generic (destline,
1369           taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]),
1370           sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps,
1371           S32_POSTSHIFT, scale->dest->width * 4);
1372     }
1373   }
1374 }
1375
1376 void
1377 vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, const VSImage * src,
1378     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1379     double sharpen)
1380 {
1381   Scale s = { 0 };
1382   Scale *scale = &s;
1383   int n_taps;
1384
1385   scale->dest = dest;
1386   scale->src = src;
1387
1388   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1389   n_taps = ROUND_UP_4 (n_taps);
1390   scale1d_calculate_taps_int32 (&scale->x_scale1d,
1391       src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1);
1392
1393   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1394   scale1d_calculate_taps_int32 (&scale->y_scale1d,
1395       src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2);
1396
1397   scale->dither = dither;
1398
1399   switch (scale->x_scale1d.n_taps) {
1400     case 4:
1401       scale->horiz_resample_func =
1402           (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps4_shift0;
1403       break;
1404     case 8:
1405       scale->horiz_resample_func =
1406           (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps8_shift0;
1407       break;
1408     case 12:
1409       scale->horiz_resample_func =
1410           (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps12_shift0;
1411       break;
1412     case 16:
1413       scale->horiz_resample_func =
1414           (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps16_shift0;
1415       break;
1416     default:
1417       scale->horiz_resample_func =
1418           (HorizResampleFunc) resample_horiz_int32_int32_ayuv_generic;
1419       break;
1420   }
1421
1422   scale->tmpdata =
1423       g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height * 4);
1424
1425   vs_scale_lanczos_AYUV_int32 (scale);
1426
1427   scale1d_cleanup (&scale->x_scale1d);
1428   scale1d_cleanup (&scale->y_scale1d);
1429   g_free (scale->tmpdata);
1430 }
1431
1432 static void
1433 vs_scale_lanczos_AYUV_double (Scale * scale)
1434 {
1435   int j;
1436   int yi;
1437   int tmp_yi;
1438
1439   tmp_yi = 0;
1440
1441   for (j = 0; j < scale->dest->height; j++) {
1442     guint8 *destline;
1443     double *taps;
1444
1445     destline = scale->dest->pixels + scale->dest->stride * j;
1446
1447     yi = scale->y_scale1d.offsets[j];
1448
1449     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1450       scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi),
1451           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1452           scale->x_scale1d.n_taps, 0, scale->dest->width);
1453       tmp_yi++;
1454     }
1455
1456     taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1457     if (scale->dither) {
1458       resample_vert_dither_double_generic (destline,
1459           taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1460           sizeof (double) * 4 * scale->dest->width,
1461           scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
1462     } else {
1463       resample_vert_double_generic (destline,
1464           taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1465           sizeof (double) * 4 * scale->dest->width,
1466           scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
1467     }
1468   }
1469 }
1470
1471 void
1472 vs_image_scale_lanczos_AYUV_double (const VSImage * dest, const VSImage * src,
1473     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1474     double sharpen)
1475 {
1476   Scale s = { 0 };
1477   Scale *scale = &s;
1478   int n_taps;
1479
1480   scale->dest = dest;
1481   scale->src = src;
1482
1483   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1484   scale1d_calculate_taps (&scale->x_scale1d,
1485       src->width, dest->width, n_taps, a, sharpness, sharpen);
1486
1487   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1488   scale1d_calculate_taps (&scale->y_scale1d,
1489       src->height, dest->height, n_taps, a, sharpness, sharpen);
1490
1491   scale->dither = dither;
1492
1493   scale->horiz_resample_func =
1494       (HorizResampleFunc) resample_horiz_double_ayuv_generic;
1495
1496   scale->tmpdata =
1497       g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4);
1498
1499   vs_scale_lanczos_AYUV_double (scale);
1500
1501   scale1d_cleanup (&scale->x_scale1d);
1502   scale1d_cleanup (&scale->y_scale1d);
1503   g_free (scale->tmpdata);
1504 }
1505
1506 static void
1507 vs_scale_lanczos_AYUV_float (Scale * scale)
1508 {
1509   int j;
1510   int yi;
1511   int tmp_yi;
1512
1513   tmp_yi = 0;
1514
1515   for (j = 0; j < scale->dest->height; j++) {
1516     guint8 *destline;
1517     float *taps;
1518
1519     destline = scale->dest->pixels + scale->dest->stride * j;
1520
1521     yi = scale->y_scale1d.offsets[j];
1522
1523     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1524       scale->horiz_resample_func (TMP_LINE_FLOAT_AYUV (tmp_yi),
1525           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1526           scale->x_scale1d.n_taps, 0, scale->dest->width);
1527       tmp_yi++;
1528     }
1529
1530     taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1531     if (scale->dither) {
1532       resample_vert_dither_float_generic (destline,
1533           taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]),
1534           sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0,
1535           scale->dest->width * 4);
1536     } else {
1537       resample_vert_float_generic (destline,
1538           taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]),
1539           sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0,
1540           scale->dest->width * 4);
1541     }
1542   }
1543 }
1544
1545 void
1546 vs_image_scale_lanczos_AYUV_float (const VSImage * dest, const VSImage * src,
1547     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1548     double sharpen)
1549 {
1550   Scale s = { 0 };
1551   Scale *scale = &s;
1552   int n_taps;
1553
1554   scale->dest = dest;
1555   scale->src = src;
1556
1557   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1558   scale1d_calculate_taps_float (&scale->x_scale1d,
1559       src->width, dest->width, n_taps, a, sharpness, sharpen);
1560
1561   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1562   scale1d_calculate_taps_float (&scale->y_scale1d,
1563       src->height, dest->height, n_taps, a, sharpness, sharpen);
1564
1565   scale->dither = dither;
1566
1567   scale->horiz_resample_func =
1568       (HorizResampleFunc) resample_horiz_float_ayuv_generic;
1569
1570   scale->tmpdata =
1571       g_malloc (sizeof (float) * scale->dest->width * scale->src->height * 4);
1572
1573   vs_scale_lanczos_AYUV_float (scale);
1574
1575   scale1d_cleanup (&scale->x_scale1d);
1576   scale1d_cleanup (&scale->y_scale1d);
1577   g_free (scale->tmpdata);
1578 }
1579
1580 static void
1581 vs_scale_lanczos_AYUV64_double (Scale * scale)
1582 {
1583   int j;
1584   int yi;
1585   int tmp_yi;
1586
1587   tmp_yi = 0;
1588
1589   for (j = 0; j < scale->dest->height; j++) {
1590     guint16 *destline;
1591     double *taps;
1592
1593     destline = (guint16 *) (scale->dest->pixels + scale->dest->stride * j);
1594
1595     yi = scale->y_scale1d.offsets[j];
1596
1597     while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1598       scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi),
1599           scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1600           scale->x_scale1d.n_taps, 0, scale->dest->width);
1601       tmp_yi++;
1602     }
1603
1604     taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1605     if (scale->dither) {
1606       resample_vert_dither_double_generic_u16 (destline,
1607           taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1608           sizeof (double) * 4 * scale->dest->width,
1609           scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
1610     } else {
1611       resample_vert_double_generic_u16 (destline,
1612           taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1613           sizeof (double) * 4 * scale->dest->width,
1614           scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
1615     }
1616   }
1617 }
1618
1619 void
1620 vs_image_scale_lanczos_AYUV64_double (const VSImage * dest, const VSImage * src,
1621     uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1622     double sharpen)
1623 {
1624   Scale s = { 0 };
1625   Scale *scale = &s;
1626   int n_taps;
1627
1628   scale->dest = dest;
1629   scale->src = src;
1630
1631   n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1632   scale1d_calculate_taps (&scale->x_scale1d,
1633       src->width, dest->width, n_taps, a, sharpness, sharpen);
1634
1635   n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1636   scale1d_calculate_taps (&scale->y_scale1d,
1637       src->height, dest->height, n_taps, a, sharpness, sharpen);
1638
1639   scale->dither = dither;
1640
1641   scale->horiz_resample_func =
1642       (HorizResampleFunc) resample_horiz_double_ayuv_generic_s16;
1643
1644   scale->tmpdata =
1645       g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4);
1646
1647   vs_scale_lanczos_AYUV64_double (scale);
1648
1649   scale1d_cleanup (&scale->x_scale1d);
1650   scale1d_cleanup (&scale->y_scale1d);
1651   g_free (scale->tmpdata);
1652 }