--- /dev/null
+/*
+ * Image Scaling Functions
+ * Copyright (c) 2011 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ *
+ * Modified Lanczos scaling algorithm
+ * ==================================
+ *
+ * This algorithm was developed by the author. The primary goals of
+ * the algorithm are high-quality video downscaling for medium scale
+ * factors (in the range of 1.3x to 5.0x) using methods that can be
+ * converted to SIMD code. Concerns with existing algorithms were
+ * mainly related to either over-soft filtering (Lanczos) or aliasing
+ * (bilinear or any other method with inadequate sampling).
+ *
+ * The problems with bilinear scaling are apparent when downscaling
+ * more than a factor of 2. For example, when downscaling by a factor
+ * of 3, only two-thirds of the input pixels contribute to the output
+ * pixels. This is only considering scaling in one direction; after
+ * scaling both vertically and horizontally in a 2-D image, fewer than
+ * half of the input pixels contribute to the output, so it should not
+ * be surprising that the output is suboptimal.
+ *
+ * The problems with Lanczos scaling are more subtle. From a theoretical
+ * perspective, Lanczos is an optimal algorithm for resampling equally-
+ * spaced values. This theoretical perspective is based on analysis
+ * done in frequency space, thus, Lanczos works very well for audio
+ * resampling, since the ear hears primarily in frequency space. The
+ * human visual system is sensitive primarily in the spatial domain,
+ * therefore any resampling algorithm should take this into account.
+ * This difference is immediately clear in the size of resampling
+ * window or envelope that is chosen for resampling: for audio, an
+ * envelope of a=64 is typical, in image scaling, the envelope is
+ * usually a=2 or a=3.
+ *
+ * One result of the HVS being sensitive in the spatial domain (and
+ * also probably due to oversampling capabilities of the retina and
+ * visual cortex) is that it is less sensitive to the exact magnitude
+ * of high-frequency visual signals than to the appropriate amount of
+ * energy in the nearby frequency band. A Lanczos kernel with a=2
+ * or a=3 strongly decreases the amount of energy in the high frequency
+ * bands. The energy in this area can be increased by increasing a,
+ * which brings in energy from different areas of the image (bad for
+ * reasons mentioned above), or by oversampling the input data. We
+ * have chosen two methods for doing the latter. Firstly, there is
+ * a sharpness parameter, which increases the cutoff frequency of the
+ * filter, aliasing higher frequency noise into the passband. And
+ * secondly, there is the sharpen parameter, which increases the
+ * contribution of high-frequency (but in-band) components.
+ *
+ * An alternate explanation of the usefulness of a sharpening filter
+ * is that many natural images have a roughly 1/f spectrum. In order
+ * for a downsampled image to look more "natural" when high frequencies
+ * are removed, the frequencies in the pass band near the cutoff
+ * frequency are amplified, causing the spectrum to be more roughly
+ * 1/f. I said "roughly", not "literally".
+ *
+ * This alternate explanation is useful for understanding the author's
+ * secondary motivation for developing this algorithm, namely, as a
+ * method of video compression. Several recent techniques (such as
+ * HTTP Live Streaming and SVC) use image scaling as a method to get
+ * increased compression out of nominally non-scalable codecs such as
+ * H.264. For optimal quality, it is thusly important to consider
+ * the scaler and encoder as a combined unit. Tuning of the sharpness
+ * and sharpen parameters was performed using the Toro encoder tuner,
+ * where scaled and encoded video was compared to unscaled and encoded
+ * video. This tuning suggested values that were very close to the
+ * values chosen by manual inspection of scaled images and video.
+ *
+ * The optimal values of sharpen and sharpness were slightly different
+ * depending on whether the comparison was still images or video. Video
+ * comparisons were more sensitive to aliasing, since the aliasing
+ * artifacts tended to move or "crawl" around the video. The default
+ * values are for video; image scaling may prefer higher values.
+ *
+ * A number of related techniques were rejected for various reasons.
+ * An early technique of selecting the sharpness factor locally based
+ * on edge detection (in order to use higher sharpness values without
+ * the corresponding aliasing on edges) worked very well for still
+ * images, but caused too much "crawling" on textures in video. Also,
+ * this method is slow, as it does not parallelize well.
+ *
+ * Non-separable techniques were rejected because the fastest would
+ * have been at least 4x slower.
+ *
+ * It is infrequently appreciated that image scaling should ideally be
+ * done in linear light space. Converting to linear light space has
+ * a similar effect to a sharpening filter. This approach was not
+ * taken because the added benefit is minor compared to the additional
+ * computational cost. Moreover, the benefit is decreased by increasing
+ * the strength of the sharpening filter.
+ *
+ */
+#include <string.h>
+
+#include "vs_scanline.h"
+#include "vs_image.h"
+
+#include "gstvideoscaleorc.h"
+#include <gst/gst.h>
+#include <math.h>
+
+/* True when x lies outside the closed interval [a, b]. */
+#define NEED_CLAMP(x,a,b) ((x) < (a) || (x) > (b))
+
+/* Round x up to the next multiple of 2, 4 or 8. */
+#define ROUND_UP_2(x) (((x)+1)&~1)
+#define ROUND_UP_4(x) (((x)+3)&~3)
+#define ROUND_UP_8(x) (((x)+7)&~7)
+
+/* Address of source line i: src->pixels advanced by i strides.
+ * Expects a variable named `scale` to be in scope at the point of use. */
+#define SRC_LINE(i) (scale->src->pixels + scale->src->stride * (i))
+
+/* Line i of scale->tmpdata, viewed as an array of the named element type
+ * with dest->width elements per line (4 * dest->width for the _AYUV
+ * forms). These also expect a variable named `scale` to be in scope. */
+#define TMP_LINE_S16(i) ((gint16 *)scale->tmpdata + (i)*(scale->dest->width))
+#define TMP_LINE_S32(i) ((gint32 *)scale->tmpdata + (i)*(scale->dest->width))
+#define TMP_LINE_FLOAT(i) ((float *)scale->tmpdata + (i)*(scale->dest->width))
+#define TMP_LINE_DOUBLE(i) ((double *)scale->tmpdata + (i)*(scale->dest->width))
+#define TMP_LINE_S16_AYUV(i) ((gint16 *)scale->tmpdata + (i)*4*(scale->dest->width))
+#define TMP_LINE_S32_AYUV(i) ((gint32 *)scale->tmpdata + (i)*4*(scale->dest->width))
+#define TMP_LINE_FLOAT_AYUV(i) ((float *)scale->tmpdata + (i)*4*(scale->dest->width))
+#define TMP_LINE_DOUBLE_AYUV(i) ((double *)scale->tmpdata + (i)*4*(scale->dest->width))
+
+/* Pointer a advanced by b bytes, returned as a void pointer. */
+#define PTR_OFFSET(a,b) ((void *)((char *)(a) + (b)))
+
+/* Common signature of the horizontal resampling routines stored in
+ * Scale.horiz_resample_func. The typed resample_horiz_* functions are cast
+ * to this type before being stored. */
+typedef void (*HorizResampleFunc) (void *dest, const gint32 * offsets,
+ const void *taps, const void *src, int n_taps, int shift, int n);
+
+/* Resampling parameters and tap tables for one axis. The fields from
+ * `offset` down are filled in by scale1d_calculate_taps() and the tables
+ * are released by scale1d_cleanup(). */
+typedef struct _Scale1D Scale1D;
+struct _Scale1D
+{
+ int n; /* NOTE(review): not assigned by the code visible here -- confirm use */
+ double offset; /* scale / 2 - 0.5: source position of destination element 0 */
+ double scale; /* src_size / dest_size */
+
+ double fx; /* factor applied to the sinc() argument */
+ double ex; /* factor applied to the envelope() argument: fx / a */
+ int dx; /* ceil (a / fx) */
+
+ int n_taps; /* number of taps per destination element */
+ gint32 *offsets; /* per destination element: first source index used */
+ void *taps; /* n_taps entries per destination element; the element
+ * type depends on which scale1d_calculate_taps*()
+ * variant filled the table */
+};
+
+/* State of one two-pass (horizontal, then vertical) scaling operation. */
+typedef struct _Scale Scale;
+struct _Scale
+{
+ const VSImage *dest; /* image written by the vertical pass */
+ const VSImage *src; /* image read by the horizontal pass */
+
+ double sharpness;
+ gboolean dither; /* selects the dithering vertical resampler */
+
+ void *tmpdata; /* horizontally resampled lines; see the TMP_LINE_* macros */
+
+ HorizResampleFunc horiz_resample_func; /* routine used for the horizontal pass */
+
+ Scale1D x_scale1d; /* horizontal taps and offsets */
+ Scale1D y_scale1d; /* vertical taps and offsets */
+};
+
+/* Forward declarations of the per-precision implementations that
+ * vs_image_scale_lanczos_Y() and vs_image_scale_lanczos_AYUV() dispatch to
+ * according to their submethod argument. */
+static void
+vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src,
+ uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+ double sharpen);
+static void vs_image_scale_lanczos_Y_int32 (const VSImage * dest,
+ const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
+ double a, double sharpen);
+static void vs_image_scale_lanczos_Y_float (const VSImage * dest,
+ const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
+ double a, double sharpen);
+static void vs_image_scale_lanczos_Y_double (const VSImage * dest,
+ const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
+ double a, double sharpen);
+static void
+vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src,
+ uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+ double sharpen);
+static void vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest,
+ const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
+ double a, double sharpen);
+static void vs_image_scale_lanczos_AYUV_float (const VSImage * dest,
+ const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
+ double a, double sharpen);
+static void vs_image_scale_lanczos_AYUV_double (const VSImage * dest,
+ const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
+ double a, double sharpen);
+
+/* Normalized sinc: sin (pi * x) / (pi * x), defined as 1 at x == 0. */
+static double
+sinc (double x)
+{
+  if (x != 0)
+    return sin (G_PI * x) / (G_PI * x);
+
+  return 1;
+}
+
+/* Window function: 0 for x <= -1 or x >= 1, sinc (x) otherwise. */
+static double
+envelope (double x)
+{
+  return (x <= -1 || x >= 1) ? 0 : sinc (x);
+}
+
+/*
+ * Returns the number of taps needed for one axis: twice ceil (a / fx),
+ * where fx is `sharpness`, divided by the src/dest size ratio when that
+ * ratio is greater than 1 (downscaling).
+ */
+static int
+scale1d_get_n_taps (int src_size, int dest_size, double a, double sharpness)
+{
+  double ratio = src_size / (double) dest_size;
+  double cutoff;
+  int half_width;
+
+  cutoff = (ratio > 1.0) ? (1.0 / ratio) * sharpness : (1.0) * sharpness;
+  half_width = ceil (a / cutoff);
+
+  return 2 * half_width;
+}
+
+/*
+ * Releases the tap and offset tables owned by a Scale1D.
+ *
+ * Both pointers are reset to NULL afterwards so that a repeated call, or a
+ * later read of the struct, does not operate on dangling pointers.
+ */
+static void
+scale1d_cleanup (Scale1D * scale)
+{
+  g_free (scale->taps);
+  scale->taps = NULL;
+
+  g_free (scale->offsets);
+  scale->offsets = NULL;
+}
+
+/*
+ * Calculates a set of taps for each destination element in double
+ * format. Each set of taps sums to 1.0.
+ *
+ * Fills in scale->scale, offset, fx, ex, dx and n_taps, and allocates
+ * scale->taps (n_taps doubles per destination element) and
+ * scale->offsets (one gint32 per destination element) with g_malloc();
+ * scale1d_cleanup() frees both.
+ *
+ * n_taps must be at least 2 * ceil (a / fx); this is asserted. For each
+ * destination element j, offsets[j] is the index of the first source
+ * element its taps apply to. Tap sets that would start before source
+ * index 0, or extend past src_size, are shifted back inside the source,
+ * and the weight of the out-of-range taps is added to the nearest
+ * in-range tap.
+ */
+static void
+scale1d_calculate_taps (Scale1D * scale, int src_size, int dest_size,
+ int n_taps, double a, double sharpness, double sharpen)
+{
+ int j;
+ double *tap_array;
+ gint32 *offsets;
+ double scale_offset;
+ double scale_increment;
+ int dx;
+ double fx;
+ double ex;
+
+ scale->scale = src_size / (double) dest_size;
+ scale->offset = scale->scale / 2 - 0.5;
+
+ /* When downscaling, the sinc argument is scaled down by the size ratio */
+ if (scale->scale > 1.0) {
+ scale->fx = (1.0 / scale->scale) * sharpness;
+ } else {
+ scale->fx = (1.0) * sharpness;
+ }
+ scale->ex = scale->fx / a;
+ scale->dx = ceil (a / scale->fx);
+
+ g_assert (n_taps >= 2 * scale->dx);
+ scale->n_taps = n_taps;
+
+ scale->taps = g_malloc (sizeof (double) * scale->n_taps * dest_size);
+ scale->offsets = g_malloc (sizeof (gint32) * dest_size);
+ tap_array = scale->taps;
+ offsets = scale->offsets;
+
+ scale_offset = scale->offset;
+ scale_increment = scale->scale;
+ dx = scale->dx;
+ fx = scale->fx;
+ ex = scale->ex;
+
+ for (j = 0; j < dest_size; j++) {
+ double x;
+ int xi;
+ int l;
+ double weight;
+ double *taps;
+
+ /* Source-space position of destination element j, limited to
+ * [0, src_size], and the first source index its taps cover */
+ x = scale_offset + scale_increment * j;
+ x = CLAMP (x, 0, src_size);
+ xi = ceil (x) - dx;
+
+ offsets[j] = xi;
+ weight = 0;
+ taps = tap_array + j * n_taps;
+
+ /* Windowed sinc, minus `sharpen` times the window itself */
+ for (l = 0; l < n_taps; l++) {
+ int xl = xi + l;
+ taps[l] = sinc ((x - xl) * fx) * envelope ((x - xl) * ex);
+ taps[l] -= sharpen * envelope ((x - xl) * ex);
+ weight += taps[l];
+ }
+ /* The window must already be zero just outside the tap range */
+ g_assert (envelope ((x - (xi - 1)) * ex) == 0);
+ g_assert (envelope ((x - (xi + n_taps)) * ex) == 0);
+ /* Normalize so that the taps sum to 1.0 */
+ for (l = 0; l < n_taps; l++) {
+ taps[l] /= weight;
+ }
+
+ /* Taps start before source index 0: add the out-of-range weights to
+ * the tap for index 0, then move the set so that it starts at 0 and
+ * zero the vacated tail */
+ if (xi < 0) {
+ int shift = -xi;
+
+ for (l = 0; l < shift; l++) {
+ taps[shift] += taps[l];
+ }
+ for (l = 0; l < n_taps - shift; l++) {
+ taps[l] = taps[shift + l];
+ }
+ for (; l < n_taps; l++) {
+ taps[l] = 0;
+ }
+ offsets[j] += shift;
+ }
+
+ /* Taps run past the end of the source: add the out-of-range weights
+ * to the tap for index src_size - 1, then move the set so that it
+ * ends there and zero the vacated head. Note that this test uses the
+ * unadjusted xi. */
+ if (xi > src_size - n_taps) {
+ int shift = xi - (src_size - n_taps);
+
+ for (l = 0; l < shift; l++) {
+ taps[n_taps - shift - 1] += taps[n_taps - shift + l];
+ }
+ for (l = 0; l < n_taps - shift; l++) {
+ taps[n_taps - 1 - l] = taps[n_taps - 1 - shift - l];
+ }
+ for (l = 0; l < shift; l++) {
+ taps[l] = 0;
+ }
+ offsets[j] -= shift;
+ }
+ }
+}
+
+/*
+ * Builds the per-destination tap sets in single precision.
+ *
+ * The taps are first computed in double precision by
+ * scale1d_calculate_taps(), then copied element by element into a newly
+ * allocated float table that replaces scale->taps; the double table is
+ * freed. Each set of taps sums to 1.0.
+ */
+static void
+scale1d_calculate_taps_float (Scale1D * scale, int src_size, int dest_size,
+    int n_taps, double a, double sharpness, double sharpen)
+{
+  double *wide_taps;
+  float *narrow_taps;
+  int idx;
+
+  scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
+      sharpen);
+
+  wide_taps = scale->taps;
+  narrow_taps = g_malloc (sizeof (float) * scale->n_taps * dest_size);
+
+  idx = 0;
+  while (idx < dest_size * n_taps) {
+    narrow_taps[idx] = wide_taps[idx];
+    idx++;
+  }
+
+  scale->taps = narrow_taps;
+  g_free (wide_taps);
+}
+
+/*
+ * Builds the per-destination tap sets as gint32 fixed-point values.
+ *
+ * Each double tap from scale1d_calculate_taps() is multiplied by
+ * (1 << shift) and rounded to the nearest integer, so each set sums to
+ * (very nearly) (1 << shift). A typical value for shift is 10 to 15, so
+ * that applying the taps to uint8 values and summing will fit in a
+ * (signed) int32. The integer table replaces scale->taps and the double
+ * table is freed.
+ */
+static void
+scale1d_calculate_taps_int32 (Scale1D * scale, int src_size, int dest_size,
+    int n_taps, double a, double sharpness, double sharpen, int shift)
+{
+  const double multiplier = (1 << shift);
+  double *real_taps;
+  gint32 *fixed_taps;
+  int row;
+  int col;
+
+  scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
+      sharpen);
+
+  real_taps = scale->taps;
+  fixed_taps = g_malloc (sizeof (gint32) * scale->n_taps * dest_size);
+
+  for (row = 0; row < dest_size; row++) {
+    const double *in = real_taps + row * n_taps;
+    gint32 *out = fixed_taps + row * n_taps;
+
+    for (col = 0; col < n_taps; col++) {
+      out[col] = floor (0.5 + in[col] * multiplier);
+    }
+  }
+
+  scale->taps = fixed_taps;
+  g_free (real_taps);
+}
+
+/*
+ * Calculates a set of taps for each destination element in gint16
+ * format. Each set of taps sums to (1<<shift). A typical value
+ * for shift is 7, so that applying the taps to uint8 values and
+ * summing will fit in a (signed) int16.
+ *
+ * The double-precision taps are computed by scale1d_calculate_taps()
+ * and then replaced in scale->taps by a newly allocated gint16 table;
+ * the double table is freed. Only the "#if 1" conversion below is
+ * compiled; the "#if 0" variants are kept as a record of the
+ * alternatives that were measured.
+ */
+static void
+scale1d_calculate_taps_int16 (Scale1D * scale, int src_size, int dest_size,
+ int n_taps, double a, double sharpness, double sharpen, int shift)
+{
+ double *taps_d;
+ gint16 *taps_i;
+ int i;
+ int j;
+ double multiplier;
+
+ scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
+ sharpen);
+
+ taps_d = scale->taps;
+ taps_i = g_malloc (sizeof (gint16) * scale->n_taps * dest_size);
+
+ multiplier = (1 << shift);
+
+ /* Various methods for converting floating point taps to integer.
+ * The dB values are the SSIM value between scaling an image via
+ * the floating point pathway vs. the integer pathway using the
+ * given code to generate the taps. Only one image was tested,
+ * scaling from 1920x1080 to 640x360. Several variations of the
+ * methods were also tested, with nothing appearing useful. */
+#if 0
+ /* Standard round to integer. This causes bad DC errors. */
+ /* 44.588 dB */
+ for (j = 0; j < dest_size; j++) {
+ for (i = 0; i < n_taps; i++) {
+ taps_i[j * n_taps + i] =
+ floor (0.5 + taps_d[j * n_taps + i] * multiplier);
+ }
+ }
+#endif
+#if 0
+ /* Dithering via error propagation. Works pretty well, but
+ * really we want to propagate errors across rows, which would
+ * mean having several sets of tap arrays. Possible, but more work,
+ * and it may not even be better. */
+ /* 57.0961 dB */
+ {
+ double err = 0;
+ for (j = 0; j < dest_size; j++) {
+ for (i = 0; i < n_taps; i++) {
+ err += taps_d[j * n_taps + i] * multiplier;
+ taps_i[j * n_taps + i] = floor (err);
+ err -= floor (err);
+ }
+ }
+ }
+#endif
+#if 1
+ /* Round to integer, but with an adjustable bias that we use to
+ * eliminate the DC error. This search method is a bit crude, and
+ * could perhaps be improved somewhat. */
+ /* 60.4851 dB */
+ /* For each tap set, biases 0.00, 0.01, ... 0.99 are tried in turn and
+ * the first one whose integer taps sum to at least (1 << shift) is
+ * kept. If no bias reaches that sum, the taps from the last bias
+ * tried remain in place. */
+ for (j = 0; j < dest_size; j++) {
+ int k;
+ for (k = 0; k < 100; k++) {
+ int sum = 0;
+ double offset;
+
+ offset = k * 0.01;
+ for (i = 0; i < n_taps; i++) {
+ taps_i[j * n_taps + i] =
+ floor (offset + taps_d[j * n_taps + i] * multiplier);
+ sum += taps_i[j * n_taps + i];
+ }
+
+ if (sum >= (1 << shift))
+ break;
+ }
+ }
+#endif
+#if 0
+ /* Round to integer, but adjust the multiplier. The search method is
+ * wrong a lot, but was sufficient enough to calculate dB error. */
+ /* 58.6517 dB */
+ for (j = 0; j < dest_size; j++) {
+ int k;
+ int sum = 0;
+ for (k = 0; k < 200; k++) {
+ sum = 0;
+
+ multiplier = (1 << shift) - 1.0 + k * 0.01;
+ for (i = 0; i < n_taps; i++) {
+ taps_i[j * n_taps + i] =
+ floor (0.5 + taps_d[j * n_taps + i] * multiplier);
+ sum += taps_i[j * n_taps + i];
+ }
+
+ if (sum >= (1 << shift))
+ break;
+ }
+ if (sum != (1 << shift)) {
+ GST_ERROR ("%g %d", multiplier, sum);
+ }
+ }
+#endif
+#if 0
+ /* Round to integer, but subtract the error from the largest tap */
+ /* 58.3677 dB */
+ for (j = 0; j < dest_size; j++) {
+ int err = -multiplier;
+ for (i = 0; i < n_taps; i++) {
+ taps_i[j * n_taps + i] =
+ floor (0.5 + taps_d[j * n_taps + i] * multiplier);
+ err += taps_i[j * n_taps + i];
+ }
+ if (taps_i[j * n_taps + (n_taps / 2 - 1)] >
+ taps_i[j * n_taps + (n_taps / 2)]) {
+ taps_i[j * n_taps + (n_taps / 2 - 1)] -= err;
+ } else {
+ taps_i[j * n_taps + (n_taps / 2)] -= err;
+ }
+ }
+#endif
+
+ g_free (taps_d);
+ scale->taps = taps_i;
+}
+
+
+/*
+ * Scales a single-component image with the Lanczos scaler, choosing the
+ * arithmetic used from `submethod`: 1 selects 32-bit integer, 2 float,
+ * 3 double; 0 and every other value select 16-bit integer. All other
+ * arguments are passed through unchanged.
+ */
+void
+vs_image_scale_lanczos_Y (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
+    double a, double sharpen)
+{
+  if (submethod == 1) {
+    vs_image_scale_lanczos_Y_int32 (dest, src, tmpbuf, sharpness, dither, a,
+        sharpen);
+  } else if (submethod == 2) {
+    vs_image_scale_lanczos_Y_float (dest, src, tmpbuf, sharpness, dither, a,
+        sharpen);
+  } else if (submethod == 3) {
+    vs_image_scale_lanczos_Y_double (dest, src, tmpbuf, sharpness, dither, a,
+        sharpen);
+  } else {
+    vs_image_scale_lanczos_Y_int16 (dest, src, tmpbuf, sharpness, dither, a,
+        sharpen);
+  }
+}
+
+/*
+ * Scales an AYUV image with the Lanczos scaler, choosing the arithmetic
+ * used from `submethod`: 1 selects 32-bit integer, 2 float, 3 double;
+ * 0 and every other value select 16-bit integer. All other arguments are
+ * passed through unchanged.
+ */
+void
+vs_image_scale_lanczos_AYUV (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
+    double a, double sharpen)
+{
+  switch (submethod) {
+    case 3:
+      vs_image_scale_lanczos_AYUV_double (dest, src, tmpbuf, sharpness,
+          dither, a, sharpen);
+      break;
+    case 2:
+      vs_image_scale_lanczos_AYUV_float (dest, src, tmpbuf, sharpness,
+          dither, a, sharpen);
+      break;
+    case 1:
+      vs_image_scale_lanczos_AYUV_int32 (dest, src, tmpbuf, sharpness,
+          dither, a, sharpen);
+      break;
+    case 0:
+    default:
+      vs_image_scale_lanczos_AYUV_int16 (dest, src, tmpbuf, sharpness,
+          dither, a, sharpen);
+      break;
+  }
+}
+
+
+
+/*
+ * Defines a horizontal resampling function for floating-point taps.
+ * For each of the n outputs, _n_taps consecutive source samples starting
+ * at src[offsets[i]] are multiplied by that output's taps and summed; the
+ * sum is stored unmodified. The `shift` argument of the generated
+ * function is not used.
+ */
+#define RESAMPLE_HORIZ_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \
+static void \
+function (dest_type *dest, const gint32 *offsets, \
+    const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
+{ \
+  int px; \
+  for (px = 0; px < n; px++) { \
+    const src_type *in = src + offsets[px]; \
+    const tap_type *coef = taps + px * _n_taps; \
+    dest_type acc = 0; \
+    int t; \
+    for (t = 0; t < _n_taps; t++) { \
+      acc += in[t] * coef[t]; \
+    } \
+    dest[px] = acc; \
+  } \
+}
+
+/*
+ * Defines a horizontal resampling function for integer taps.
+ * For each of the n outputs, _n_taps consecutive source samples starting
+ * at src[offsets[i]] are multiplied by that output's taps and summed in a
+ * dest_type accumulator. When _shift is positive, half of (1 << _shift)
+ * is added for rounding; the result is then shifted right by _shift.
+ */
+#define RESAMPLE_HORIZ(function, dest_type, tap_type, src_type, _n_taps, _shift) \
+static void \
+function (dest_type *dest, const gint32 *offsets, \
+    const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
+{ \
+  const int rounding = (_shift > 0) ? ((1<<_shift)>>1) : 0; \
+  int px; \
+  for (px = 0; px < n; px++) { \
+    const src_type *in = src + offsets[px]; \
+    const tap_type *coef = taps + px * _n_taps; \
+    dest_type acc = 0; \
+    int t; \
+    for (t = 0; t < _n_taps; t++) { \
+      acc += in[t] * coef[t]; \
+    } \
+    dest[px] = (acc + rounding) >> _shift; \
+  } \
+}
+
+/*
+ * Defines a horizontal resampling function for floating-point taps and
+ * pixels made of four interleaved components. Offsets count pixels, so
+ * the source position is 4 * offsets[i]. Each of the four components is
+ * accumulated separately over _n_taps pixels with the same taps, and the
+ * four sums are stored unmodified. The `shift` argument of the generated
+ * function is not used.
+ */
+#define RESAMPLE_HORIZ_AYUV_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \
+static void \
+function (dest_type *dest, const gint32 *offsets, \
+    const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
+{ \
+  int px; \
+  for (px = 0; px < n; px++) { \
+    const src_type *in = src + 4*offsets[px]; \
+    const tap_type *coef = taps + px * _n_taps; \
+    dest_type acc[4] = { 0, 0, 0, 0 }; \
+    int t; \
+    int c; \
+    for (t = 0; t < _n_taps; t++) { \
+      for (c = 0; c < 4; c++) { \
+        acc[c] += in[t*4+c] * coef[t]; \
+      } \
+    } \
+    for (c = 0; c < 4; c++) { \
+      dest[px*4+c] = acc[c]; \
+    } \
+  } \
+}
+
+/*
+ * Defines a horizontal resampling function for integer taps and pixels
+ * made of four interleaved components. Offsets count pixels, so the
+ * source position is 4 * offsets[i]. Each of the four components is
+ * accumulated separately over _n_taps pixels with the same taps. When
+ * _shift is positive, half of (1 << _shift) is added for rounding; each
+ * sum is then shifted right by _shift.
+ */
+#define RESAMPLE_HORIZ_AYUV(function, dest_type, tap_type, src_type, _n_taps, _shift) \
+static void \
+function (dest_type *dest, const gint32 *offsets, \
+    const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
+{ \
+  const int rounding = (_shift > 0) ? ((1<<_shift)>>1) : 0; \
+  int px; \
+  for (px = 0; px < n; px++) { \
+    const src_type *in = src + 4*offsets[px]; \
+    const tap_type *coef = taps + px * _n_taps; \
+    dest_type acc[4] = { 0, 0, 0, 0 }; \
+    int t; \
+    int c; \
+    for (t = 0; t < _n_taps; t++) { \
+      for (c = 0; c < 4; c++) { \
+        acc[c] += in[t*4+c] * coef[t]; \
+      } \
+    } \
+    for (c = 0; c < 4; c++) { \
+      dest[px*4+c] = (acc[c] + rounding) >> _shift; \
+    } \
+  } \
+}
+
+/* *INDENT-OFF* */
+/* Generic horizontal resamplers: the tap count (and, for the integer
+ * variants, the shift) come from the run-time arguments of the generated
+ * function. */
+RESAMPLE_HORIZ_FLOAT (resample_horiz_double_u8_generic, double, double,
+ guint8, n_taps)
+RESAMPLE_HORIZ_FLOAT (resample_horiz_float_u8_generic, float, float,
+ guint8, n_taps)
+RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic, double, double,
+ guint8, n_taps)
+RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_float_ayuv_generic, float, float,
+ guint8, n_taps)
+
+RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_generic, gint32, gint32,
+ guint8, n_taps, shift)
+RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_generic, gint16, gint16,
+ guint8, n_taps, shift)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_generic, gint32, gint32,
+ guint8, n_taps, shift)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_generic, gint16, gint16,
+ guint8, n_taps, shift)
+
+/* Candidates for orcification */
+/* Specialisations with a compile-time tap count of 16, 12, 8 or 4 and a
+ * shift of 0; their n_taps and shift arguments are ignored. */
+RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps16_shift0, gint32, gint32,
+ guint8, 16, 0)
+RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps12_shift0, gint32, gint32,
+ guint8, 12, 0)
+RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps8_shift0, gint32, gint32,
+ guint8, 8, 0)
+RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps4_shift0, gint32, gint32,
+ guint8, 4, 0)
+RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps16_shift0, gint16, gint16,
+ guint8, 16, 0)
+RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps12_shift0, gint16, gint16,
+ guint8, 12, 0)
+RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps8_shift0, gint16, gint16,
+ guint8, 8, 0)
+RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps4_shift0, gint16, gint16,
+ guint8, 4, 0)
+
+RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps16_shift0, gint32, gint32,
+ guint8, 16, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps12_shift0, gint32, gint32,
+ guint8, 12, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps8_shift0, gint32, gint32,
+ guint8, 8, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps4_shift0, gint32, gint32,
+ guint8, 4, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps16_shift0, gint16, gint16,
+ guint8, 16, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps12_shift0, gint16, gint16,
+ guint8, 12, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps8_shift0, gint16, gint16,
+ guint8, 8, 0)
+RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps4_shift0, gint16, gint16,
+ guint8, 4, 0)
+/* *INDENT-ON* */
+
+/*
+ * Defines a vertical resampling function for integer intermediate data.
+ * The generated function combines _n_taps consecutive lines of src (each
+ * `stride` bytes apart) into one line of n 8-bit outputs: for every
+ * column the lines are weighted by taps[] and summed in a gint32, half of
+ * (1 << _shift) is added for rounding, and the result is shifted right by
+ * _shift and clamped to 0..255.
+ *
+ * The loop bound is the macro argument _n_taps, as in the RESAMPLE_HORIZ
+ * macros: passing n_taps uses the run-time argument of the generated
+ * function, passing a constant gives a fixed-length loop.
+ */
+#define RESAMPLE_VERT(function, tap_type, src_type, _n_taps, _shift) \
+static void \
+function (guint8 *dest, \
+    const tap_type *taps, const src_type *src, int stride, int n_taps, \
+    int shift, int n) \
+{ \
+  int i; \
+  int l; \
+  gint32 sum_y; \
+  gint32 offset = (1<<_shift) >> 1; \
+  for (i = 0; i < n; i++) { \
+    sum_y = 0; \
+    for (l = 0; l < _n_taps; l++) { \
+      const src_type *line = PTR_OFFSET(src, stride * l); \
+      sum_y += line[i] * taps[l]; \
+    } \
+    dest[i] = CLAMP ((sum_y + offset) >> _shift, 0, 255); \
+  } \
+}
+
+/*
+ * Defines a dithering vertical resampling function for integer
+ * intermediate data. The generated function combines _n_taps consecutive
+ * lines of src (each `stride` bytes apart) into one line of n 8-bit
+ * outputs. Instead of rounding, the low _shift bits left over from one
+ * output are carried in err_y and added to the sum of the next output in
+ * the line; each output is the running value shifted right by _shift and
+ * clamped to 0..255.
+ *
+ * The loop bound is the macro argument _n_taps, as in the RESAMPLE_HORIZ
+ * macros: passing n_taps uses the run-time argument of the generated
+ * function, passing a constant gives a fixed-length loop.
+ */
+#define RESAMPLE_VERT_DITHER(function, tap_type, src_type, _n_taps, _shift) \
+static void \
+function (guint8 *dest, \
+    const tap_type *taps, const src_type *src, int stride, int n_taps, \
+    int shift, int n) \
+{ \
+  int i; \
+  int l; \
+  gint32 sum_y; \
+  gint32 err_y = 0; \
+  gint32 mask = (1<<_shift) - 1; \
+  for (i = 0; i < n; i++) { \
+    sum_y = 0; \
+    for (l = 0; l < _n_taps; l++) { \
+      const src_type *line = PTR_OFFSET(src, stride * l); \
+      sum_y += line[i] * taps[l]; \
+    } \
+    err_y += sum_y; \
+    dest[i] = CLAMP (err_y >> _shift, 0, 255); \
+    err_y &= mask; \
+  } \
+}
+
+/* *INDENT-OFF* */
+/* Integer vertical resamplers, plain and dithering, for gint32 and gint16
+ * intermediate lines. Each macro expands to a complete function
+ * definition, so no ';' may follow an invocation: it would leave an empty
+ * declaration at file scope. */
+RESAMPLE_VERT (resample_vert_int32_generic, gint32, gint32, n_taps, shift)
+RESAMPLE_VERT_DITHER (resample_vert_dither_int32_generic, gint32, gint32,
+    n_taps, shift)
+RESAMPLE_VERT (resample_vert_int16_generic, gint16, gint16, n_taps, shift)
+RESAMPLE_VERT_DITHER (resample_vert_dither_int16_generic, gint16, gint16,
+    n_taps, shift)
+/* *INDENT-ON* */
+
+/*
+ * Defines a vertical resampling function for floating-point intermediate
+ * data. The generated function combines _n_taps consecutive lines of src
+ * (each `stride` bytes apart) into one line of n 8-bit outputs: for every
+ * column the lines are weighted by taps[] and summed in a src_type
+ * accumulator, which is rounded to the nearest integer and clamped to
+ * 0..255. _shift and the `shift` argument are not used.
+ *
+ * The loop bound is the macro argument _n_taps, as in the RESAMPLE_HORIZ
+ * macros: passing n_taps uses the run-time argument of the generated
+ * function, passing a constant gives a fixed-length loop.
+ */
+#define RESAMPLE_VERT_FLOAT(function, tap_type, src_type, _n_taps, _shift) \
+static void \
+function (guint8 *dest, \
+    const tap_type *taps, const src_type *src, int stride, int n_taps, \
+    int shift, int n) \
+{ \
+  int i; \
+  int l; \
+  src_type sum_y; \
+  for (i = 0; i < n; i++) { \
+    sum_y = 0; \
+    for (l = 0; l < _n_taps; l++) { \
+      const src_type *line = PTR_OFFSET(src, stride * l); \
+      sum_y += line[i] * taps[l]; \
+    } \
+    dest[i] = CLAMP (floor(0.5 + sum_y), 0, 255); \
+  } \
+}
+
+/*
+ * Defines a dithering vertical resampling function for floating-point
+ * intermediate data. The generated function combines _n_taps consecutive
+ * lines of src (each `stride` bytes apart) into one line of n 8-bit
+ * outputs. Instead of rounding, the fractional part left over from one
+ * output is carried in err_y and added to the sum of the next output in
+ * the line; each output is floor (err_y) clamped to 0..255. _shift and
+ * the `shift` argument are not used.
+ *
+ * The loop bound is the macro argument _n_taps, as in the RESAMPLE_HORIZ
+ * macros: passing n_taps uses the run-time argument of the generated
+ * function, passing a constant gives a fixed-length loop.
+ */
+#define RESAMPLE_VERT_FLOAT_DITHER(function, tap_type, src_type, _n_taps, _shift) \
+static void \
+function (guint8 *dest, \
+    const tap_type *taps, const src_type *src, int stride, int n_taps, \
+    int shift, int n) \
+{ \
+  int i; \
+  int l; \
+  src_type sum_y; \
+  src_type err_y = 0; \
+  for (i = 0; i < n; i++) { \
+    sum_y = 0; \
+    for (l = 0; l < _n_taps; l++) { \
+      const src_type *line = PTR_OFFSET(src, stride * l); \
+      sum_y += line[i] * taps[l]; \
+    } \
+    err_y += sum_y; \
+    dest[i] = CLAMP (floor (err_y), 0, 255); \
+    err_y -= floor (err_y); \
+  } \
+}
+
+/* *INDENT-OFF* */
+/* Floating-point vertical resamplers, plain and dithering, for double and
+ * float intermediate lines. */
+RESAMPLE_VERT_FLOAT (resample_vert_double_generic, double, double, n_taps,
+ shift)
+RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic, double, double,
+ n_taps, shift)
+
+RESAMPLE_VERT_FLOAT (resample_vert_float_generic, float, float, n_taps, shift)
+RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_float_generic, float, float,
+ n_taps, shift)
+/* *INDENT-ON* */
+
+/* Fixed-point shifts of the 16-bit integer path: SHIFT1 is used to build
+ * the horizontal taps and SHIFT2 the vertical taps, MIDSHIFT is passed to
+ * the horizontal resampler, and POSTSHIFT (what remains of the two tap
+ * shifts) is passed to the vertical resampler. */
+#define S16_SHIFT1 7
+#define S16_SHIFT2 7
+#define S16_MIDSHIFT 0
+#define S16_POSTSHIFT (S16_SHIFT1+S16_SHIFT2-S16_MIDSHIFT)
+
+/*
+ * Runs both scaling passes for a single-component image using gint16
+ * taps and gint16 intermediate lines.
+ *
+ * For every destination line, any source lines needed by its vertical
+ * taps that have not been processed yet are resampled horizontally into
+ * the temporary buffer; the vertical resampler (dithering or not,
+ * according to scale->dither) then writes the destination line.
+ */
+static void
+vs_scale_lanczos_Y_int16 (Scale * scale)
+{
+  int next_line = 0;
+  int row;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    int first = scale->y_scale1d.offsets[row];
+    gint16 *row_taps;
+
+    /* Horizontal pass for the source lines not resampled so far */
+    for (; next_line < first + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_S16 (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, S16_MIDSHIFT,
+          scale->dest->width);
+    }
+
+    row_taps =
+        (gint16 *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+
+    if (!scale->dither) {
+      resample_vert_int16_generic (out,
+          row_taps, TMP_LINE_S16 (first),
+          sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps,
+          S16_POSTSHIFT, scale->dest->width);
+    } else {
+      resample_vert_dither_int16_generic (out,
+          row_taps, TMP_LINE_S16 (first),
+          sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps,
+          S16_POSTSHIFT, scale->dest->width);
+    }
+  }
+}
+
+/*
+ * Scales a single-component image from src to dest using gint16 taps and
+ * a gint16 intermediate buffer.
+ *
+ * Horizontal taps are built with S16_SHIFT1 and their count is rounded up
+ * to a multiple of 4; vertical taps are built with S16_SHIFT2. A
+ * horizontal routine specialised for 4, 8, 12 or 16 taps is used when one
+ * matches, otherwise the generic one. The intermediate buffer
+ * (dest->width gint16 values for each source line) and both tap tables
+ * are allocated here and released before returning. tmpbuf is not used.
+ */
+void
+vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale ctx = { 0 };
+  HorizResampleFunc horiz;
+  int x_taps;
+  int y_taps;
+
+  ctx.dest = dest;
+  ctx.src = src;
+  ctx.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  x_taps = ROUND_UP_4 (x_taps);
+  scale1d_calculate_taps_int16 (&ctx.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen, S16_SHIFT1);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps_int16 (&ctx.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen, S16_SHIFT2);
+
+  if (ctx.x_scale1d.n_taps == 4) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_u8_taps4_shift0;
+  } else if (ctx.x_scale1d.n_taps == 8) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_u8_taps8_shift0;
+  } else if (ctx.x_scale1d.n_taps == 12) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_u8_taps12_shift0;
+  } else if (ctx.x_scale1d.n_taps == 16) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_u8_taps16_shift0;
+  } else {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_u8_generic;
+  }
+  ctx.horiz_resample_func = horiz;
+
+  ctx.tmpdata = g_malloc (sizeof (gint16) * dest->width * src->height);
+
+  vs_scale_lanczos_Y_int16 (&ctx);
+
+  scale1d_cleanup (&ctx.x_scale1d);
+  scale1d_cleanup (&ctx.y_scale1d);
+  g_free (ctx.tmpdata);
+}
+
+
+/* Fixed-point shifts of the 32-bit integer path: SHIFT1 is used to build
+ * the horizontal taps and SHIFT2 the vertical taps, MIDSHIFT is passed to
+ * the horizontal resampler, and POSTSHIFT (what remains of the two tap
+ * shifts) is passed to the vertical resampler. */
+#define S32_SHIFT1 11
+#define S32_SHIFT2 11
+#define S32_MIDSHIFT 0
+#define S32_POSTSHIFT (S32_SHIFT1+S32_SHIFT2-S32_MIDSHIFT)
+
+/*
+ * Runs both scaling passes for a single-component image using gint32
+ * taps and gint32 intermediate lines.
+ *
+ * For every destination line, any source lines needed by its vertical
+ * taps that have not been processed yet are resampled horizontally into
+ * the temporary buffer; the vertical resampler (dithering or not,
+ * according to scale->dither) then writes the destination line.
+ */
+static void
+vs_scale_lanczos_Y_int32 (Scale * scale)
+{
+  int next_line = 0;
+  int row;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    int first = scale->y_scale1d.offsets[row];
+    gint32 *row_taps;
+
+    /* Horizontal pass for the source lines not resampled so far */
+    for (; next_line < first + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_S32 (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, S32_MIDSHIFT,
+          scale->dest->width);
+    }
+
+    row_taps =
+        (gint32 *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+
+    if (!scale->dither) {
+      resample_vert_int32_generic (out,
+          row_taps, TMP_LINE_S32 (first),
+          sizeof (gint32) * scale->dest->width,
+          scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width);
+    } else {
+      resample_vert_dither_int32_generic (out,
+          row_taps, TMP_LINE_S32 (first),
+          sizeof (gint32) * scale->dest->width,
+          scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width);
+    }
+  }
+}
+
+/*
+ * Scales a single-component image from src to dest using gint32 taps and
+ * a gint32 intermediate buffer.
+ *
+ * Horizontal taps are built with S32_SHIFT1 and their count is rounded up
+ * to a multiple of 4; vertical taps are built with S32_SHIFT2. A
+ * horizontal routine specialised for 4, 8, 12 or 16 taps is used when one
+ * matches, otherwise the generic one. The intermediate buffer
+ * (dest->width gint32 values for each source line) and both tap tables
+ * are allocated here and released before returning. tmpbuf is not used.
+ */
+void
+vs_image_scale_lanczos_Y_int32 (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale s = { 0 };
+  Scale *scale = &s;
+  int n_taps;
+
+  scale->dest = dest;
+  scale->src = src;
+
+  n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  n_taps = ROUND_UP_4 (n_taps);
+  scale1d_calculate_taps_int32 (&scale->x_scale1d,
+      src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1);
+
+  n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps_int32 (&scale->y_scale1d,
+      src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2);
+
+  scale->dither = dither;
+
+  switch (scale->x_scale1d.n_taps) {
+    case 4:
+      scale->horiz_resample_func =
+          (HorizResampleFunc) resample_horiz_int32_int32_u8_taps4_shift0;
+      break;
+    case 8:
+      scale->horiz_resample_func =
+          (HorizResampleFunc) resample_horiz_int32_int32_u8_taps8_shift0;
+      break;
+    case 12:
+      scale->horiz_resample_func =
+          (HorizResampleFunc) resample_horiz_int32_int32_u8_taps12_shift0;
+      break;
+    case 16:
+      scale->horiz_resample_func =
+          (HorizResampleFunc) resample_horiz_int32_int32_u8_taps16_shift0;
+      break;
+    default:
+      scale->horiz_resample_func =
+          (HorizResampleFunc) resample_horiz_int32_int32_u8_generic;
+      break;
+  }
+
+  /* Sized with gint32, the element type TMP_LINE_S32 uses to index it */
+  scale->tmpdata =
+      g_malloc (sizeof (gint32) * scale->dest->width * scale->src->height);
+
+  vs_scale_lanczos_Y_int32 (scale);
+
+  scale1d_cleanup (&scale->x_scale1d);
+  scale1d_cleanup (&scale->y_scale1d);
+  g_free (scale->tmpdata);
+}
+
+/*
+ * Fill every row of scale->dest using double intermediate lines.
+ *
+ * For each destination row, scale->horiz_resample_func is first invoked for
+ * every not-yet-handled line index below offsets[row] + n_taps, with
+ * TMP_LINE_DOUBLE (line) and SRC_LINE (line) as its buffers.  The row is
+ * then produced by the vertical resampler from the temporary lines starting
+ * at offsets[row]; scale->dither selects the dithering variant.
+ */
+static void
+vs_scale_lanczos_Y_double (Scale * scale)
+{
+  int row;
+  int next_line = 0;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    double *coeffs;
+    int first_line = scale->y_scale1d.offsets[row];
+
+    /* Catch up on the lines that this row's vertical taps will read. */
+    for (; next_line < first_line + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_DOUBLE (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, 0,
+          scale->dest->width);
+    }
+
+    coeffs = (double *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+    if (!scale->dither) {
+      resample_vert_double_generic (out,
+          coeffs, TMP_LINE_DOUBLE (first_line),
+          sizeof (double) * scale->dest->width,
+          scale->y_scale1d.n_taps, 0, scale->dest->width);
+    } else {
+      resample_vert_dither_double_generic (out,
+          coeffs, TMP_LINE_DOUBLE (first_line),
+          sizeof (double) * scale->dest->width,
+          scale->y_scale1d.n_taps, 0, scale->dest->width);
+    }
+  }
+}
+
+/*
+ * Scale src into dest with the double-precision code path.
+ *
+ * Computes horizontal and vertical taps, installs the generic double
+ * horizontal resampler, allocates a temporary buffer of
+ * dest->width * src->height doubles, runs the scaler and releases
+ * everything it allocated.  tmpbuf is accepted but never referenced here.
+ */
+void
+vs_image_scale_lanczos_Y_double (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale state = { 0 };
+  int x_taps;
+  int y_taps;
+
+  state.dest = dest;
+  state.src = src;
+  state.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  scale1d_calculate_taps (&state.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps (&state.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen);
+
+  state.horiz_resample_func =
+      (HorizResampleFunc) resample_horiz_double_u8_generic;
+
+  state.tmpdata = g_malloc (sizeof (double) * dest->width * src->height);
+
+  vs_scale_lanczos_Y_double (&state);
+
+  scale1d_cleanup (&state.x_scale1d);
+  scale1d_cleanup (&state.y_scale1d);
+  g_free (state.tmpdata);
+}
+
+/*
+ * Fill every row of scale->dest using float intermediate lines.
+ *
+ * For each destination row, scale->horiz_resample_func is first invoked for
+ * every not-yet-handled line index below offsets[row] + n_taps, with
+ * TMP_LINE_FLOAT (line) and SRC_LINE (line) as its buffers.  The row is
+ * then produced by the vertical resampler from the temporary lines starting
+ * at offsets[row]; scale->dither selects the dithering variant.
+ */
+static void
+vs_scale_lanczos_Y_float (Scale * scale)
+{
+  int row;
+  int next_line = 0;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    float *coeffs;
+    int first_line = scale->y_scale1d.offsets[row];
+
+    /* Catch up on the lines that this row's vertical taps will read. */
+    for (; next_line < first_line + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_FLOAT (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, 0,
+          scale->dest->width);
+    }
+
+    coeffs = (float *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+    if (!scale->dither) {
+      resample_vert_float_generic (out,
+          coeffs, TMP_LINE_FLOAT (first_line),
+          sizeof (float) * scale->dest->width,
+          scale->y_scale1d.n_taps, 0, scale->dest->width);
+    } else {
+      resample_vert_dither_float_generic (out,
+          coeffs, TMP_LINE_FLOAT (first_line),
+          sizeof (float) * scale->dest->width,
+          scale->y_scale1d.n_taps, 0, scale->dest->width);
+    }
+  }
+}
+
+/*
+ * Scale src into dest with the single-precision float code path.
+ *
+ * Computes horizontal and vertical taps, installs the generic float
+ * horizontal resampler, allocates a temporary buffer of
+ * dest->width * src->height floats, runs the scaler and releases
+ * everything it allocated.  tmpbuf is accepted but never referenced here.
+ */
+void
+vs_image_scale_lanczos_Y_float (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale state = { 0 };
+  int x_taps;
+  int y_taps;
+
+  state.dest = dest;
+  state.src = src;
+  state.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  scale1d_calculate_taps_float (&state.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps_float (&state.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen);
+
+  state.horiz_resample_func =
+      (HorizResampleFunc) resample_horiz_float_u8_generic;
+
+  state.tmpdata = g_malloc (sizeof (float) * dest->width * src->height);
+
+  vs_scale_lanczos_Y_float (&state);
+
+  scale1d_cleanup (&state.x_scale1d);
+  scale1d_cleanup (&state.y_scale1d);
+  g_free (state.tmpdata);
+}
+
+
+
+
+
+/*
+ * Fill every row of scale->dest using gint16 intermediate lines, with four
+ * values per pixel.
+ *
+ * For each destination row, scale->horiz_resample_func is first invoked for
+ * every not-yet-handled line index below offsets[row] + n_taps, with
+ * TMP_LINE_S16_AYUV (line) and SRC_LINE (line) as its buffers.  The row is
+ * then produced by the vertical resampler, which is given a line size and
+ * element count of four times the destination width; scale->dither selects
+ * the dithering variant.
+ */
+static void
+vs_scale_lanczos_AYUV_int16 (Scale * scale)
+{
+  int row;
+  int next_line = 0;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    gint16 *coeffs;
+    int first_line = scale->y_scale1d.offsets[row];
+
+    /* Catch up on the lines that this row's vertical taps will read. */
+    for (; next_line < first_line + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_S16_AYUV (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, S16_MIDSHIFT,
+          scale->dest->width);
+    }
+
+    coeffs = (gint16 *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+    if (!scale->dither) {
+      resample_vert_int16_generic (out,
+          coeffs, TMP_LINE_S16_AYUV (first_line),
+          sizeof (gint16) * 4 * scale->dest->width,
+          scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4);
+    } else {
+      resample_vert_dither_int16_generic (out,
+          coeffs, TMP_LINE_S16_AYUV (first_line),
+          sizeof (gint16) * 4 * scale->dest->width,
+          scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4);
+    }
+  }
+}
+
+/*
+ * Scale the AYUV image src into dest with the int16 code path.
+ *
+ * Computes horizontal taps (count rounded with ROUND_UP_4) and vertical
+ * taps, picks a horizontal resampler specialised for 4, 8, 12 or 16 taps
+ * (generic otherwise), allocates a temporary buffer of
+ * dest->width * src->height * 4 gint16 values, runs the scaler and
+ * releases everything it allocated.  tmpbuf is accepted but never
+ * referenced here.
+ */
+void
+vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale state = { 0 };
+  HorizResampleFunc horiz;
+  int x_taps;
+  int y_taps;
+
+  state.dest = dest;
+  state.src = src;
+  state.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  x_taps = ROUND_UP_4 (x_taps);
+  scale1d_calculate_taps_int16 (&state.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen, S16_SHIFT1);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps_int16 (&state.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen, S16_SHIFT2);
+
+  /* Prefer a fixed-tap-count horizontal resampler when one exists. */
+  if (state.x_scale1d.n_taps == 4) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps4_shift0;
+  } else if (state.x_scale1d.n_taps == 8) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps8_shift0;
+  } else if (state.x_scale1d.n_taps == 12) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps12_shift0;
+  } else if (state.x_scale1d.n_taps == 16) {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps16_shift0;
+  } else {
+    horiz = (HorizResampleFunc) resample_horiz_int16_int16_ayuv_generic;
+  }
+  state.horiz_resample_func = horiz;
+
+  state.tmpdata = g_malloc (sizeof (gint16) * dest->width * src->height * 4);
+
+  vs_scale_lanczos_AYUV_int16 (&state);
+
+  scale1d_cleanup (&state.x_scale1d);
+  scale1d_cleanup (&state.y_scale1d);
+  g_free (state.tmpdata);
+}
+
+
+/*
+ * Fill every row of scale->dest using gint32 intermediate lines, with four
+ * values per pixel.
+ *
+ * For each destination row, scale->horiz_resample_func is first invoked for
+ * every not-yet-handled line index below offsets[row] + n_taps, with
+ * TMP_LINE_S32_AYUV (line) and SRC_LINE (line) as its buffers.  The row is
+ * then produced by the vertical resampler, which is given a line size and
+ * element count of four times the destination width; scale->dither selects
+ * the dithering variant.
+ */
+static void
+vs_scale_lanczos_AYUV_int32 (Scale * scale)
+{
+  int row;
+  int next_line = 0;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    gint32 *coeffs;
+    int first_line = scale->y_scale1d.offsets[row];
+
+    /* Catch up on the lines that this row's vertical taps will read. */
+    for (; next_line < first_line + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_S32_AYUV (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, S32_MIDSHIFT,
+          scale->dest->width);
+    }
+
+    coeffs = (gint32 *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+    if (!scale->dither) {
+      resample_vert_int32_generic (out,
+          coeffs, TMP_LINE_S32_AYUV (first_line),
+          sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps,
+          S32_POSTSHIFT, scale->dest->width * 4);
+    } else {
+      resample_vert_dither_int32_generic (out,
+          coeffs, TMP_LINE_S32_AYUV (first_line),
+          sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps,
+          S32_POSTSHIFT, scale->dest->width * 4);
+    }
+  }
+}
+
+/*
+ * Scale the AYUV image src into dest with the int32 code path.
+ *
+ * Computes horizontal taps (count rounded with ROUND_UP_4) and vertical
+ * taps, picks a horizontal resampler specialised for 4, 8, 12 or 16 taps
+ * (generic otherwise), allocates a temporary buffer of
+ * dest->width * src->height * 4 int32_t values, runs the scaler and
+ * releases everything it allocated.  tmpbuf is accepted but never
+ * referenced here.
+ */
+void
+vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale state = { 0 };
+  HorizResampleFunc horiz;
+  int x_taps;
+  int y_taps;
+
+  state.dest = dest;
+  state.src = src;
+  state.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  x_taps = ROUND_UP_4 (x_taps);
+  scale1d_calculate_taps_int32 (&state.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen, S32_SHIFT1);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps_int32 (&state.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen, S32_SHIFT2);
+
+  /* Prefer a fixed-tap-count horizontal resampler when one exists. */
+  if (state.x_scale1d.n_taps == 4) {
+    horiz = (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps4_shift0;
+  } else if (state.x_scale1d.n_taps == 8) {
+    horiz = (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps8_shift0;
+  } else if (state.x_scale1d.n_taps == 12) {
+    horiz = (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps12_shift0;
+  } else if (state.x_scale1d.n_taps == 16) {
+    horiz = (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps16_shift0;
+  } else {
+    horiz = (HorizResampleFunc) resample_horiz_int32_int32_ayuv_generic;
+  }
+  state.horiz_resample_func = horiz;
+
+  state.tmpdata = g_malloc (sizeof (int32_t) * dest->width * src->height * 4);
+
+  vs_scale_lanczos_AYUV_int32 (&state);
+
+  scale1d_cleanup (&state.x_scale1d);
+  scale1d_cleanup (&state.y_scale1d);
+  g_free (state.tmpdata);
+}
+
+/*
+ * Fill every row of scale->dest using double intermediate lines, with four
+ * values per pixel.
+ *
+ * For each destination row, scale->horiz_resample_func is first invoked for
+ * every not-yet-handled line index below offsets[row] + n_taps, with
+ * TMP_LINE_DOUBLE_AYUV (line) and SRC_LINE (line) as its buffers.  The row
+ * is then produced by the vertical resampler, which is given a line size
+ * and element count of four times the destination width; scale->dither
+ * selects the dithering variant.
+ */
+static void
+vs_scale_lanczos_AYUV_double (Scale * scale)
+{
+  int row;
+  int next_line = 0;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    double *coeffs;
+    int first_line = scale->y_scale1d.offsets[row];
+
+    /* Catch up on the lines that this row's vertical taps will read. */
+    for (; next_line < first_line + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, 0,
+          scale->dest->width);
+    }
+
+    coeffs = (double *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+    if (!scale->dither) {
+      resample_vert_double_generic (out,
+          coeffs, TMP_LINE_DOUBLE_AYUV (first_line),
+          sizeof (double) * 4 * scale->dest->width,
+          scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
+    } else {
+      resample_vert_dither_double_generic (out,
+          coeffs, TMP_LINE_DOUBLE_AYUV (first_line),
+          sizeof (double) * 4 * scale->dest->width,
+          scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
+    }
+  }
+}
+
+/*
+ * Scale the AYUV image src into dest with the double-precision code path.
+ *
+ * Computes horizontal and vertical taps, installs the generic double AYUV
+ * horizontal resampler, allocates a temporary buffer of
+ * dest->width * src->height * 4 doubles, runs the scaler and releases
+ * everything it allocated.  tmpbuf is accepted but never referenced here.
+ */
+void
+vs_image_scale_lanczos_AYUV_double (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale state = { 0 };
+  int x_taps;
+  int y_taps;
+
+  state.dest = dest;
+  state.src = src;
+  state.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  scale1d_calculate_taps (&state.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps (&state.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen);
+
+  state.horiz_resample_func =
+      (HorizResampleFunc) resample_horiz_double_ayuv_generic;
+
+  state.tmpdata = g_malloc (sizeof (double) * dest->width * src->height * 4);
+
+  vs_scale_lanczos_AYUV_double (&state);
+
+  scale1d_cleanup (&state.x_scale1d);
+  scale1d_cleanup (&state.y_scale1d);
+  g_free (state.tmpdata);
+}
+
+/*
+ * Fill every row of scale->dest using float intermediate lines, with four
+ * values per pixel.
+ *
+ * For each destination row, scale->horiz_resample_func is first invoked for
+ * every not-yet-handled line index below offsets[row] + n_taps, with
+ * TMP_LINE_FLOAT_AYUV (line) and SRC_LINE (line) as its buffers.  The row
+ * is then produced by the vertical resampler, which is given a line size
+ * and element count of four times the destination width; scale->dither
+ * selects the dithering variant.
+ */
+static void
+vs_scale_lanczos_AYUV_float (Scale * scale)
+{
+  int row;
+  int next_line = 0;
+
+  for (row = 0; row < scale->dest->height; row++) {
+    guint8 *out = scale->dest->pixels + scale->dest->stride * row;
+    float *coeffs;
+    int first_line = scale->y_scale1d.offsets[row];
+
+    /* Catch up on the lines that this row's vertical taps will read. */
+    for (; next_line < first_line + scale->y_scale1d.n_taps; next_line++) {
+      scale->horiz_resample_func (TMP_LINE_FLOAT_AYUV (next_line),
+          scale->x_scale1d.offsets, scale->x_scale1d.taps,
+          SRC_LINE (next_line), scale->x_scale1d.n_taps, 0,
+          scale->dest->width);
+    }
+
+    coeffs = (float *) scale->y_scale1d.taps + row * scale->y_scale1d.n_taps;
+    if (!scale->dither) {
+      resample_vert_float_generic (out,
+          coeffs, TMP_LINE_FLOAT_AYUV (first_line),
+          sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0,
+          scale->dest->width * 4);
+    } else {
+      resample_vert_dither_float_generic (out,
+          coeffs, TMP_LINE_FLOAT_AYUV (first_line),
+          sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0,
+          scale->dest->width * 4);
+    }
+  }
+}
+
+/*
+ * Scale the AYUV image src into dest with the single-precision float code
+ * path.
+ *
+ * Computes horizontal and vertical taps, installs the generic float AYUV
+ * horizontal resampler, allocates a temporary buffer of
+ * dest->width * src->height * 4 floats, runs the scaler and releases
+ * everything it allocated.  tmpbuf is accepted but never referenced here.
+ */
+void
+vs_image_scale_lanczos_AYUV_float (const VSImage * dest, const VSImage * src,
+    uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
+    double sharpen)
+{
+  Scale state = { 0 };
+  int x_taps;
+  int y_taps;
+
+  state.dest = dest;
+  state.src = src;
+  state.dither = dither;
+
+  x_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
+  scale1d_calculate_taps_float (&state.x_scale1d,
+      src->width, dest->width, x_taps, a, sharpness, sharpen);
+
+  y_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
+  scale1d_calculate_taps_float (&state.y_scale1d,
+      src->height, dest->height, y_taps, a, sharpness, sharpen);
+
+  state.horiz_resample_func =
+      (HorizResampleFunc) resample_horiz_float_ayuv_generic;
+
+  state.tmpdata = g_malloc (sizeof (float) * dest->width * src->height * 4);
+
+  vs_scale_lanczos_AYUV_float (&state);
+
+  scale1d_cleanup (&state.x_scale1d);
+  scale1d_cleanup (&state.y_scale1d);
+  g_free (state.tmpdata);
+}