2 * Image Scaling Functions
3 * Copyright (c) 2011 David A. Schleef <ds@schleef.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
29 * Modified Lanczos scaling algorithm
30 * ==================================
32 * This algorithm was developed by the author. The primary goals of
33 * the algorithm are high-quality video downscaling for medium scale
34 * factors (in the range of 1.3x to 5.0x) using methods that can be
35 * converted to SIMD code. Concerns with existing algorithms were
36 * mainly related to either over-soft filtering (Lanczos) or aliasing
37 * (bilinear or any other method with inadequate sampling).
39 * The problems with bilinear scaling are apparent when downscaling
40 * more than a factor of 2. For example, when downscaling by a factor
41 * of 3, only two-thirds of the input pixels contribute to the output
42 * pixels. This is only considering scaling in one direction; after
43 * scaling both vertically and horizontally in a 2-D image, fewer than
44 * half of the input pixels contribute to the output, so it should not
45 * be surprising that the output is suboptimal.
47 * The problems with Lanczos scaling are more subtle. From a theoretical
48 * perspective, Lanczos is an optimal algorithm for resampling equally-
49 * spaced values. This theoretical perspective is based on analysis
50 * done in frequency space, thus, Lanczos works very well for audio
51 * resampling, since the ear hears primarily in frequency space. The
52 * human visual system is sensitive primarily in the spatial domain,
53 * therefore any resampling algorithm should take this into account.
54 * This difference is immediately clear in the size of resampling
55 * window or envelope that is chosen for resampling: for audio, an
56 * envelope of a=64 is typical; in image scaling, the envelope is
59 * One result of the HVS being sensitive in the spatial domain (and
60 * also probably due to oversampling capabilities of the retina and
61 * visual cortex) is that it is less sensitive to the exact magnitude
62 * of high-frequency visual signals than to the appropriate amount of
63 * energy in the nearby frequency band. A Lanczos kernel with a=2
64 * or a=3 strongly decreases the amount of energy in the high frequency
65 * bands. The energy in this area can be increased by increasing a,
66 * which brings in energy from different areas of the image (bad for
67 * reasons mentioned above), or by oversampling the input data. We
68 * have chosen two methods for doing the latter. Firstly, there is
69 * a sharpness parameter, which increases the cutoff frequency of the
70 * filter, aliasing higher frequency noise into the passband. And
71 * secondly, there is the sharpen parameter, which increases the
72 * contribution of high-frequency (but in-band) components.
74 * An alternate explanation of the usefulness of a sharpening filter
75 * is that many natural images have a roughly 1/f spectrum. In order
76 * for a downsampled image to look more "natural" when high frequencies
77 * are removed, the frequencies in the pass band near the cutoff
78 * frequency are amplified, causing the spectrum to be more roughly
79 * 1/f. I said "roughly", not "literally".
81 * This alternate explanation is useful for understanding the author's
82 * secondary motivation for developing this algorithm, namely, as a
83 * method of video compression. Several recent techniques (such as
84 * HTTP Live Streaming and SVC) use image scaling as a method to get
85 * increased compression out of nominally non-scalable codecs such as
86 * H.264. For optimal quality, it is thus important to consider
87 * the scaler and encoder as a combined unit. Tuning of the sharpness
88 * and sharpen parameters was performed using the Toro encoder tuner,
89 * where scaled and encoded video was compared to unscaled and encoded
90 * video. This tuning suggested values that were very close to the
91 * values chosen by manual inspection of scaled images and video.
93 * The optimal values of sharpen and sharpness were slightly different
94 * depending on whether the comparison was still images or video. Video
95 * comparisons were more sensitive to aliasing, since the aliasing
96 * artifacts tended to move or "crawl" around the video. The default
97 * values are for video; image scaling may prefer higher values.
99 * A number of related techniques were rejected for various reasons.
100 * An early technique of selecting the sharpness factor locally based
101 * on edge detection (in order to use higher sharpness values without
102 * the corresponding aliasing on edges) worked very well for still
103 * images, but caused too much "crawling" on textures in video. Also,
104 * this method is slow, as it does not parallelize well.
106 * Non-separable techniques were rejected because the fastest would
107 * have been at least 4x slower.
109 * It is infrequently appreciated that image scaling should ideally be
110 * done in linear light space. Converting to linear light space has
111 * a similar effect to a sharpening filter. This approach was not
112 * taken because the added benefit is minor compared to the additional
113 * computational cost. Moreover, the benefit is decreased by increasing
114 * the strength of the sharpening filter.
119 #include "vs_scanline.h"
120 #include "vs_image.h"
122 #include "gstvideoscaleorc.h"
/* Evaluates to true when x lies outside the closed interval [a, b]. */
126 #define NEED_CLAMP(x,a,b) ((x) < (a) || (x) > (b))
/* Round x up to the next multiple of 2, 4 or 8 by adding (multiple - 1)
 * and masking off the low bits. */
128 #define ROUND_UP_2(x) (((x)+1)&~1)
129 #define ROUND_UP_4(x) (((x)+3)&~3)
130 #define ROUND_UP_8(x) (((x)+7)&~7)
/* Address of source row i: scale->src->pixels advanced by i strides.
 * Requires a variable named `scale` in scope at the point of use. */
132 #define SRC_LINE(i) (scale->src->pixels + scale->src->stride * (i))
/* Address of row i of the intermediate buffer scale->tmpdata, viewed as an
 * array of the named element type with scale->dest->width elements per row.
 * Requires a variable named `scale` in scope at the point of use. */
134 #define TMP_LINE_S16(i) ((gint16 *)scale->tmpdata + (i)*(scale->dest->width))
135 #define TMP_LINE_S32(i) ((gint32 *)scale->tmpdata + (i)*(scale->dest->width))
136 #define TMP_LINE_FLOAT(i) ((float *)scale->tmpdata + (i)*(scale->dest->width))
137 #define TMP_LINE_DOUBLE(i) ((double *)scale->tmpdata + (i)*(scale->dest->width))
/* Same as the TMP_LINE_* macros above, but with 4 elements per destination
 * pixel, i.e. 4 * scale->dest->width elements per row. */
138 #define TMP_LINE_S16_AYUV(i) ((gint16 *)scale->tmpdata + (i)*4*(scale->dest->width))
139 #define TMP_LINE_S32_AYUV(i) ((gint32 *)scale->tmpdata + (i)*4*(scale->dest->width))
140 #define TMP_LINE_FLOAT_AYUV(i) ((float *)scale->tmpdata + (i)*4*(scale->dest->width))
141 #define TMP_LINE_DOUBLE_AYUV(i) ((double *)scale->tmpdata + (i)*4*(scale->dest->width))
/* Pointer a advanced by b bytes (the arithmetic is done on a char pointer),
 * returned as an untyped void pointer. */
143 #define PTR_OFFSET(a,b) ((void *)((char *)(a) + (b)))
145 typedef void (*HorizResampleFunc) (void *dest, const gint32 * offsets,
146 const void *taps, const void *src, int n_taps, int shift, int n);
148 typedef struct _Scale1D Scale1D;
164 typedef struct _Scale Scale;
175 HorizResampleFunc horiz_resample_func;
182 vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src,
183 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
185 static void vs_image_scale_lanczos_Y_int32 (const VSImage * dest,
186 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
187 double a, double sharpen);
188 static void vs_image_scale_lanczos_Y_float (const VSImage * dest,
189 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
190 double a, double sharpen);
191 static void vs_image_scale_lanczos_Y_double (const VSImage * dest,
192 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
193 double a, double sharpen);
195 vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src,
196 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
198 static void vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest,
199 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
200 double a, double sharpen);
201 static void vs_image_scale_lanczos_AYUV_float (const VSImage * dest,
202 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
203 double a, double sharpen);
204 static void vs_image_scale_lanczos_AYUV_double (const VSImage * dest,
205 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
206 double a, double sharpen);
207 static void vs_image_scale_lanczos_AYUV64_double (const VSImage * dest,
208 const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither,
209 double a, double sharpen);
216 return sin (G_PI * x) / (G_PI * x);
222 if (x <= -1 || x >= 1)
228 scale1d_get_n_taps (int src_size, int dest_size, double a, double sharpness)
234 scale = src_size / (double) dest_size;
236 fx = (1.0 / scale) * sharpness;
238 fx = (1.0) * sharpness;
246 scale1d_cleanup (Scale1D * scale)
248 g_free (scale->taps);
249 g_free (scale->offsets);
253 * Calculates a set of taps for each destination element in double
254 * format. Each set of taps sums to 1.0.
258 scale1d_calculate_taps (Scale1D * scale, int src_size, int dest_size,
259 int n_taps, double a, double sharpness, double sharpen)
265 double scale_increment;
270 scale->scale = src_size / (double) dest_size;
271 scale->offset = scale->scale / 2 - 0.5;
273 if (scale->scale > 1.0) {
274 scale->fx = (1.0 / scale->scale) * sharpness;
276 scale->fx = (1.0) * sharpness;
278 scale->ex = scale->fx / a;
279 scale->dx = ceil (a / scale->fx);
281 g_assert (n_taps >= 2 * scale->dx);
282 scale->n_taps = n_taps;
284 scale->taps = g_malloc (sizeof (double) * scale->n_taps * dest_size);
285 scale->offsets = g_malloc (sizeof (gint32) * dest_size);
286 tap_array = scale->taps;
287 offsets = scale->offsets;
289 scale_offset = scale->offset;
290 scale_increment = scale->scale;
295 for (j = 0; j < dest_size; j++) {
302 x = scale_offset + scale_increment * j;
303 x = CLAMP (x, 0, src_size);
308 taps = tap_array + j * n_taps;
310 for (l = 0; l < n_taps; l++) {
312 taps[l] = sinc ((x - xl) * fx) * envelope ((x - xl) * ex);
313 taps[l] -= sharpen * envelope ((x - xl) * ex);
316 g_assert (envelope ((x - (xi - 1)) * ex) == 0);
317 g_assert (envelope ((x - (xi + n_taps)) * ex) == 0);
318 for (l = 0; l < n_taps; l++) {
325 for (l = 0; l < shift; l++) {
326 taps[shift] += taps[l];
328 for (l = 0; l < n_taps - shift; l++) {
329 taps[l] = taps[shift + l];
331 for (; l < n_taps; l++) {
337 if (xi > src_size - n_taps) {
338 int shift = xi - (src_size - n_taps);
340 for (l = 0; l < shift; l++) {
341 taps[n_taps - shift - 1] += taps[n_taps - shift + l];
343 for (l = 0; l < n_taps - shift; l++) {
344 taps[n_taps - 1 - l] = taps[n_taps - 1 - shift - l];
346 for (l = 0; l < shift; l++) {
355 * Calculates a set of taps for each destination element in float
356 * format. Each set of taps sums to 1.0.
359 scale1d_calculate_taps_float (Scale1D * scale, int src_size, int dest_size,
360 int n_taps, double a, double sharpness, double sharpen)
366 scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
369 taps_d = scale->taps;
370 taps_f = g_malloc (sizeof (float) * scale->n_taps * dest_size);
372 for (j = 0; j < dest_size * n_taps; j++) {
373 taps_f[j] = taps_d[j];
377 scale->taps = taps_f;
381 * Calculates a set of taps for each destination element in gint32
382 * format. Each set of taps sums to (very nearly) (1<<shift). A
383 * typical value for shift is 10 to 15, so that applying the taps to
384 * uint8 values and summing will fit in a (signed) int32.
387 scale1d_calculate_taps_int32 (Scale1D * scale, int src_size, int dest_size,
388 int n_taps, double a, double sharpness, double sharpen, int shift)
396 scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
399 taps_d = scale->taps;
400 taps_i = g_malloc (sizeof (gint32) * scale->n_taps * dest_size);
402 multiplier = (1 << shift);
404 for (j = 0; j < dest_size; j++) {
405 for (i = 0; i < n_taps; i++) {
406 taps_i[j * n_taps + i] =
407 floor (0.5 + taps_d[j * n_taps + i] * multiplier);
412 scale->taps = taps_i;
416 * Calculates a set of taps for each destination element in gint16
417 * format. Each set of taps sums to (1<<shift). A typical value
418 * for shift is 7, so that applying the taps to uint8 values and
419 * summing will fit in a (signed) int16.
422 scale1d_calculate_taps_int16 (Scale1D * scale, int src_size, int dest_size,
423 int n_taps, double a, double sharpness, double sharpen, int shift)
431 scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness,
434 taps_d = scale->taps;
435 taps_i = g_malloc (sizeof (gint16) * scale->n_taps * dest_size);
437 multiplier = (1 << shift);
439 /* Various methods for converting floating point taps to integer.
440 * The dB values are the SSIM value between scaling an image via
441 * the floating point pathway vs. the integer pathway using the
442 * given code to generate the taps. Only one image was tested,
443 * scaling from 1920x1080 to 640x360. Several variations of the
444 * methods were also tested, with nothing appearing useful. */
446 /* Standard round to integer. This causes bad DC errors. */
448 for (j = 0; j < dest_size; j++) {
449 for (i = 0; i < n_taps; i++) {
450 taps_i[j * n_taps + i] =
451 floor (0.5 + taps_d[j * n_taps + i] * multiplier);
456 /* Dithering via error propagation. Works pretty well, but
457 * really we want to propagate errors across rows, which would
458 * mean having several sets of tap arrays. Possible, but more work,
459 * and it may not even be better. */
463 for (j = 0; j < dest_size; j++) {
464 for (i = 0; i < n_taps; i++) {
465 err += taps_d[j * n_taps + i] * multiplier;
466 taps_i[j * n_taps + i] = floor (err);
473 /* Round to integer, but with an adjustable bias that we use to
474 * eliminate the DC error. This search method is a bit crude, and
475 * could perhaps be improved somewhat. */
477 for (j = 0; j < dest_size; j++) {
479 for (k = 0; k < 100; k++) {
484 for (i = 0; i < n_taps; i++) {
485 taps_i[j * n_taps + i] =
486 floor (offset + taps_d[j * n_taps + i] * multiplier);
487 sum += taps_i[j * n_taps + i];
490 if (sum >= (1 << shift))
496 /* Round to integer, but adjust the multiplier. The search method is
497 * wrong a lot, but was sufficient enough to calculate dB error. */
499 for (j = 0; j < dest_size; j++) {
502 for (k = 0; k < 200; k++) {
505 multiplier = (1 << shift) - 1.0 + k * 0.01;
506 for (i = 0; i < n_taps; i++) {
507 taps_i[j * n_taps + i] =
508 floor (0.5 + taps_d[j * n_taps + i] * multiplier);
509 sum += taps_i[j * n_taps + i];
512 if (sum >= (1 << shift))
515 if (sum != (1 << shift)) {
516 GST_ERROR ("%g %d", multiplier, sum);
521 /* Round to integer, but subtract the error from the largest tap */
523 for (j = 0; j < dest_size; j++) {
524 int err = -multiplier;
525 for (i = 0; i < n_taps; i++) {
526 taps_i[j * n_taps + i] =
527 floor (0.5 + taps_d[j * n_taps + i] * multiplier);
528 err += taps_i[j * n_taps + i];
530 if (taps_i[j * n_taps + (n_taps / 2 - 1)] >
531 taps_i[j * n_taps + (n_taps / 2)]) {
532 taps_i[j * n_taps + (n_taps / 2 - 1)] -= err;
534 taps_i[j * n_taps + (n_taps / 2)] -= err;
540 scale->taps = taps_i;
545 vs_image_scale_lanczos_Y (const VSImage * dest, const VSImage * src,
546 uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
547 double a, double sharpen)
552 vs_image_scale_lanczos_Y_int16 (dest, src, tmpbuf, sharpness, dither, a,
556 vs_image_scale_lanczos_Y_int32 (dest, src, tmpbuf, sharpness, dither, a,
560 vs_image_scale_lanczos_Y_float (dest, src, tmpbuf, sharpness, dither, a,
564 vs_image_scale_lanczos_Y_double (dest, src, tmpbuf, sharpness, dither, a,
571 vs_image_scale_lanczos_AYUV (const VSImage * dest, const VSImage * src,
572 uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
573 double a, double sharpen)
578 vs_image_scale_lanczos_AYUV_int16 (dest, src, tmpbuf, sharpness, dither,
582 vs_image_scale_lanczos_AYUV_int32 (dest, src, tmpbuf, sharpness, dither,
586 vs_image_scale_lanczos_AYUV_float (dest, src, tmpbuf, sharpness, dither,
590 vs_image_scale_lanczos_AYUV_double (dest, src, tmpbuf, sharpness, dither,
597 vs_image_scale_lanczos_AYUV64 (const VSImage * dest, const VSImage * src,
598 uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod,
599 double a, double sharpen)
601 vs_image_scale_lanczos_AYUV64_double (dest, src, tmpbuf, sharpness, dither,
607 #define RESAMPLE_HORIZ_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \
609 function (dest_type *dest, const gint32 *offsets, \
610 const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
615 const src_type *srcline; \
616 const tap_type *tapsline; \
617 for (i = 0; i < n; i++) { \
618 srcline = src + offsets[i]; \
619 tapsline = taps + i * _n_taps; \
621 for (k = 0; k < _n_taps; k++) { \
622 sum += srcline[k] * tapsline[k]; \
628 #define RESAMPLE_HORIZ(function, dest_type, tap_type, src_type, _n_taps, _shift) \
630 function (dest_type *dest, const gint32 *offsets, \
631 const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
636 const src_type *srcline; \
637 const tap_type *tapsline; \
639 if (_shift > 0) offset = (1<<_shift)>>1; \
641 for (i = 0; i < n; i++) { \
642 srcline = src + offsets[i]; \
643 tapsline = taps + i * _n_taps; \
645 for (k = 0; k < _n_taps; k++) { \
646 sum += srcline[k] * tapsline[k]; \
648 dest[i] = (sum + offset) >> _shift; \
652 #define RESAMPLE_HORIZ_AYUV_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \
654 function (dest_type *dest, const gint32 *offsets, \
655 const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
663 const src_type *srcline; \
664 const tap_type *tapsline; \
665 for (i = 0; i < n; i++) { \
666 srcline = src + 4*offsets[i]; \
667 tapsline = taps + i * _n_taps; \
672 for (k = 0; k < _n_taps; k++) { \
673 sum1 += srcline[k*4+0] * tapsline[k]; \
674 sum2 += srcline[k*4+1] * tapsline[k]; \
675 sum3 += srcline[k*4+2] * tapsline[k]; \
676 sum4 += srcline[k*4+3] * tapsline[k]; \
678 dest[i*4+0] = sum1; \
679 dest[i*4+1] = sum2; \
680 dest[i*4+2] = sum3; \
681 dest[i*4+3] = sum4; \
685 #define RESAMPLE_HORIZ_AYUV(function, dest_type, tap_type, src_type, _n_taps, _shift) \
687 function (dest_type *dest, const gint32 *offsets, \
688 const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \
696 const src_type *srcline; \
697 const tap_type *tapsline; \
699 if (_shift > 0) offset = (1<<_shift)>>1; \
701 for (i = 0; i < n; i++) { \
702 srcline = src + 4*offsets[i]; \
703 tapsline = taps + i * _n_taps; \
708 for (k = 0; k < _n_taps; k++) { \
709 sum1 += srcline[k*4+0] * tapsline[k]; \
710 sum2 += srcline[k*4+1] * tapsline[k]; \
711 sum3 += srcline[k*4+2] * tapsline[k]; \
712 sum4 += srcline[k*4+3] * tapsline[k]; \
714 dest[i*4+0] = (sum1 + offset) >> _shift; \
715 dest[i*4+1] = (sum2 + offset) >> _shift; \
716 dest[i*4+2] = (sum3 + offset) >> _shift; \
717 dest[i*4+3] = (sum4 + offset) >> _shift; \
722 RESAMPLE_HORIZ_FLOAT (resample_horiz_double_u8_generic, double, double,
724 RESAMPLE_HORIZ_FLOAT (resample_horiz_float_u8_generic, float, float,
726 RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic, double, double,
728 RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_float_ayuv_generic, float, float,
731 RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic_s16, double, double,
734 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_generic, gint32, gint32,
735 guint8, n_taps, shift)
736 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_generic, gint16, gint16,
737 guint8, n_taps, shift)
738 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_generic, gint32, gint32,
739 guint8, n_taps, shift)
740 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_generic, gint16, gint16,
741 guint8, n_taps, shift)
743 /* Candidates for orcification */
744 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps16_shift0, gint32, gint32,
746 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps12_shift0, gint32, gint32,
748 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps8_shift0, gint32, gint32,
750 RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps4_shift0, gint32, gint32,
752 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps16_shift0, gint16, gint16,
754 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps12_shift0, gint16, gint16,
756 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps8_shift0, gint16, gint16,
758 RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps4_shift0, gint16, gint16,
761 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps16_shift0, gint32, gint32,
763 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps12_shift0, gint32, gint32,
765 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps8_shift0, gint32, gint32,
767 RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps4_shift0, gint32, gint32,
769 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps16_shift0, gint16, gint16,
771 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps12_shift0, gint16, gint16,
773 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps8_shift0, gint16, gint16,
775 RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps4_shift0, gint16, gint16,
779 #define RESAMPLE_VERT(function, tap_type, src_type, _n_taps, _shift) \
781 function (guint8 *dest, \
782 const tap_type *taps, const src_type *src, int stride, int n_taps, \
788 gint32 offset = (1<<_shift) >> 1; \
789 for (i = 0; i < n; i++) { \
791 for (l = 0; l < n_taps; l++) { \
792 const src_type *line = PTR_OFFSET(src, stride * l); \
793 sum_y += line[i] * taps[l]; \
795 dest[i] = CLAMP ((sum_y + offset) >> _shift, 0, 255); \
799 #define RESAMPLE_VERT_DITHER(function, tap_type, src_type, _n_taps, _shift) \
801 function (guint8 *dest, \
802 const tap_type *taps, const src_type *src, int stride, int n_taps, \
809 gint32 mask = (1<<_shift) - 1; \
810 for (i = 0; i < n; i++) { \
812 for (l = 0; l < n_taps; l++) { \
813 const src_type *line = PTR_OFFSET(src, stride * l); \
814 sum_y += line[i] * taps[l]; \
817 dest[i] = CLAMP (err_y >> _shift, 0, 255); \
823 RESAMPLE_VERT (resample_vert_int32_generic, gint32, gint32, n_taps, shift)
824 RESAMPLE_VERT_DITHER (resample_vert_dither_int32_generic, gint32, gint32,
826 RESAMPLE_VERT (resample_vert_int16_generic, gint16, gint16, n_taps, shift);
827 RESAMPLE_VERT_DITHER (resample_vert_dither_int16_generic, gint16, gint16,
831 #define RESAMPLE_VERT_FLOAT(function, dest_type, clamp, tap_type, src_type, _n_taps, _shift) \
833 function (dest_type *dest, \
834 const tap_type *taps, const src_type *src, int stride, int n_taps, \
840 for (i = 0; i < n; i++) { \
842 for (l = 0; l < n_taps; l++) { \
843 const src_type *line = PTR_OFFSET(src, stride * l); \
844 sum_y += line[i] * taps[l]; \
846 dest[i] = CLAMP (floor(0.5 + sum_y), 0, clamp); \
850 #define RESAMPLE_VERT_FLOAT_DITHER(function, dest_type, clamp, tap_type, src_type, _n_taps, _shift) \
852 function (dest_type *dest, \
853 const tap_type *taps, const src_type *src, int stride, int n_taps, \
859 src_type err_y = 0; \
860 for (i = 0; i < n; i++) { \
862 for (l = 0; l < n_taps; l++) { \
863 const src_type *line = PTR_OFFSET(src, stride * l); \
864 sum_y += line[i] * taps[l]; \
867 dest[i] = CLAMP (floor (err_y), 0, clamp); \
868 err_y -= floor (err_y); \
873 RESAMPLE_VERT_FLOAT (resample_vert_double_generic, guint8, 255, double, double, n_taps,
875 RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic, guint8, 255, double, double,
878 RESAMPLE_VERT_FLOAT (resample_vert_double_generic_u16, guint16, 65535, double, double, n_taps,
880 RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic_u16, guint16, 65535, double, double,
883 RESAMPLE_VERT_FLOAT (resample_vert_float_generic, guint8, 255, float, float, n_taps, shift)
884 RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_float_generic, guint8, 255, float, float,
/* Shift value passed to the horizontal resample function in the int16 path. */
890 #define S16_MIDSHIFT 0
/* Shift value passed to the vertical resample function in the int16 path:
 * the sum of the two tap shifts minus the shift already applied after the
 * horizontal pass.  NOTE(review): S16_SHIFT1 and S16_SHIFT2 are defined
 * outside the lines visible here -- confirm their values there. */
891 #define S16_POSTSHIFT (S16_SHIFT1+S16_SHIFT2-S16_MIDSHIFT)
894 vs_scale_lanczos_Y_int16 (Scale * scale)
902 for (j = 0; j < scale->dest->height; j++) {
906 destline = scale->dest->pixels + scale->dest->stride * j;
908 yi = scale->y_scale1d.offsets[j];
910 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
911 scale->horiz_resample_func (TMP_LINE_S16 (tmp_yi),
912 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
913 scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width);
917 taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
919 resample_vert_dither_int16_generic (destline,
920 taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]),
921 sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps,
922 S16_POSTSHIFT, scale->dest->width);
924 resample_vert_int16_generic (destline,
925 taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]),
926 sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps,
927 S16_POSTSHIFT, scale->dest->width);
933 vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src,
934 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
944 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
945 n_taps = ROUND_UP_4 (n_taps);
946 scale1d_calculate_taps_int16 (&scale->x_scale1d,
947 src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1);
949 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
950 scale1d_calculate_taps_int16 (&scale->y_scale1d,
951 src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2);
953 scale->dither = dither;
955 switch (scale->x_scale1d.n_taps) {
957 scale->horiz_resample_func =
958 (HorizResampleFunc) resample_horiz_int16_int16_u8_taps4_shift0;
961 scale->horiz_resample_func =
962 (HorizResampleFunc) resample_horiz_int16_int16_u8_taps8_shift0;
965 scale->horiz_resample_func =
966 (HorizResampleFunc) resample_horiz_int16_int16_u8_taps12_shift0;
969 scale->horiz_resample_func =
970 (HorizResampleFunc) resample_horiz_int16_int16_u8_taps16_shift0;
973 scale->horiz_resample_func =
974 (HorizResampleFunc) resample_horiz_int16_int16_u8_generic;
979 g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height);
981 vs_scale_lanczos_Y_int16 (scale);
983 scale1d_cleanup (&scale->x_scale1d);
984 scale1d_cleanup (&scale->y_scale1d);
985 g_free (scale->tmpdata);
/* Fixed-point shifts for the int32 path: S32_SHIFT1 is passed when the
 * horizontal (x) taps are generated and S32_SHIFT2 when the vertical (y)
 * taps are generated. */
989 #define S32_SHIFT1 11
990 #define S32_SHIFT2 11
/* Shift value passed to the horizontal resample function. */
991 #define S32_MIDSHIFT 0
/* Shift value passed to the vertical resample function: the sum of both tap
 * shifts minus the shift already applied after the horizontal pass. */
992 #define S32_POSTSHIFT (S32_SHIFT1+S32_SHIFT2-S32_MIDSHIFT)
995 vs_scale_lanczos_Y_int32 (Scale * scale)
1003 for (j = 0; j < scale->dest->height; j++) {
1007 destline = scale->dest->pixels + scale->dest->stride * j;
1009 yi = scale->y_scale1d.offsets[j];
1011 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1012 scale->horiz_resample_func (TMP_LINE_S32 (tmp_yi),
1013 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1014 scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width);
1018 taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1019 if (scale->dither) {
1020 resample_vert_dither_int32_generic (destline,
1021 taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]),
1022 sizeof (gint32) * scale->dest->width,
1023 scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width);
1025 resample_vert_int32_generic (destline,
1026 taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]),
1027 sizeof (gint32) * scale->dest->width,
1028 scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width);
1034 vs_image_scale_lanczos_Y_int32 (const VSImage * dest, const VSImage * src,
1035 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1045 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1046 n_taps = ROUND_UP_4 (n_taps);
1047 scale1d_calculate_taps_int32 (&scale->x_scale1d,
1048 src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1);
1050 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1051 scale1d_calculate_taps_int32 (&scale->y_scale1d,
1052 src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2);
1054 scale->dither = dither;
1056 switch (scale->x_scale1d.n_taps) {
1058 scale->horiz_resample_func =
1059 (HorizResampleFunc) resample_horiz_int32_int32_u8_taps4_shift0;
1062 scale->horiz_resample_func =
1063 (HorizResampleFunc) resample_horiz_int32_int32_u8_taps8_shift0;
1066 scale->horiz_resample_func =
1067 (HorizResampleFunc) resample_horiz_int32_int32_u8_taps12_shift0;
1070 scale->horiz_resample_func =
1071 (HorizResampleFunc) resample_horiz_int32_int32_u8_taps16_shift0;
1074 scale->horiz_resample_func =
1075 (HorizResampleFunc) resample_horiz_int32_int32_u8_generic;
1080 g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height);
1082 vs_scale_lanczos_Y_int32 (scale);
1084 scale1d_cleanup (&scale->x_scale1d);
1085 scale1d_cleanup (&scale->y_scale1d);
1086 g_free (scale->tmpdata);
1090 vs_scale_lanczos_Y_double (Scale * scale)
1098 for (j = 0; j < scale->dest->height; j++) {
1102 destline = scale->dest->pixels + scale->dest->stride * j;
1104 yi = scale->y_scale1d.offsets[j];
1106 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1107 scale->horiz_resample_func (TMP_LINE_DOUBLE (tmp_yi),
1108 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1109 scale->x_scale1d.n_taps, 0, scale->dest->width);
1113 taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
1114 if (scale->dither) {
1115 resample_vert_dither_double_generic (destline,
1116 taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]),
1117 sizeof (double) * scale->dest->width,
1118 scale->y_scale1d.n_taps, 0, scale->dest->width);
1120 resample_vert_double_generic (destline,
1121 taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]),
1122 sizeof (double) * scale->dest->width,
1123 scale->y_scale1d.n_taps, 0, scale->dest->width);
1129 vs_image_scale_lanczos_Y_double (const VSImage * dest, const VSImage * src,
1130 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
1140 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1141 scale1d_calculate_taps (&scale->x_scale1d,
1142 src->width, dest->width, n_taps, a, sharpness, sharpen);
1144 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1145 scale1d_calculate_taps (&scale->y_scale1d,
1146 src->height, dest->height, n_taps, a, sharpness, sharpen);
1148 scale->dither = dither;
1150 scale->horiz_resample_func =
1151 (HorizResampleFunc) resample_horiz_double_u8_generic;
1154 g_malloc (sizeof (double) * scale->dest->width * scale->src->height);
1156 vs_scale_lanczos_Y_double (scale);
1158 scale1d_cleanup (&scale->x_scale1d);
1159 scale1d_cleanup (&scale->y_scale1d);
1160 g_free (scale->tmpdata);
/*
 * vs_scale_lanczos_Y_float:
 *
 * Row loop of the float variant.  For every destination row j it first
 * runs the horizontal resampler on source rows until tmp_yi reaches
 * offsets[j] + n_taps of the vertical filter, then combines the
 * intermediate rows with row j's vertical taps into the destination row,
 * using the dithering routine when scale->dither is set.
 *
 * NOTE(review): the declarations of j, yi, tmp_yi, destline and taps, the
 * advance of tmp_yi inside the while loop, the `else` between the two
 * vertical calls and the closing braces are not visible in this listing.
 * tmp_yi presumably persists across rows so each source row is resampled
 * only once -- confirm.
 */
1164 vs_scale_lanczos_Y_float (Scale * scale)
1172 for (j = 0; j < scale->dest->height; j++) {
/* Start of destination row j. */
1176 destline = scale->dest->pixels + scale->dest->stride * j;
/* First source row used by the vertical filter for row j. */
1178 yi = scale->y_scale1d.offsets[j];
/* Horizontal pass for source rows not yet processed. */
1180 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1181 scale->horiz_resample_func (TMP_LINE_FLOAT (tmp_yi),
1182 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1183 scale->x_scale1d.n_taps, 0, scale->dest->width);
/* The vertical tap table is indexed as n_taps floats per destination row. */
1187 taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
/* Vertical pass; sizeof (float) * width is presumably the byte stride
 * between consecutive intermediate rows -- confirm against the helper. */
1188 if (scale->dither) {
1189 resample_vert_dither_float_generic (destline,
1190 taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]),
1191 sizeof (float) * scale->dest->width,
1192 scale->y_scale1d.n_taps, 0, scale->dest->width);
1194 resample_vert_float_generic (destline,
1195 taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]),
1196 sizeof (float) * scale->dest->width,
1197 scale->y_scale1d.n_taps, 0, scale->dest->width);
/*
 * vs_image_scale_lanczos_Y_float:
 *
 * Scales src into dest with the two-pass Lanczos scaler, keeping the
 * intermediate rows as floats.  Builds float tap tables for both axes,
 * selects the horizontal resampler, allocates the intermediate buffer,
 * runs vs_scale_lanczos_Y_float() and then frees the tables and buffer.
 *
 * sharpness, a and sharpen are forwarded unchanged to the tap helpers;
 * dither selects the dithering vertical pass.  tmpbuf is not referenced
 * by any line visible here.
 *
 * NOTE(review): this listing lacks the return type, the end of the
 * parameter list (presumably declaring `sharpen`), the set-up of `scale`
 * and `n_taps`, and the closing brace -- confirm against the full file.
 */
1203 vs_image_scale_lanczos_Y_float (const VSImage * dest, const VSImage * src,
1204 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
/* Horizontal filter taps (float table). */
1214 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1215 scale1d_calculate_taps_float (&scale->x_scale1d,
1216 src->width, dest->width, n_taps, a, sharpness, sharpen);
/* Vertical filter taps (float table). */
1218 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1219 scale1d_calculate_taps_float (&scale->y_scale1d,
1220 src->height, dest->height, n_taps, a, sharpness, sharpen);
1222 scale->dither = dither;
1224 scale->horiz_resample_func =
1225 (HorizResampleFunc) resample_horiz_float_u8_generic;
/* One float per destination column for every source row.  The assignment
 * target is not visible; presumably scale->tmpdata -- confirm. */
1228 g_malloc (sizeof (float) * scale->dest->width * scale->src->height);
1230 vs_scale_lanczos_Y_float (scale);
/* Release the tap tables and the intermediate buffer. */
1232 scale1d_cleanup (&scale->x_scale1d);
1233 scale1d_cleanup (&scale->y_scale1d);
1234 g_free (scale->tmpdata);
/*
 * vs_scale_lanczos_AYUV_int16:
 *
 * Row loop of the 16-bit fixed-point AYUV variant.  For every destination
 * row j it runs the horizontal resampler (passing S16_MIDSHIFT) on source
 * rows until tmp_yi reaches offsets[j] + n_taps of the vertical filter,
 * then applies row j's vertical taps (passing S16_POSTSHIFT) across
 * 4 * dest->width values, i.e. four values per pixel.
 *
 * NOTE(review): the declarations of j, yi, tmp_yi, destline and taps, the
 * advance of tmp_yi inside the while loop, the `else` between the two
 * vertical calls and the closing braces are not visible in this listing.
 */
1242 vs_scale_lanczos_AYUV_int16 (Scale * scale)
1250 for (j = 0; j < scale->dest->height; j++) {
/* Start of destination row j. */
1254 destline = scale->dest->pixels + scale->dest->stride * j;
/* First source row used by the vertical filter for row j. */
1256 yi = scale->y_scale1d.offsets[j];
/* Horizontal pass for source rows not yet processed; the width argument
 * here is in pixels, not in individual values. */
1258 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1259 scale->horiz_resample_func (TMP_LINE_S16_AYUV (tmp_yi),
1260 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1261 scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width);
/* The vertical tap table is indexed as n_taps gint16 per destination row. */
1265 taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
/* Vertical pass over 4 * width values; sizeof (gint16) * 4 * width is
 * presumably the byte stride between intermediate rows -- confirm. */
1266 if (scale->dither) {
1267 resample_vert_dither_int16_generic (destline,
1268 taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]),
1269 sizeof (gint16) * 4 * scale->dest->width,
1270 scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4);
1272 resample_vert_int16_generic (destline,
1273 taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]),
1274 sizeof (gint16) * 4 * scale->dest->width,
1275 scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4);
/*
 * vs_image_scale_lanczos_AYUV_int16:
 *
 * Scales src into dest with the two-pass Lanczos scaler using 16-bit
 * integer taps and intermediates.  Builds the tap tables (horizontal tap
 * count rounded up with ROUND_UP_4), chooses a horizontal resampler based
 * on the horizontal tap count, allocates the intermediate buffer, runs
 * vs_scale_lanczos_AYUV_int16() and then frees the tables and buffer.
 *
 * sharpness, a and sharpen are forwarded unchanged to the tap helpers;
 * dither selects the dithering vertical pass.  tmpbuf is not referenced
 * by any line visible here.
 *
 * NOTE(review): this listing lacks the return type, the end of the
 * parameter list (presumably declaring `sharpen`), the set-up of `scale`
 * and `n_taps`, the `case`/`default`/`break` lines of the switch, and the
 * closing braces -- confirm against the full file.
 */
1281 vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src,
1282 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
/* Horizontal taps: count is rounded up before the table is built, and the
 * table is generated with S16_SHIFT1. */
1292 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1293 n_taps = ROUND_UP_4 (n_taps);
1294 scale1d_calculate_taps_int16 (&scale->x_scale1d,
1295 src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1);
/* Vertical taps: count is used as returned, table generated with
 * S16_SHIFT2. */
1297 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1298 scale1d_calculate_taps_int16 (&scale->y_scale1d,
1299 src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2);
1301 scale->dither = dither;
/* Pick a horizontal resampler by tap count.  The case labels are not
 * visible; judging by the function names the specialised routines serve
 * 4, 8, 12 and 16 taps and the generic one is the fallback -- confirm. */
1303 switch (scale->x_scale1d.n_taps) {
1305 scale->horiz_resample_func =
1306 (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps4_shift0;
1309 scale->horiz_resample_func =
1310 (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps8_shift0;
1313 scale->horiz_resample_func =
1314 (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps12_shift0;
1317 scale->horiz_resample_func =
1318 (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps16_shift0;
1321 scale->horiz_resample_func =
1322 (HorizResampleFunc) resample_horiz_int16_int16_ayuv_generic;
/* Four gint16 per destination column for every source row.  The
 * assignment target is not visible; presumably scale->tmpdata -- confirm. */
1327 g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height * 4);
1329 vs_scale_lanczos_AYUV_int16 (scale);
/* Release the tap tables and the intermediate buffer. */
1331 scale1d_cleanup (&scale->x_scale1d);
1332 scale1d_cleanup (&scale->y_scale1d);
1333 g_free (scale->tmpdata);
/*
 * vs_scale_lanczos_AYUV_int32:
 *
 * Row loop of the 32-bit fixed-point AYUV variant.  For every destination
 * row j it runs the horizontal resampler (passing S32_MIDSHIFT) on source
 * rows until tmp_yi reaches offsets[j] + n_taps of the vertical filter,
 * then applies row j's vertical taps (passing S32_POSTSHIFT) across
 * 4 * dest->width values, i.e. four values per pixel.
 *
 * NOTE(review): the declarations of j, yi, tmp_yi, destline and taps, the
 * advance of tmp_yi inside the while loop, the `else` between the two
 * vertical calls and the closing braces are not visible in this listing.
 */
1338 vs_scale_lanczos_AYUV_int32 (Scale * scale)
1346 for (j = 0; j < scale->dest->height; j++) {
/* Start of destination row j. */
1350 destline = scale->dest->pixels + scale->dest->stride * j;
/* First source row used by the vertical filter for row j. */
1352 yi = scale->y_scale1d.offsets[j];
/* Horizontal pass for source rows not yet processed; the width argument
 * here is in pixels, not in individual values. */
1354 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1355 scale->horiz_resample_func (TMP_LINE_S32_AYUV (tmp_yi),
1356 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1357 scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width);
/* The vertical tap table is indexed as n_taps gint32 per destination row. */
1361 taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
/* Vertical pass over 4 * width values; sizeof (gint32) * 4 * width is
 * presumably the byte stride between intermediate rows -- confirm. */
1362 if (scale->dither) {
1363 resample_vert_dither_int32_generic (destline,
1364 taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]),
1365 sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps,
1366 S32_POSTSHIFT, scale->dest->width * 4);
1368 resample_vert_int32_generic (destline,
1369 taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]),
1370 sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps,
1371 S32_POSTSHIFT, scale->dest->width * 4);
/*
 * vs_image_scale_lanczos_AYUV_int32:
 *
 * Scales src into dest with the two-pass Lanczos scaler using 32-bit
 * integer taps and intermediates.  Builds the tap tables (horizontal tap
 * count rounded up with ROUND_UP_4), chooses a horizontal resampler based
 * on the horizontal tap count, allocates the intermediate buffer, runs
 * vs_scale_lanczos_AYUV_int32() and then frees the tables and buffer.
 *
 * sharpness, a and sharpen are forwarded unchanged to the tap helpers;
 * dither selects the dithering vertical pass.  tmpbuf is not referenced
 * by any line visible here.
 *
 * NOTE(review): this listing lacks the return type, the end of the
 * parameter list (presumably declaring `sharpen`), the set-up of `scale`
 * and `n_taps`, the `case`/`default`/`break` lines of the switch, and the
 * closing braces -- confirm against the full file.
 */
1377 vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, const VSImage * src,
1378 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
/* Horizontal taps: count is rounded up before the table is built, and the
 * table is generated with S32_SHIFT1. */
1388 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1389 n_taps = ROUND_UP_4 (n_taps);
1390 scale1d_calculate_taps_int32 (&scale->x_scale1d,
1391 src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1);
/* Vertical taps: count is used as returned, table generated with
 * S32_SHIFT2. */
1393 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1394 scale1d_calculate_taps_int32 (&scale->y_scale1d,
1395 src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2);
1397 scale->dither = dither;
/* Pick a horizontal resampler by tap count.  The case labels are not
 * visible; judging by the function names the specialised routines serve
 * 4, 8, 12 and 16 taps and the generic one is the fallback -- confirm. */
1399 switch (scale->x_scale1d.n_taps) {
1401 scale->horiz_resample_func =
1402 (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps4_shift0;
1405 scale->horiz_resample_func =
1406 (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps8_shift0;
1409 scale->horiz_resample_func =
1410 (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps12_shift0;
1413 scale->horiz_resample_func =
1414 (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps16_shift0;
1417 scale->horiz_resample_func =
1418 (HorizResampleFunc) resample_horiz_int32_int32_ayuv_generic;
/* Four int32_t per destination column for every source row.  The
 * assignment target is not visible; presumably scale->tmpdata -- confirm. */
1423 g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height * 4);
1425 vs_scale_lanczos_AYUV_int32 (scale);
/* Release the tap tables and the intermediate buffer. */
1427 scale1d_cleanup (&scale->x_scale1d);
1428 scale1d_cleanup (&scale->y_scale1d);
1429 g_free (scale->tmpdata);
/*
 * vs_scale_lanczos_AYUV_double:
 *
 * Row loop of the double-precision AYUV variant.  For every destination
 * row j it runs the horizontal resampler on source rows until tmp_yi
 * reaches offsets[j] + n_taps of the vertical filter, then applies row
 * j's vertical taps across 4 * dest->width values, i.e. four values per
 * pixel, using the dithering routine when scale->dither is set.
 *
 * NOTE(review): the declarations of j, yi, tmp_yi, destline and taps, the
 * advance of tmp_yi inside the while loop, the `else` between the two
 * vertical calls and the closing braces are not visible in this listing.
 */
1433 vs_scale_lanczos_AYUV_double (Scale * scale)
1441 for (j = 0; j < scale->dest->height; j++) {
/* Start of destination row j. */
1445 destline = scale->dest->pixels + scale->dest->stride * j;
/* First source row used by the vertical filter for row j. */
1447 yi = scale->y_scale1d.offsets[j];
/* Horizontal pass for source rows not yet processed; the width argument
 * here is in pixels, not in individual values. */
1449 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1450 scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi),
1451 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1452 scale->x_scale1d.n_taps, 0, scale->dest->width);
/* The vertical tap table is indexed as n_taps doubles per destination row. */
1456 taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
/* Vertical pass over 4 * width values; sizeof (double) * 4 * width is
 * presumably the byte stride between intermediate rows -- confirm. */
1457 if (scale->dither) {
1458 resample_vert_dither_double_generic (destline,
1459 taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1460 sizeof (double) * 4 * scale->dest->width,
1461 scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
1463 resample_vert_double_generic (destline,
1464 taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1465 sizeof (double) * 4 * scale->dest->width,
1466 scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
/*
 * vs_image_scale_lanczos_AYUV_double:
 *
 * Scales src into dest with the two-pass Lanczos scaler, keeping the
 * intermediate rows as doubles with four values per pixel.  Builds the
 * tap tables for both axes, selects the horizontal resampler, allocates
 * the intermediate buffer, runs vs_scale_lanczos_AYUV_double() and then
 * frees the tables and buffer.
 *
 * sharpness, a and sharpen are forwarded unchanged to the tap helpers;
 * dither selects the dithering vertical pass.  tmpbuf is not referenced
 * by any line visible here.
 *
 * NOTE(review): this listing lacks the return type, the end of the
 * parameter list (presumably declaring `sharpen`), the set-up of `scale`
 * and `n_taps`, and the closing brace -- confirm against the full file.
 */
1472 vs_image_scale_lanczos_AYUV_double (const VSImage * dest, const VSImage * src,
1473 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
/* Horizontal filter taps. */
1483 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1484 scale1d_calculate_taps (&scale->x_scale1d,
1485 src->width, dest->width, n_taps, a, sharpness, sharpen);
/* Vertical filter taps. */
1487 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1488 scale1d_calculate_taps (&scale->y_scale1d,
1489 src->height, dest->height, n_taps, a, sharpness, sharpen);
1491 scale->dither = dither;
1493 scale->horiz_resample_func =
1494 (HorizResampleFunc) resample_horiz_double_ayuv_generic;
/* Four doubles per destination column for every source row.  The
 * assignment target is not visible; presumably scale->tmpdata -- confirm. */
1497 g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4);
1499 vs_scale_lanczos_AYUV_double (scale);
/* Release the tap tables and the intermediate buffer. */
1501 scale1d_cleanup (&scale->x_scale1d);
1502 scale1d_cleanup (&scale->y_scale1d);
1503 g_free (scale->tmpdata);
/*
 * vs_scale_lanczos_AYUV_float:
 *
 * Row loop of the single-precision AYUV variant.  For every destination
 * row j it runs the horizontal resampler on source rows until tmp_yi
 * reaches offsets[j] + n_taps of the vertical filter, then applies row
 * j's vertical taps across 4 * dest->width values, i.e. four values per
 * pixel, using the dithering routine when scale->dither is set.
 *
 * NOTE(review): the declarations of j, yi, tmp_yi, destline and taps, the
 * advance of tmp_yi inside the while loop, the `else` between the two
 * vertical calls and the closing braces are not visible in this listing.
 */
1507 vs_scale_lanczos_AYUV_float (Scale * scale)
1515 for (j = 0; j < scale->dest->height; j++) {
/* Start of destination row j. */
1519 destline = scale->dest->pixels + scale->dest->stride * j;
/* First source row used by the vertical filter for row j. */
1521 yi = scale->y_scale1d.offsets[j];
/* Horizontal pass for source rows not yet processed; the width argument
 * here is in pixels, not in individual values. */
1523 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1524 scale->horiz_resample_func (TMP_LINE_FLOAT_AYUV (tmp_yi),
1525 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1526 scale->x_scale1d.n_taps, 0, scale->dest->width);
/* The vertical tap table is indexed as n_taps floats per destination row. */
1530 taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
/* Vertical pass over 4 * width values; sizeof (float) * 4 * width is
 * presumably the byte stride between intermediate rows -- confirm. */
1531 if (scale->dither) {
1532 resample_vert_dither_float_generic (destline,
1533 taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]),
1534 sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0,
1535 scale->dest->width * 4);
1537 resample_vert_float_generic (destline,
1538 taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]),
1539 sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0,
1540 scale->dest->width * 4);
/*
 * vs_image_scale_lanczos_AYUV_float:
 *
 * Scales src into dest with the two-pass Lanczos scaler, keeping the
 * intermediate rows as floats with four values per pixel.  Builds float
 * tap tables for both axes, selects the horizontal resampler, allocates
 * the intermediate buffer, runs vs_scale_lanczos_AYUV_float() and then
 * frees the tables and buffer.
 *
 * sharpness, a and sharpen are forwarded unchanged to the tap helpers;
 * dither selects the dithering vertical pass.  tmpbuf is not referenced
 * by any line visible here.
 *
 * NOTE(review): this listing lacks the return type, the end of the
 * parameter list (presumably declaring `sharpen`), the set-up of `scale`
 * and `n_taps`, and the closing brace -- confirm against the full file.
 */
1546 vs_image_scale_lanczos_AYUV_float (const VSImage * dest, const VSImage * src,
1547 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
/* Horizontal filter taps (float table). */
1557 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1558 scale1d_calculate_taps_float (&scale->x_scale1d,
1559 src->width, dest->width, n_taps, a, sharpness, sharpen);
/* Vertical filter taps (float table). */
1561 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1562 scale1d_calculate_taps_float (&scale->y_scale1d,
1563 src->height, dest->height, n_taps, a, sharpness, sharpen);
1565 scale->dither = dither;
1567 scale->horiz_resample_func =
1568 (HorizResampleFunc) resample_horiz_float_ayuv_generic;
/* Four floats per destination column for every source row.  The
 * assignment target is not visible; presumably scale->tmpdata -- confirm. */
1571 g_malloc (sizeof (float) * scale->dest->width * scale->src->height * 4);
1573 vs_scale_lanczos_AYUV_float (scale);
/* Release the tap tables and the intermediate buffer. */
1575 scale1d_cleanup (&scale->x_scale1d);
1576 scale1d_cleanup (&scale->y_scale1d);
1577 g_free (scale->tmpdata);
/*
 * vs_scale_lanczos_AYUV64_double:
 *
 * Row loop of the double-precision AYUV64 variant.  It follows the same
 * two-pass scheme as the other row loops, but the destination row is
 * addressed as guint16 and the vertical pass uses the *_u16 routines.
 * For every destination row j it runs the horizontal resampler on source
 * rows until tmp_yi reaches offsets[j] + n_taps of the vertical filter,
 * then applies row j's vertical taps across 4 * dest->width values.
 *
 * NOTE(review): the declarations of j, yi, tmp_yi, destline and taps, the
 * advance of tmp_yi inside the while loop, the `else` between the two
 * vertical calls and the closing braces are not visible in this listing.
 */
1581 vs_scale_lanczos_AYUV64_double (Scale * scale)
1589 for (j = 0; j < scale->dest->height; j++) {
/* Row start is computed in bytes (stride * j) and then viewed as guint16. */
1593 destline = (guint16 *) (scale->dest->pixels + scale->dest->stride * j);
/* First source row used by the vertical filter for row j. */
1595 yi = scale->y_scale1d.offsets[j];
/* Horizontal pass for source rows not yet processed; the width argument
 * here is in pixels, not in individual values. */
1597 while (tmp_yi < yi + scale->y_scale1d.n_taps) {
1598 scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi),
1599 scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi),
1600 scale->x_scale1d.n_taps, 0, scale->dest->width);
/* The vertical tap table is indexed as n_taps doubles per destination row. */
1604 taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps;
/* Vertical pass over 4 * width values; sizeof (double) * 4 * width is
 * presumably the byte stride between intermediate rows -- confirm. */
1605 if (scale->dither) {
1606 resample_vert_dither_double_generic_u16 (destline,
1607 taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1608 sizeof (double) * 4 * scale->dest->width,
1609 scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
1611 resample_vert_double_generic_u16 (destline,
1612 taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]),
1613 sizeof (double) * 4 * scale->dest->width,
1614 scale->y_scale1d.n_taps, 0, scale->dest->width * 4);
/*
 * vs_image_scale_lanczos_AYUV64_double:
 *
 * Scales src into dest with the two-pass Lanczos scaler for the AYUV64
 * variant, keeping the intermediate rows as doubles with four values per
 * pixel.  Builds the tap tables for both axes, selects the horizontal
 * resampler, allocates the intermediate buffer, runs
 * vs_scale_lanczos_AYUV64_double() and then frees the tables and buffer.
 *
 * sharpness, a and sharpen are forwarded unchanged to the tap helpers;
 * dither selects the dithering vertical pass.  tmpbuf is not referenced
 * by any line visible here.
 *
 * NOTE(review): this listing lacks the return type, the end of the
 * parameter list (presumably declaring `sharpen`), the set-up of `scale`
 * and `n_taps`, and the closing brace -- confirm against the full file.
 */
1620 vs_image_scale_lanczos_AYUV64_double (const VSImage * dest, const VSImage * src,
1621 uint8_t * tmpbuf, double sharpness, gboolean dither, double a,
/* Horizontal filter taps. */
1631 n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness);
1632 scale1d_calculate_taps (&scale->x_scale1d,
1633 src->width, dest->width, n_taps, a, sharpness, sharpen);
/* Vertical filter taps. */
1635 n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness);
1636 scale1d_calculate_taps (&scale->y_scale1d,
1637 src->height, dest->height, n_taps, a, sharpness, sharpen);
1639 scale->dither = dither;
/* NOTE(review): the horizontal routine is suffixed _s16 while the row loop
 * writes guint16 output through *_u16 routines -- confirm the suffix is
 * only a naming quirk and the input is read as unsigned 16-bit. */
1641 scale->horiz_resample_func =
1642 (HorizResampleFunc) resample_horiz_double_ayuv_generic_s16;
/* Four doubles per destination column for every source row.  The
 * assignment target is not visible; presumably scale->tmpdata -- confirm. */
1645 g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4);
1647 vs_scale_lanczos_AYUV64_double (scale);
/* Release the tap tables and the intermediate buffer. */
1649 scale1d_cleanup (&scale->x_scale1d);
1650 scale1d_cleanup (&scale->y_scale1d);
1651 g_free (scale->tmpdata);