From 4e38577b30bda1ccf43c15a4211e957eee078897 Mon Sep 17 00:00:00 2001 From: David Schleef Date: Thu, 17 Mar 2011 19:13:58 -0700 Subject: [PATCH] videoscale: Add modified Lanczos scaling method Adds a Lanczos-derived scaling method, which is rather slow, but very high quality. Adds a few properties that can be used to tune various scaling properties: sharpness, sharpen, envelope, dither. Not currently Orcified, but was designed with that in mind. --- gst/videoscale/Makefile.am | 3 +- gst/videoscale/gstvideoscale.c | 115 ++- gst/videoscale/gstvideoscale.h | 10 +- gst/videoscale/vs_image.h | 7 + gst/videoscale/vs_lanczos.c | 1558 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1689 insertions(+), 4 deletions(-) create mode 100644 gst/videoscale/vs_lanczos.c diff --git a/gst/videoscale/Makefile.am b/gst/videoscale/Makefile.am index 79d2cb4..1d68f80 100644 --- a/gst/videoscale/Makefile.am +++ b/gst/videoscale/Makefile.am @@ -8,7 +8,8 @@ libgstvideoscale_la_SOURCES = \ vs_image.c \ vs_scanline.c \ vs_4tap.c \ - vs_fill_borders.c + vs_fill_borders.c \ + vs_lanczos.c nodist_libgstvideoscale_la_SOURCES = $(ORC_NODIST_SOURCES) diff --git a/gst/videoscale/gstvideoscale.c b/gst/videoscale/gstvideoscale.c index b941ed0..f452e62 100644 --- a/gst/videoscale/gstvideoscale.c +++ b/gst/videoscale/gstvideoscale.c @@ -89,13 +89,22 @@ GST_DEBUG_CATEGORY (video_scale_debug); #define DEFAULT_PROP_METHOD GST_VIDEO_SCALE_BILINEAR #define DEFAULT_PROP_ADD_BORDERS FALSE +#define DEFAULT_PROP_SHARPNESS 1.0 +#define DEFAULT_PROP_SHARPEN 0.0 +#define DEFAULT_PROP_DITHER FALSE +#define DEFAULT_PROP_SUBMETHOD 1 +#define DEFAULT_PROP_ENVELOPE 2.0 enum { PROP_0, PROP_METHOD, - PROP_ADD_BORDERS - /* FILL ME */ + PROP_ADD_BORDERS, + PROP_SHARPNESS, + PROP_SHARPEN, + PROP_DITHER, + PROP_SUBMETHOD, + PROP_ENVELOPE }; #undef GST_VIDEO_SIZE_RANGE @@ -144,6 +153,7 @@ gst_video_scale_method_get_type (void) {GST_VIDEO_SCALE_NEAREST, "Nearest Neighbour", "nearest-neighbour"}, 
{GST_VIDEO_SCALE_BILINEAR, "Bilinear", "bilinear"}, {GST_VIDEO_SCALE_4TAP, "4-tap", "4-tap"}, + {GST_VIDEO_SCALE_LANCZOS, "Lanczos", "lanczos"}, {0, NULL, NULL}, }; @@ -251,6 +261,36 @@ gst_video_scale_class_init (GstVideoScaleClass * klass) DEFAULT_PROP_ADD_BORDERS, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + g_object_class_install_property (gobject_class, PROP_SHARPNESS, + g_param_spec_double ("sharpness", "Sharpness", + "Sharpness of filter", 0.0, 2.0, DEFAULT_PROP_SHARPNESS, + G_PARAM_CONSTRUCT | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + + g_object_class_install_property (gobject_class, PROP_SHARPEN, + g_param_spec_double ("sharpen", "Sharpen", + "Sharpening", 0.0, 1.0, DEFAULT_PROP_SHARPEN, + G_PARAM_CONSTRUCT | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + + g_object_class_install_property (gobject_class, PROP_DITHER, + g_param_spec_boolean ("dither", "Dither", + "Add dither (only used for Lanczos method)", + DEFAULT_PROP_DITHER, + G_PARAM_CONSTRUCT | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + +#if 0 + /* I am hiding submethod for now, since it's poorly named, poorly + * documented, and will probably just get people into trouble. 
*/ + g_object_class_install_property (gobject_class, PROP_SUBMETHOD, + g_param_spec_int ("submethod", "submethod", + "submethod", 0, 3, DEFAULT_PROP_SUBMETHOD, + G_PARAM_CONSTRUCT | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); +#endif + + g_object_class_install_property (gobject_class, PROP_ENVELOPE, + g_param_spec_double ("envelope", "Envelope", + "Size of filter envelope", 0.0, 5.0, DEFAULT_PROP_ENVELOPE, + G_PARAM_CONSTRUCT | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + trans_class->transform_caps = GST_DEBUG_FUNCPTR (gst_video_scale_transform_caps); trans_class->set_caps = GST_DEBUG_FUNCPTR (gst_video_scale_set_caps); @@ -267,6 +307,11 @@ gst_video_scale_init (GstVideoScale * videoscale, GstVideoScaleClass * klass) videoscale->tmp_buf = NULL; videoscale->method = DEFAULT_PROP_METHOD; videoscale->add_borders = DEFAULT_PROP_ADD_BORDERS; + videoscale->submethod = DEFAULT_PROP_SUBMETHOD; + videoscale->sharpness = DEFAULT_PROP_SHARPNESS; + videoscale->sharpen = DEFAULT_PROP_SHARPEN; + videoscale->dither = DEFAULT_PROP_DITHER; + videoscale->envelope = DEFAULT_PROP_ENVELOPE; } static void @@ -296,6 +341,31 @@ gst_video_scale_set_property (GObject * object, guint prop_id, GST_OBJECT_UNLOCK (vscale); gst_base_transform_reconfigure (GST_BASE_TRANSFORM_CAST (vscale)); break; + case PROP_SHARPNESS: + GST_OBJECT_LOCK (vscale); + vscale->sharpness = g_value_get_double (value); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_SHARPEN: + GST_OBJECT_LOCK (vscale); + vscale->sharpen = g_value_get_double (value); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_DITHER: + GST_OBJECT_LOCK (vscale); + vscale->dither = g_value_get_boolean (value); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_SUBMETHOD: + GST_OBJECT_LOCK (vscale); + vscale->submethod = g_value_get_int (value); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_ENVELOPE: + GST_OBJECT_LOCK (vscale); + vscale->envelope = g_value_get_double (value); + GST_OBJECT_UNLOCK (vscale); + break; default: 
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -319,6 +389,31 @@ gst_video_scale_get_property (GObject * object, guint prop_id, GValue * value, g_value_set_boolean (value, vscale->add_borders); GST_OBJECT_UNLOCK (vscale); break; + case PROP_SHARPNESS: + GST_OBJECT_LOCK (vscale); + g_value_set_double (value, vscale->sharpness); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_SHARPEN: + GST_OBJECT_LOCK (vscale); + g_value_set_double (value, vscale->sharpen); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_DITHER: + GST_OBJECT_LOCK (vscale); + g_value_set_boolean (value, vscale->dither); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_SUBMETHOD: + GST_OBJECT_LOCK (vscale); + g_value_set_int (value, vscale->submethod); + GST_OBJECT_UNLOCK (vscale); + break; + case PROP_ENVELOPE: + GST_OBJECT_LOCK (vscale); + g_value_set_double (value, vscale->envelope); + GST_OBJECT_UNLOCK (vscale); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -1078,6 +1173,11 @@ gst_video_scale_transform (GstBaseTransform * trans, GstBuffer * in, case GST_VIDEO_SCALE_4TAP: vs_image_scale_4tap_RGBA (&dest, &src, videoscale->tmp_buf); break; + case GST_VIDEO_SCALE_LANCZOS: + vs_image_scale_lanczos_AYUV (&dest, &src, videoscale->tmp_buf, + videoscale->sharpness, videoscale->dither, videoscale->submethod, + videoscale->envelope, videoscale->sharpen); + break; default: goto unknown_mode; } @@ -1217,6 +1317,17 @@ gst_video_scale_transform (GstBaseTransform * trans, GstBuffer * in, vs_image_scale_4tap_Y (&dest_u, &src_u, videoscale->tmp_buf); vs_image_scale_4tap_Y (&dest_v, &src_v, videoscale->tmp_buf); break; + case GST_VIDEO_SCALE_LANCZOS: + vs_image_scale_lanczos_Y (&dest, &src, videoscale->tmp_buf, + videoscale->sharpness, videoscale->dither, videoscale->submethod, + videoscale->envelope, videoscale->sharpen); + vs_image_scale_lanczos_Y (&dest_u, &src_u, videoscale->tmp_buf, + videoscale->sharpness, videoscale->dither, 
videoscale->submethod, + videoscale->envelope, videoscale->sharpen); + vs_image_scale_lanczos_Y (&dest_v, &src_v, videoscale->tmp_buf, + videoscale->sharpness, videoscale->dither, videoscale->submethod, + videoscale->envelope, videoscale->sharpen); + break; default: goto unknown_mode; } diff --git a/gst/videoscale/gstvideoscale.h b/gst/videoscale/gstvideoscale.h index a09d769..655268d 100644 --- a/gst/videoscale/gstvideoscale.h +++ b/gst/videoscale/gstvideoscale.h @@ -47,13 +47,15 @@ GST_DEBUG_CATEGORY_EXTERN (video_scale_debug); * @GST_VIDEO_SCALE_NEAREST: use nearest neighbour scaling (fast and ugly) * @GST_VIDEO_SCALE_BILINEAR: use bilinear scaling (slower but prettier). * @GST_VIDEO_SCALE_4TAP: use a 4-tap filter for scaling (slow). + * @GST_VIDEO_SCALE_LANCZOS: use a multitap Lanczos filter for scaling (slow). * * The videoscale method to use. */ typedef enum { GST_VIDEO_SCALE_NEAREST, GST_VIDEO_SCALE_BILINEAR, - GST_VIDEO_SCALE_4TAP + GST_VIDEO_SCALE_4TAP, + GST_VIDEO_SCALE_LANCZOS } GstVideoScaleMethod; typedef struct _GstVideoScale GstVideoScale; @@ -67,8 +69,14 @@ typedef struct _GstVideoScaleClass GstVideoScaleClass; struct _GstVideoScale { GstVideoFilter element; + /* properties */ GstVideoScaleMethod method; gboolean add_borders; + double sharpness; + double sharpen; + gboolean dither; + int submethod; + double envelope; /* negotiated stuff */ GstVideoFormat format; diff --git a/gst/videoscale/vs_image.h b/gst/videoscale/vs_image.h index 3a23dd4..2312acc 100644 --- a/gst/videoscale/vs_image.h +++ b/gst/videoscale/vs_image.h @@ -28,6 +28,7 @@ #ifndef __VS_IMAGE_H__ #define __VS_IMAGE_H__ +#include #include <_stdint.h> typedef struct _VSImage VSImage; @@ -48,6 +49,9 @@ void vs_image_scale_nearest_RGBA (const VSImage *dest, const VSImage *src, uint8_t *tmpbuf); void vs_image_scale_linear_RGBA (const VSImage *dest, const VSImage *src, uint8_t *tmpbuf); +void vs_image_scale_lanczos_AYUV (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double 
sharpness, gboolean dither, int submethod, + double a, double sharpen); void vs_image_scale_nearest_RGB (const VSImage *dest, const VSImage *src, uint8_t *tmpbuf); @@ -68,6 +72,9 @@ void vs_image_scale_nearest_Y (const VSImage *dest, const VSImage *src, uint8_t *tmpbuf); void vs_image_scale_linear_Y (const VSImage *dest, const VSImage *src, uint8_t *tmpbuf); +void vs_image_scale_lanczos_Y (const VSImage *dest, const VSImage *src, + uint8_t *tmpbuf, double sharpness, gboolean dither, int submethod, + double a, double sharpen); void vs_image_scale_nearest_RGB565 (const VSImage *dest, const VSImage *src, uint8_t *tmpbuf); diff --git a/gst/videoscale/vs_lanczos.c b/gst/videoscale/vs_lanczos.c new file mode 100644 index 0000000..1c87ba3 --- /dev/null +++ b/gst/videoscale/vs_lanczos.c @@ -0,0 +1,1558 @@ +/* + * Image Scaling Functions + * Copyright (c) 2011 David A. Schleef + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/* + * + * Modified Lanczos scaling algorithm + * ================================== + * + * This algorithm was developed by the author. The primary goals of + * the algorithm are high-quality video downscaling for medium scale + * factors (in the range of 1.3x to 5.0x) using methods that can be + * converted to SIMD code. Concerns with existing algorithms were + * mainly related to either over-soft filtering (Lanczos) or aliasing + * (bilinear or any other method with inadequate sampling). + * + * The problems with bilinear scaling are apparent when downscaling + * more than a factor of 2. For example, when downscaling by a factor + * of 3, only two-thirds of the input pixels contribute to the output + * pixels. This is only considering scaling in one direction; after + * scaling both vertically and horizontally in a 2-D image, fewer than + * half of the input pixels contribute to the output, so it should not + * be surprising that the output is suboptimal. + * + * The problems with Lanczos scaling are more subtle. From a theoretical + * perspective, Lanczos is an optimal algorithm for resampling equally- + * spaced values. This theoretical perspective is based on analysis + * done in frequency space, thus, Lanczos works very well for audio + * resampling, since the ear hears primarily in frequency space. The + * human visual system is sensitive primarily in the spatial domain, + * therefore any resampling algorithm should take this into account. 
+ * This difference is immediately clear in the size of the resampling + * window or envelope that is chosen for resampling: for audio, an + * envelope of a=64 is typical; in image scaling, the envelope is + * usually a=2 or a=3. + * + * One result of the HVS being sensitive in the spatial domain (and + * also probably due to oversampling capabilities of the retina and + * visual cortex) is that it is less sensitive to the exact magnitude + * of high-frequency visual signals than to the appropriate amount of + * energy in the nearby frequency band. A Lanczos kernel with a=2 + * or a=3 strongly decreases the amount of energy in the high frequency + * bands. The energy in this area can be increased by increasing a, + * which brings in energy from different areas of the image (bad for + * reasons mentioned above), or by oversampling the input data. We + * have chosen two methods for doing the latter. Firstly, there is + * a sharpness parameter, which increases the cutoff frequency of the + * filter, aliasing higher frequency noise into the passband. And + * secondly, there is the sharpen parameter, which increases the + * contribution of high-frequency (but in-band) components. + * + * An alternate explanation of the usefulness of a sharpening filter + * is that many natural images have a roughly 1/f spectrum. In order + * for a downsampled image to look more "natural" when high frequencies + * are removed, the frequencies in the pass band near the cutoff + * frequency are amplified, causing the spectrum to be more roughly + * 1/f. I said "roughly", not "literally". + * + * This alternate explanation is useful for understanding the author's + * secondary motivation for developing this algorithm, namely, as a + * method of video compression. Several recent techniques (such as + * HTTP Live Streaming and SVC) use image scaling as a method to get + * increased compression out of nominally non-scalable codecs such as + * H.264. 
For optimal quality, it is thus important to consider + * the scaler and encoder as a combined unit. Tuning of the sharpness + * and sharpen parameters was performed using the Toro encoder tuner, + * where scaled and encoded video was compared to unscaled and encoded + * video. This tuning suggested values that were very close to the + * values chosen by manual inspection of scaled images and video. + * + * The optimal values of sharpen and sharpness were slightly different + * depending on whether the comparison was still images or video. Video + * comparisons were more sensitive to aliasing, since the aliasing + * artifacts tended to move or "crawl" around the video. The default + * values are for video; image scaling may prefer higher values. + * + * A number of related techniques were rejected for various reasons. + * An early technique of selecting the sharpness factor locally based + * on edge detection (in order to use higher sharpness values without + * the corresponding aliasing on edges) worked very well for still + * images, but caused too much "crawling" on textures in video. Also, + * this method is slow, as it does not parallelize well. + * + * Non-separable techniques were rejected because the fastest would + * have been at least 4x slower. + * + * It is infrequently appreciated that image scaling should ideally be + * done in linear light space. Converting to linear light space has + * a similar effect to a sharpening filter. This approach was not + * taken because the added benefit is minor compared to the additional + * computational cost. Moreover, the benefit is decreased by increasing + * the strength of the sharpening filter. 
+ * + */ +#include + +#include "vs_scanline.h" +#include "vs_image.h" + +#include "gstvideoscaleorc.h" +#include +#include + +#define NEED_CLAMP(x,a,b) ((x) < (a) || (x) > (b)) + +#define ROUND_UP_2(x) (((x)+1)&~1) +#define ROUND_UP_4(x) (((x)+3)&~3) +#define ROUND_UP_8(x) (((x)+7)&~7) + +#define SRC_LINE(i) (scale->src->pixels + scale->src->stride * (i)) + +#define TMP_LINE_S16(i) ((gint16 *)scale->tmpdata + (i)*(scale->dest->width)) +#define TMP_LINE_S32(i) ((gint32 *)scale->tmpdata + (i)*(scale->dest->width)) +#define TMP_LINE_FLOAT(i) ((float *)scale->tmpdata + (i)*(scale->dest->width)) +#define TMP_LINE_DOUBLE(i) ((double *)scale->tmpdata + (i)*(scale->dest->width)) +#define TMP_LINE_S16_AYUV(i) ((gint16 *)scale->tmpdata + (i)*4*(scale->dest->width)) +#define TMP_LINE_S32_AYUV(i) ((gint32 *)scale->tmpdata + (i)*4*(scale->dest->width)) +#define TMP_LINE_FLOAT_AYUV(i) ((float *)scale->tmpdata + (i)*4*(scale->dest->width)) +#define TMP_LINE_DOUBLE_AYUV(i) ((double *)scale->tmpdata + (i)*4*(scale->dest->width)) + +#define PTR_OFFSET(a,b) ((void *)((char *)(a) + (b))) + +typedef void (*HorizResampleFunc) (void *dest, const gint32 * offsets, + const void *taps, const void *src, int n_taps, int shift, int n); + +typedef struct _Scale1D Scale1D; +struct _Scale1D +{ + int n; + double offset; + double scale; + + double fx; + double ex; + int dx; + + int n_taps; + gint32 *offsets; + void *taps; +}; + +typedef struct _Scale Scale; +struct _Scale +{ + const VSImage *dest; + const VSImage *src; + + double sharpness; + gboolean dither; + + void *tmpdata; + + HorizResampleFunc horiz_resample_func; + + Scale1D x_scale1d; + Scale1D y_scale1d; +}; + +static void +vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen); +static void vs_image_scale_lanczos_Y_int32 (const VSImage * dest, + const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, + double a, double 
sharpen); +static void vs_image_scale_lanczos_Y_float (const VSImage * dest, + const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, + double a, double sharpen); +static void vs_image_scale_lanczos_Y_double (const VSImage * dest, + const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, + double a, double sharpen); +static void +vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen); +static void vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, + const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, + double a, double sharpen); +static void vs_image_scale_lanczos_AYUV_float (const VSImage * dest, + const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, + double a, double sharpen); +static void vs_image_scale_lanczos_AYUV_double (const VSImage * dest, + const VSImage * src, uint8_t * tmpbuf, double sharpness, gboolean dither, + double a, double sharpen); + +static double +sinc (double x) +{ + if (x == 0) + return 1; + return sin (G_PI * x) / (G_PI * x); +} + +static double +envelope (double x) +{ + if (x <= -1 || x >= 1) + return 0; + return sinc (x); +} + +static int +scale1d_get_n_taps (int src_size, int dest_size, double a, double sharpness) +{ + double scale; + double fx; + int dx; + + scale = src_size / (double) dest_size; + if (scale > 1.0) { + fx = (1.0 / scale) * sharpness; + } else { + fx = (1.0) * sharpness; + } + dx = ceil (a / fx); + + return 2 * dx; +} + +static void +scale1d_cleanup (Scale1D * scale) +{ + g_free (scale->taps); + g_free (scale->offsets); +} + +/* + * Calculates a set of taps for each destination element in double + * format. Each set of taps sums to 1.0. 
+ * + */ +static void +scale1d_calculate_taps (Scale1D * scale, int src_size, int dest_size, + int n_taps, double a, double sharpness, double sharpen) +{ + int j; + double *tap_array; + gint32 *offsets; + double scale_offset; + double scale_increment; + int dx; + double fx; + double ex; + + scale->scale = src_size / (double) dest_size; + scale->offset = scale->scale / 2 - 0.5; + + if (scale->scale > 1.0) { + scale->fx = (1.0 / scale->scale) * sharpness; + } else { + scale->fx = (1.0) * sharpness; + } + scale->ex = scale->fx / a; + scale->dx = ceil (a / scale->fx); + + g_assert (n_taps >= 2 * scale->dx); + scale->n_taps = n_taps; + + scale->taps = g_malloc (sizeof (double) * scale->n_taps * dest_size); + scale->offsets = g_malloc (sizeof (gint32) * dest_size); + tap_array = scale->taps; + offsets = scale->offsets; + + scale_offset = scale->offset; + scale_increment = scale->scale; + dx = scale->dx; + fx = scale->fx; + ex = scale->ex; + + for (j = 0; j < dest_size; j++) { + double x; + int xi; + int l; + double weight; + double *taps; + + x = scale_offset + scale_increment * j; + x = CLAMP (x, 0, src_size); + xi = ceil (x) - dx; + + offsets[j] = xi; + weight = 0; + taps = tap_array + j * n_taps; + + for (l = 0; l < n_taps; l++) { + int xl = xi + l; + taps[l] = sinc ((x - xl) * fx) * envelope ((x - xl) * ex); + taps[l] -= sharpen * envelope ((x - xl) * ex); + weight += taps[l]; + } + g_assert (envelope ((x - (xi - 1)) * ex) == 0); + g_assert (envelope ((x - (xi + n_taps)) * ex) == 0); + for (l = 0; l < n_taps; l++) { + taps[l] /= weight; + } + + if (xi < 0) { + int shift = -xi; + + for (l = 0; l < shift; l++) { + taps[shift] += taps[l]; + } + for (l = 0; l < n_taps - shift; l++) { + taps[l] = taps[shift + l]; + } + for (; l < n_taps; l++) { + taps[l] = 0; + } + offsets[j] += shift; + } + + if (xi > src_size - n_taps) { + int shift = xi - (src_size - n_taps); + + for (l = 0; l < shift; l++) { + taps[n_taps - shift - 1] += taps[n_taps - shift + l]; + } + for (l = 0; l < 
n_taps - shift; l++) { + taps[n_taps - 1 - l] = taps[n_taps - 1 - shift - l]; + } + for (l = 0; l < shift; l++) { + taps[l] = 0; + } + offsets[j] -= shift; + } + } +} + +/* + * Calculates a set of taps for each destination element in float + * format. Each set of taps sums to 1.0. + */ +static void +scale1d_calculate_taps_float (Scale1D * scale, int src_size, int dest_size, + int n_taps, double a, double sharpness, double sharpen) +{ + double *taps_d; + float *taps_f; + int j; + + scale1d_calculate_taps (scale, src_size, dest_size, n_taps, a, sharpness, + sharpen); + + taps_d = scale->taps; + taps_f = g_malloc (sizeof (float) * scale->n_taps * dest_size); + + for (j = 0; j < dest_size * n_taps; j++) { + taps_f[j] = taps_d[j]; + } + + g_free (taps_d); + scale->taps = taps_f; +} + +/* + * Calculates a set of taps for each destination element in gint32 + * format. Each set of taps sums to (very nearly) (1<taps; + taps_i = g_malloc (sizeof (gint32) * scale->n_taps * dest_size); + + multiplier = (1 << shift); + + for (j = 0; j < dest_size; j++) { + for (i = 0; i < n_taps; i++) { + taps_i[j * n_taps + i] = + floor (0.5 + taps_d[j * n_taps + i] * multiplier); + } + } + + g_free (taps_d); + scale->taps = taps_i; +} + +/* + * Calculates a set of taps for each destination element in gint16 + * format. Each set of taps sums to (1<taps; + taps_i = g_malloc (sizeof (gint16) * scale->n_taps * dest_size); + + multiplier = (1 << shift); + + /* Various methods for converting floating point taps to integer. + * The dB values are the SSIM value between scaling an image via + * the floating point pathway vs. the integer pathway using the + * given code to generate the taps. Only one image was tested, + * scaling from 1920x1080 to 640x360. Several variations of the + * methods were also tested, with nothing appearing useful. */ +#if 0 + /* Standard round to integer. This causes bad DC errors. 
*/ + /* 44.588 dB */ + for (j = 0; j < dest_size; j++) { + for (i = 0; i < n_taps; i++) { + taps_i[j * n_taps + i] = + floor (0.5 + taps_d[j * n_taps + i] * multiplier); + } + } +#endif +#if 0 + /* Dithering via error propogation. Works pretty well, but + * really we want to propogate errors across rows, which would + * mean having several sets of tap arrays. Possible, but more work, + * and it may not even be better. */ + /* 57.0961 dB */ + { + double err = 0; + for (j = 0; j < dest_size; j++) { + for (i = 0; i < n_taps; i++) { + err += taps_d[j * n_taps + i] * multiplier; + taps_i[j * n_taps + i] = floor (err); + err -= floor (err); + } + } + } +#endif +#if 1 + /* Round to integer, but with an adjustable bias that we use to + * eliminate the DC error. This search method is a bit crude, and + * could perhaps be improved somewhat. */ + /* 60.4851 dB */ + for (j = 0; j < dest_size; j++) { + int k; + for (k = 0; k < 100; k++) { + int sum = 0; + double offset; + + offset = k * 0.01; + for (i = 0; i < n_taps; i++) { + taps_i[j * n_taps + i] = + floor (offset + taps_d[j * n_taps + i] * multiplier); + sum += taps_i[j * n_taps + i]; + } + + if (sum >= (1 << shift)) + break; + } + } +#endif +#if 0 + /* Round to integer, but adjust the multiplier. The search method is + * wrong a lot, but was sufficient enough to calculate dB error. 
*/ + /* 58.6517 dB */ + for (j = 0; j < dest_size; j++) { + int k; + int sum = 0; + for (k = 0; k < 200; k++) { + sum = 0; + + multiplier = (1 << shift) - 1.0 + k * 0.01; + for (i = 0; i < n_taps; i++) { + taps_i[j * n_taps + i] = + floor (0.5 + taps_d[j * n_taps + i] * multiplier); + sum += taps_i[j * n_taps + i]; + } + + if (sum >= (1 << shift)) + break; + } + if (sum != (1 << shift)) { + GST_ERROR ("%g %d", multiplier, sum); + } + } +#endif +#if 0 + /* Round to integer, but subtract the error from the largest tap */ + /* 58.3677 dB */ + for (j = 0; j < dest_size; j++) { + int err = -multiplier; + for (i = 0; i < n_taps; i++) { + taps_i[j * n_taps + i] = + floor (0.5 + taps_d[j * n_taps + i] * multiplier); + err += taps_i[j * n_taps + i]; + } + if (taps_i[j * n_taps + (n_taps / 2 - 1)] > + taps_i[j * n_taps + (n_taps / 2)]) { + taps_i[j * n_taps + (n_taps / 2 - 1)] -= err; + } else { + taps_i[j * n_taps + (n_taps / 2)] -= err; + } + } +#endif + + g_free (taps_d); + scale->taps = taps_i; +} + + +void +vs_image_scale_lanczos_Y (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod, + double a, double sharpen) +{ + switch (submethod) { + case 0: + default: + vs_image_scale_lanczos_Y_int16 (dest, src, tmpbuf, sharpness, dither, a, + sharpen); + break; + case 1: + vs_image_scale_lanczos_Y_int32 (dest, src, tmpbuf, sharpness, dither, a, + sharpen); + break; + case 2: + vs_image_scale_lanczos_Y_float (dest, src, tmpbuf, sharpness, dither, a, + sharpen); + break; + case 3: + vs_image_scale_lanczos_Y_double (dest, src, tmpbuf, sharpness, dither, a, + sharpen); + break; + } +} + +void +vs_image_scale_lanczos_AYUV (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, int submethod, + double a, double sharpen) +{ + switch (submethod) { + case 0: + default: + vs_image_scale_lanczos_AYUV_int16 (dest, src, tmpbuf, sharpness, dither, + a, sharpen); + break; + case 1: + 
vs_image_scale_lanczos_AYUV_int32 (dest, src, tmpbuf, sharpness, dither, + a, sharpen); + break; + case 2: + vs_image_scale_lanczos_AYUV_float (dest, src, tmpbuf, sharpness, dither, + a, sharpen); + break; + case 3: + vs_image_scale_lanczos_AYUV_double (dest, src, tmpbuf, sharpness, dither, + a, sharpen); + break; + } +} + + + +#define RESAMPLE_HORIZ_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \ +static void \ +function (dest_type *dest, const gint32 *offsets, \ + const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ +{ \ + int i; \ + int k; \ + dest_type sum; \ + const src_type *srcline; \ + const tap_type *tapsline; \ + for (i = 0; i < n; i++) { \ + srcline = src + offsets[i]; \ + tapsline = taps + i * _n_taps; \ + sum = 0; \ + for (k = 0; k < _n_taps; k++) { \ + sum += srcline[k] * tapsline[k]; \ + } \ + dest[i] = sum; \ + } \ +} + +#define RESAMPLE_HORIZ(function, dest_type, tap_type, src_type, _n_taps, _shift) \ +static void \ +function (dest_type *dest, const gint32 *offsets, \ + const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ +{ \ + int i; \ + int k; \ + dest_type sum; \ + const src_type *srcline; \ + const tap_type *tapsline; \ + int offset; \ + if (_shift > 0) offset = (1<<_shift)>>1; \ + else offset = 0; \ + for (i = 0; i < n; i++) { \ + srcline = src + offsets[i]; \ + tapsline = taps + i * _n_taps; \ + sum = 0; \ + for (k = 0; k < _n_taps; k++) { \ + sum += srcline[k] * tapsline[k]; \ + } \ + dest[i] = (sum + offset) >> _shift; \ + } \ +} + +#define RESAMPLE_HORIZ_AYUV_FLOAT(function, dest_type, tap_type, src_type, _n_taps) \ +static void \ +function (dest_type *dest, const gint32 *offsets, \ + const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ +{ \ + int i; \ + int k; \ + dest_type sum1; \ + dest_type sum2; \ + dest_type sum3; \ + dest_type sum4; \ + const src_type *srcline; \ + const tap_type *tapsline; \ + for (i = 0; i < n; i++) { \ + srcline = src + 4*offsets[i]; \ + 
tapsline = taps + i * _n_taps; \ + sum1 = 0; \ + sum2 = 0; \ + sum3 = 0; \ + sum4 = 0; \ + for (k = 0; k < _n_taps; k++) { \ + sum1 += srcline[k*4+0] * tapsline[k]; \ + sum2 += srcline[k*4+1] * tapsline[k]; \ + sum3 += srcline[k*4+2] * tapsline[k]; \ + sum4 += srcline[k*4+3] * tapsline[k]; \ + } \ + dest[i*4+0] = sum1; \ + dest[i*4+1] = sum2; \ + dest[i*4+2] = sum3; \ + dest[i*4+3] = sum4; \ + } \ +} + +#define RESAMPLE_HORIZ_AYUV(function, dest_type, tap_type, src_type, _n_taps, _shift) \ +static void \ +function (dest_type *dest, const gint32 *offsets, \ + const tap_type *taps, const src_type *src, int n_taps, int shift, int n) \ +{ \ + int i; \ + int k; \ + dest_type sum1; \ + dest_type sum2; \ + dest_type sum3; \ + dest_type sum4; \ + const src_type *srcline; \ + const tap_type *tapsline; \ + int offset; \ + if (_shift > 0) offset = (1<<_shift)>>1; \ + else offset = 0; \ + for (i = 0; i < n; i++) { \ + srcline = src + 4*offsets[i]; \ + tapsline = taps + i * _n_taps; \ + sum1 = 0; \ + sum2 = 0; \ + sum3 = 0; \ + sum4 = 0; \ + for (k = 0; k < _n_taps; k++) { \ + sum1 += srcline[k*4+0] * tapsline[k]; \ + sum2 += srcline[k*4+1] * tapsline[k]; \ + sum3 += srcline[k*4+2] * tapsline[k]; \ + sum4 += srcline[k*4+3] * tapsline[k]; \ + } \ + dest[i*4+0] = (sum1 + offset) >> _shift; \ + dest[i*4+1] = (sum2 + offset) >> _shift; \ + dest[i*4+2] = (sum3 + offset) >> _shift; \ + dest[i*4+3] = (sum4 + offset) >> _shift; \ + } \ +} + +/* *INDENT-OFF* */ +RESAMPLE_HORIZ_FLOAT (resample_horiz_double_u8_generic, double, double, + guint8, n_taps) +RESAMPLE_HORIZ_FLOAT (resample_horiz_float_u8_generic, float, float, + guint8, n_taps) +RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_double_ayuv_generic, double, double, + guint8, n_taps) +RESAMPLE_HORIZ_AYUV_FLOAT (resample_horiz_float_ayuv_generic, float, float, + guint8, n_taps) + +RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_generic, gint32, gint32, + guint8, n_taps, shift) +RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_generic, gint16, 
gint16, + guint8, n_taps, shift) +RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_generic, gint32, gint32, + guint8, n_taps, shift) +RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_generic, gint16, gint16, + guint8, n_taps, shift) + +/* Candidates for orcification */ +RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps16_shift0, gint32, gint32, + guint8, 16, 0) +RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps12_shift0, gint32, gint32, + guint8, 12, 0) +RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps8_shift0, gint32, gint32, + guint8, 8, 0) +RESAMPLE_HORIZ (resample_horiz_int32_int32_u8_taps4_shift0, gint32, gint32, + guint8, 4, 0) +RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps16_shift0, gint16, gint16, + guint8, 16, 0) +RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps12_shift0, gint16, gint16, + guint8, 12, 0) +RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps8_shift0, gint16, gint16, + guint8, 8, 0) +RESAMPLE_HORIZ (resample_horiz_int16_int16_u8_taps4_shift0, gint16, gint16, + guint8, 4, 0) +/* The same fixed tap-count, shift-0 instantiations, generated with RESAMPLE_HORIZ_AYUV. */ +RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps16_shift0, gint32, gint32, + guint8, 16, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps12_shift0, gint32, gint32, + guint8, 12, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps8_shift0, gint32, gint32, + guint8, 8, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int32_int32_ayuv_taps4_shift0, gint32, gint32, + guint8, 4, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps16_shift0, gint16, gint16, + guint8, 16, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps12_shift0, gint16, gint16, + guint8, 12, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps8_shift0, gint16, gint16, + guint8, 8, 0) +RESAMPLE_HORIZ_AYUV (resample_horiz_int16_int16_ayuv_taps4_shift0, gint16, gint16, + guint8, 4, 0) +/* *INDENT-ON* */ +/* RESAMPLE_VERT defines a vertical filter: each output byte is the tap-weighted sum over n_taps rows, row l being PTR_OFFSET (src, stride * l), plus a rounding offset, shifted right by _shift and clamped to 0..255. _n_taps is not referenced by the expansion. */ +#define RESAMPLE_VERT(function, tap_type, src_type, _n_taps, _shift) \ +static void \ +function (guint8 *dest, \ + const tap_type *taps, 
const src_type *src, int stride, int n_taps, \ + int shift, int n) \ +{ \ + int i; \ + int l; \ + gint32 sum_y; \ + gint32 offset = (1<<_shift) >> 1; \ + for (i = 0; i < n; i++) { \ + sum_y = 0; \ + for (l = 0; l < n_taps; l++) { \ + const src_type *line = PTR_OFFSET(src, stride * l); \ + sum_y += line[i] * taps[l]; \ + } \ + dest[i] = CLAMP ((sum_y + offset) >> _shift, 0, 255); \ + } \ +} +/* RESAMPLE_VERT_DITHER defines the same filter without the rounding offset: the low _shift bits remaining after each output are kept in err_y and added to the next sample in the row. */ +#define RESAMPLE_VERT_DITHER(function, tap_type, src_type, _n_taps, _shift) \ +static void \ +function (guint8 *dest, \ + const tap_type *taps, const src_type *src, int stride, int n_taps, \ + int shift, int n) \ +{ \ + int i; \ + int l; \ + gint32 sum_y; \ + gint32 err_y = 0; \ + gint32 mask = (1<<_shift) - 1; \ + for (i = 0; i < n; i++) { \ + sum_y = 0; \ + for (l = 0; l < n_taps; l++) { \ + const src_type *line = PTR_OFFSET(src, stride * l); \ + sum_y += line[i] * taps[l]; \ + } \ + err_y += sum_y; \ + dest[i] = CLAMP (err_y >> _shift, 0, 255); \ + err_y &= mask; \ + } \ +} + +/* *INDENT-OFF* */ +RESAMPLE_VERT (resample_vert_int32_generic, gint32, gint32, n_taps, shift) +RESAMPLE_VERT_DITHER (resample_vert_dither_int32_generic, gint32, gint32, + n_taps, shift) +RESAMPLE_VERT (resample_vert_int16_generic, gint16, gint16, n_taps, shift); /* NOTE(review): this semicolon follows a macro that expands to a complete function definition, leaving an empty declaration at file scope. */ +RESAMPLE_VERT_DITHER (resample_vert_dither_int16_generic, gint16, gint16, + n_taps, shift) +/* *INDENT-ON* */ +/* RESAMPLE_VERT_FLOAT defines the floating-point vertical filter: the tap-weighted sum is rounded with floor (0.5 + sum) and clamped to 0..255. The expansion does not reference _n_taps, _shift or the shift argument. */ +#define RESAMPLE_VERT_FLOAT(function, tap_type, src_type, _n_taps, _shift) \ +static void \ +function (guint8 *dest, \ + const tap_type *taps, const src_type *src, int stride, int n_taps, \ + int shift, int n) \ +{ \ + int i; \ + int l; \ + src_type sum_y; \ + for (i = 0; i < n; i++) { \ + sum_y = 0; \ + for (l = 0; l < n_taps; l++) { \ + const src_type *line = PTR_OFFSET(src, stride * l); \ + sum_y += line[i] * taps[l]; \ + } \ + dest[i] = CLAMP (floor(0.5 + sum_y), 0, 255); \ + } \ +} +/* RESAMPLE_VERT_FLOAT_DITHER defines the dithered floating-point filter: each output is floor (err_y) clamped to 0..255, and the fractional part of err_y is carried into the next sample of the row. */ +#define RESAMPLE_VERT_FLOAT_DITHER(function, tap_type, src_type, _n_taps, _shift) \ +static void \ +function (guint8 *dest, \ + const tap_type 
*taps, const src_type *src, int stride, int n_taps, \ + int shift, int n) \ +{ \ + int i; \ + int l; \ + src_type sum_y; \ + src_type err_y = 0; \ + for (i = 0; i < n; i++) { \ + sum_y = 0; \ + for (l = 0; l < n_taps; l++) { \ + const src_type *line = PTR_OFFSET(src, stride * l); \ + sum_y += line[i] * taps[l]; \ + } \ + err_y += sum_y; \ + dest[i] = CLAMP (floor (err_y), 0, 255); \ + err_y -= floor (err_y); \ + } \ +} + +/* *INDENT-OFF* */ +RESAMPLE_VERT_FLOAT (resample_vert_double_generic, double, double, n_taps, + shift) +RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_double_generic, double, double, + n_taps, shift) + +RESAMPLE_VERT_FLOAT (resample_vert_float_generic, float, float, n_taps, shift) +RESAMPLE_VERT_FLOAT_DITHER (resample_vert_dither_float_generic, float, float, + n_taps, shift) +/* *INDENT-ON* */ +/* Shift constants of the int16 path: SHIFT1 and SHIFT2 are handed to the horizontal and vertical tap calculations, MIDSHIFT to the horizontal resampler and POSTSHIFT to the vertical resampler. */ +#define S16_SHIFT1 7 +#define S16_SHIFT2 7 +#define S16_MIDSHIFT 0 +#define S16_POSTSHIFT (S16_SHIFT1+S16_SHIFT2-S16_MIDSHIFT) +/* Two-pass scaler with gint16 intermediates. For each destination row j it first horizontally resamples every source row with index below offsets[j] + n_taps that has not been processed yet (tmp_yi only grows), then vertically filters n_taps temporary rows starting at offsets[j] into the destination row, with or without dither. NOTE(review): nothing here bounds offsets[j] + n_taps by src->height; presumably the tap calculation guarantees it - confirm. */ +static void +vs_scale_lanczos_Y_int16 (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + gint16 *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_S16 (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width); + tmp_yi++; + } + + taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_int16_generic (destline, + taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]), + sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps, + S16_POSTSHIFT, scale->dest->width); + } else { + resample_vert_int16_generic (destline, + taps, TMP_LINE_S16 (scale->y_scale1d.offsets[j]), + sizeof (gint16) * scale->dest->width, scale->y_scale1d.n_taps, + S16_POSTSHIFT, 
scale->dest->width); + } + } +} +/* Public entry point of the int16 path: computes the horizontal tap table (tap count passed through ROUND_UP_4) and the vertical tap table, selects the fixed-tap horizontal kernel for 4, 8, 12 or 16 taps and the generic kernel otherwise, allocates dest->width * src->height gint16 temporaries, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_Y_int16 (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + n_taps = ROUND_UP_4 (n_taps); + scale1d_calculate_taps_int16 (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + scale1d_calculate_taps_int16 (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2); + + scale->dither = dither; + + switch (scale->x_scale1d.n_taps) { + case 4: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_u8_taps4_shift0; + break; + case 8: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_u8_taps8_shift0; + break; + case 12: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_u8_taps12_shift0; + break; + case 16: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_u8_taps16_shift0; + break; + default: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_u8_generic; + break; + } + + scale->tmpdata = + g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height); + + vs_scale_lanczos_Y_int16 (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} + +/* Shift constants of the int32 path: SHIFT1 and SHIFT2 go to the horizontal and vertical tap calculations, MIDSHIFT to the horizontal resampler and POSTSHIFT to the vertical resampler. */ +#define S32_SHIFT1 11 +#define S32_SHIFT2 11 +#define S32_MIDSHIFT 0 +#define S32_POSTSHIFT (S32_SHIFT1+S32_SHIFT2-S32_MIDSHIFT) +/* Two-pass scaler with gint32 intermediates: source rows are resampled horizontally into temporary rows on demand, then n_taps temporary rows starting at offsets[j] are filtered vertically into destination row j. */ +static void +vs_scale_lanczos_Y_int32 (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + gint32 *taps; + + destline = scale->dest->pixels + 
scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_S32 (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width); + tmp_yi++; + } + + taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_int32_generic (destline, + taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]), + sizeof (gint32) * scale->dest->width, + scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width); + } else { + resample_vert_int32_generic (destline, + taps, TMP_LINE_S32 (scale->y_scale1d.offsets[j]), + sizeof (gint32) * scale->dest->width, + scale->y_scale1d.n_taps, S32_POSTSHIFT, scale->dest->width); + } + } +} +/* Public entry point of the int32 path: computes the horizontal tap table (tap count passed through ROUND_UP_4) and the vertical tap table, selects the fixed-tap horizontal kernel for 4, 8, 12 or 16 taps and the generic kernel otherwise, allocates dest->width * src->height int32_t temporaries, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_Y_int32 (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + n_taps = ROUND_UP_4 (n_taps); + scale1d_calculate_taps_int32 (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + scale1d_calculate_taps_int32 (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2); + + scale->dither = dither; + + switch (scale->x_scale1d.n_taps) { + case 4: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_u8_taps4_shift0; + break; + case 8: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_u8_taps8_shift0; + break; + case 12: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_u8_taps12_shift0; + break; + case 16: + scale->horiz_resample_func = + 
(HorizResampleFunc) resample_horiz_int32_int32_u8_taps16_shift0; + break; + default: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_u8_generic; + break; + } + + scale->tmpdata = + g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height); + + vs_scale_lanczos_Y_int32 (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} +/* Two-pass scaler with double intermediates; both the horizontal and the vertical resampler are called with a shift argument of 0. */ +static void +vs_scale_lanczos_Y_double (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + double *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_DOUBLE (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, 0, scale->dest->width); + tmp_yi++; + } + + taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_double_generic (destline, + taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]), + sizeof (double) * scale->dest->width, + scale->y_scale1d.n_taps, 0, scale->dest->width); + } else { + resample_vert_double_generic (destline, + taps, TMP_LINE_DOUBLE (scale->y_scale1d.offsets[j]), + sizeof (double) * scale->dest->width, + scale->y_scale1d.n_taps, 0, scale->dest->width); + } + } +} +/* Public entry point of the double path: builds both tap tables with scale1d_calculate_taps, always uses the generic horizontal kernel, allocates dest->width * src->height doubles, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_Y_double (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + scale1d_calculate_taps (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + 
scale1d_calculate_taps (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen); + + scale->dither = dither; + + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_double_u8_generic; + + scale->tmpdata = + g_malloc (sizeof (double) * scale->dest->width * scale->src->height); + + vs_scale_lanczos_Y_double (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} +/* Two-pass scaler with float intermediates; both the horizontal and the vertical resampler are called with a shift argument of 0. */ +static void +vs_scale_lanczos_Y_float (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + float *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_FLOAT (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, 0, scale->dest->width); + tmp_yi++; + } + + taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_float_generic (destline, + taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]), + sizeof (float) * scale->dest->width, + scale->y_scale1d.n_taps, 0, scale->dest->width); + } else { + resample_vert_float_generic (destline, + taps, TMP_LINE_FLOAT (scale->y_scale1d.offsets[j]), + sizeof (float) * scale->dest->width, + scale->y_scale1d.n_taps, 0, scale->dest->width); + } + } +} +/* Public entry point of the float path: builds both tap tables with scale1d_calculate_taps_float, always uses the generic horizontal kernel, allocates dest->width * src->height floats, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_Y_float (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + scale1d_calculate_taps_float (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen); + + n_taps = scale1d_get_n_taps (src->height, 
dest->height, a, sharpness); + scale1d_calculate_taps_float (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen); + + scale->dither = dither; + + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_float_u8_generic; + + scale->tmpdata = + g_malloc (sizeof (float) * scale->dest->width * scale->src->height); + + vs_scale_lanczos_Y_float (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} + + + + +/* AYUV scaler with gint16 intermediates: the row loop has the same shape as the single-plane int16 scaler, but the vertical pass runs over dest->width * 4 samples with a row stride of sizeof (gint16) * 4 * dest->width. */ +static void +vs_scale_lanczos_AYUV_int16 (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + gint16 *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_S16_AYUV (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, S16_MIDSHIFT, scale->dest->width); + tmp_yi++; + } + + taps = (gint16 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_int16_generic (destline, + taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]), + sizeof (gint16) * 4 * scale->dest->width, + scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4); + } else { + resample_vert_int16_generic (destline, + taps, TMP_LINE_S16_AYUV (scale->y_scale1d.offsets[j]), + sizeof (gint16) * 4 * scale->dest->width, + scale->y_scale1d.n_taps, S16_POSTSHIFT, scale->dest->width * 4); + } + } +} +/* Public entry point of the AYUV int16 path: computes the horizontal tap table (tap count passed through ROUND_UP_4) and the vertical tap table, selects the fixed-tap AYUV horizontal kernel for 4, 8, 12 or 16 taps and the generic AYUV kernel otherwise, allocates dest->width * src->height * 4 gint16 temporaries, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_AYUV_int16 (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + n_taps = ROUND_UP_4 (n_taps); + 
scale1d_calculate_taps_int16 (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen, S16_SHIFT1); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + scale1d_calculate_taps_int16 (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen, S16_SHIFT2); + + scale->dither = dither; + + switch (scale->x_scale1d.n_taps) { + case 4: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps4_shift0; + break; + case 8: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps8_shift0; + break; + case 12: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps12_shift0; + break; + case 16: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_ayuv_taps16_shift0; + break; + default: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int16_int16_ayuv_generic; + break; + } + + scale->tmpdata = + g_malloc (sizeof (gint16) * scale->dest->width * scale->src->height * 4); + + vs_scale_lanczos_AYUV_int16 (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} + +/* AYUV scaler with gint32 intermediates: source rows are resampled horizontally on demand, then n_taps temporary rows are filtered vertically over dest->width * 4 samples per destination row. */ +static void +vs_scale_lanczos_AYUV_int32 (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + gint32 *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_S32_AYUV (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, S32_MIDSHIFT, scale->dest->width); + tmp_yi++; + } + + taps = (gint32 *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_int32_generic (destline, + taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]), + sizeof 
(gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps, + S32_POSTSHIFT, scale->dest->width * 4); + } else { + resample_vert_int32_generic (destline, + taps, TMP_LINE_S32_AYUV (scale->y_scale1d.offsets[j]), + sizeof (gint32) * 4 * scale->dest->width, scale->y_scale1d.n_taps, + S32_POSTSHIFT, scale->dest->width * 4); + } + } +} +/* Public entry point of the AYUV int32 path: computes the horizontal tap table (tap count passed through ROUND_UP_4) and the vertical tap table, selects the fixed-tap AYUV horizontal kernel for 4, 8, 12 or 16 taps and the generic AYUV kernel otherwise, allocates dest->width * src->height * 4 int32_t temporaries, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_AYUV_int32 (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + n_taps = ROUND_UP_4 (n_taps); + scale1d_calculate_taps_int32 (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen, S32_SHIFT1); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + scale1d_calculate_taps_int32 (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen, S32_SHIFT2); + + scale->dither = dither; + + switch (scale->x_scale1d.n_taps) { + case 4: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps4_shift0; + break; + case 8: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps8_shift0; + break; + case 12: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps12_shift0; + break; + case 16: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_ayuv_taps16_shift0; + break; + default: + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_int32_int32_ayuv_generic; + break; + } + + scale->tmpdata = + g_malloc (sizeof (int32_t) * scale->dest->width * scale->src->height * 4); + + vs_scale_lanczos_AYUV_int32 (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} +/* AYUV scaler with double intermediates; both resamplers get a shift argument of 0 and the vertical pass covers dest->width * 4 samples per row. */ +static void +vs_scale_lanczos_AYUV_double (Scale * 
scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + double *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_DOUBLE_AYUV (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, 0, scale->dest->width); + tmp_yi++; + } + + taps = (double *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_double_generic (destline, + taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]), + sizeof (double) * 4 * scale->dest->width, + scale->y_scale1d.n_taps, 0, scale->dest->width * 4); + } else { + resample_vert_double_generic (destline, + taps, TMP_LINE_DOUBLE_AYUV (scale->y_scale1d.offsets[j]), + sizeof (double) * 4 * scale->dest->width, + scale->y_scale1d.n_taps, 0, scale->dest->width * 4); + } + } +} +/* Public entry point of the AYUV double path: builds both tap tables with scale1d_calculate_taps, always uses the generic AYUV horizontal kernel, allocates dest->width * src->height * 4 doubles, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_AYUV_double (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + scale1d_calculate_taps (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + scale1d_calculate_taps (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen); + + scale->dither = dither; + + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_double_ayuv_generic; + + scale->tmpdata = + g_malloc (sizeof (double) * scale->dest->width * scale->src->height * 4); + + vs_scale_lanczos_AYUV_double (scale); + + scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free 
(scale->tmpdata); +} +/* AYUV scaler with float intermediates; both resamplers get a shift argument of 0 and the vertical pass covers dest->width * 4 samples per row. */ +static void +vs_scale_lanczos_AYUV_float (Scale * scale) +{ + int j; + int yi; + int tmp_yi; + + tmp_yi = 0; + + for (j = 0; j < scale->dest->height; j++) { + guint8 *destline; + float *taps; + + destline = scale->dest->pixels + scale->dest->stride * j; + + yi = scale->y_scale1d.offsets[j]; + + while (tmp_yi < yi + scale->y_scale1d.n_taps) { + scale->horiz_resample_func (TMP_LINE_FLOAT_AYUV (tmp_yi), + scale->x_scale1d.offsets, scale->x_scale1d.taps, SRC_LINE (tmp_yi), + scale->x_scale1d.n_taps, 0, scale->dest->width); + tmp_yi++; + } + + taps = (float *) scale->y_scale1d.taps + j * scale->y_scale1d.n_taps; + if (scale->dither) { + resample_vert_dither_float_generic (destline, + taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]), + sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0, + scale->dest->width * 4); + } else { + resample_vert_float_generic (destline, + taps, TMP_LINE_FLOAT_AYUV (scale->y_scale1d.offsets[j]), + sizeof (float) * 4 * scale->dest->width, scale->y_scale1d.n_taps, 0, + scale->dest->width * 4); + } + } +} +/* Public entry point of the AYUV float path: builds both tap tables with scale1d_calculate_taps_float, always uses the generic AYUV horizontal kernel, allocates dest->width * src->height * 4 floats, runs the scaler and releases the tables and the buffer. The tmpbuf argument is not used. */ +void +vs_image_scale_lanczos_AYUV_float (const VSImage * dest, const VSImage * src, + uint8_t * tmpbuf, double sharpness, gboolean dither, double a, + double sharpen) +{ + Scale s = { 0 }; + Scale *scale = &s; + int n_taps; + + scale->dest = dest; + scale->src = src; + + n_taps = scale1d_get_n_taps (src->width, dest->width, a, sharpness); + scale1d_calculate_taps_float (&scale->x_scale1d, + src->width, dest->width, n_taps, a, sharpness, sharpen); + + n_taps = scale1d_get_n_taps (src->height, dest->height, a, sharpness); + scale1d_calculate_taps_float (&scale->y_scale1d, + src->height, dest->height, n_taps, a, sharpness, sharpen); + + scale->dither = dither; + + scale->horiz_resample_func = + (HorizResampleFunc) resample_horiz_float_ayuv_generic; + + scale->tmpdata = + g_malloc (sizeof (float) * scale->dest->width * scale->src->height * 4); + + vs_scale_lanczos_AYUV_float (scale); + + 
scale1d_cleanup (&scale->x_scale1d); + scale1d_cleanup (&scale->y_scale1d); + g_free (scale->tmpdata); +} -- 2.7.4