/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <assert.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_convolve.h"
#include "vp9/common/vp9_filter.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
22 uint8_t *dst, ptrdiff_t dst_stride,
23 const int16_t *filter_x0, int x_step_q4,
24 const int16_t *filter_y, int y_step_q4,
25 int w, int h, int taps) {
28 /* NOTE: This assumes that the filter table is 256-byte aligned. */
29 /* TODO(agrange) Modify to make independent of table alignment. */
30 const int16_t *const filter_x_base =
31 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
33 /* Adjust base pointer address for this source line */
36 for (y = 0; y < h; ++y) {
37 /* Initial phase offset */
38 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
40 for (x = 0; x < w; ++x) {
41 /* Per-pixel src offset */
42 const int src_x = x_q4 >> SUBPEL_BITS;
45 /* Pointer to filter to use */
46 const int16_t *const filter_x = filter_x_base +
47 (x_q4 & SUBPEL_MASK) * taps;
49 for (k = 0; k < taps; ++k)
50 sum += src[src_x + k] * filter_x[k];
52 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
54 /* Move to the next source pixel */
62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
63 uint8_t *dst, ptrdiff_t dst_stride,
64 const int16_t *filter_x0, int x_step_q4,
65 const int16_t *filter_y, int y_step_q4,
66 int w, int h, int taps) {
69 /* NOTE: This assumes that the filter table is 256-byte aligned. */
70 /* TODO(agrange) Modify to make independent of table alignment. */
71 const int16_t *const filter_x_base =
72 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
74 /* Adjust base pointer address for this source line */
77 for (y = 0; y < h; ++y) {
78 /* Initial phase offset */
79 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
81 for (x = 0; x < w; ++x) {
82 /* Per-pixel src offset */
83 const int src_x = x_q4 >> SUBPEL_BITS;
86 /* Pointer to filter to use */
87 const int16_t *const filter_x = filter_x_base +
88 (x_q4 & SUBPEL_MASK) * taps;
90 for (k = 0; k < taps; ++k)
91 sum += src[src_x + k] * filter_x[k];
93 dst[x] = ROUND_POWER_OF_TWO(dst[x] +
94 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
96 /* Move to the next source pixel */
104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
105 uint8_t *dst, ptrdiff_t dst_stride,
106 const int16_t *filter_x, int x_step_q4,
107 const int16_t *filter_y0, int y_step_q4,
108 int w, int h, int taps) {
111 /* NOTE: This assumes that the filter table is 256-byte aligned. */
112 /* TODO(agrange) Modify to make independent of table alignment. */
113 const int16_t *const filter_y_base =
114 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
116 /* Adjust base pointer address for this source column */
117 src -= src_stride * (taps / 2 - 1);
119 for (x = 0; x < w; ++x) {
120 /* Initial phase offset */
121 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
123 for (y = 0; y < h; ++y) {
124 /* Per-pixel src offset */
125 const int src_y = y_q4 >> SUBPEL_BITS;
128 /* Pointer to filter to use */
129 const int16_t *const filter_y = filter_y_base +
130 (y_q4 & SUBPEL_MASK) * taps;
132 for (k = 0; k < taps; ++k)
133 sum += src[(src_y + k) * src_stride] * filter_y[k];
135 dst[y * dst_stride] =
136 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
138 /* Move to the next source pixel */
146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
147 uint8_t *dst, ptrdiff_t dst_stride,
148 const int16_t *filter_x, int x_step_q4,
149 const int16_t *filter_y0, int y_step_q4,
150 int w, int h, int taps) {
153 /* NOTE: This assumes that the filter table is 256-byte aligned. */
154 /* TODO(agrange) Modify to make independent of table alignment. */
155 const int16_t *const filter_y_base =
156 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
158 /* Adjust base pointer address for this source column */
159 src -= src_stride * (taps / 2 - 1);
161 for (x = 0; x < w; ++x) {
162 /* Initial phase offset */
163 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
165 for (y = 0; y < h; ++y) {
166 /* Per-pixel src offset */
167 const int src_y = y_q4 >> SUBPEL_BITS;
170 /* Pointer to filter to use */
171 const int16_t *const filter_y = filter_y_base +
172 (y_q4 & SUBPEL_MASK) * taps;
174 for (k = 0; k < taps; ++k)
175 sum += src[(src_y + k) * src_stride] * filter_y[k];
177 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
178 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
180 /* Move to the next source pixel */
/*
 * Two-pass (horizontal, then vertical) sub-pixel convolution of a w x h
 * block through a fixed-size stack buffer.
 *
 *   src, src_stride      source pixels and the distance between source rows
 *   dst, dst_stride      output pixels and the distance between output rows
 *   filter_x, x_step_q4  horizontal kernel and step, passed to
 *                        convolve_horiz_c
 *   filter_y, y_step_q4  vertical kernel and step, passed to convolve_vert_c
 *   w, h                 output width and height in pixels (at most 64 each)
 *   taps                 number of coefficients per kernel (at most 8)
 */
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  /* Fixed size intermediate buffer places limits on parameters.
   * Maximum intermediate_height is 324, for y_step_q4 == 80,
   * h == 64, taps == 8.
   * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
   */
  uint8_t temp[64 * 324];
  int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;

  /* temp has 64 columns and 324 rows; enforce the limits that sizing
   * relies on. */
  assert(w <= 64);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 80);
  assert(x_step_q4 <= 80);

  if (intermediate_height < h)
    intermediate_height = h;

  /* Horizontal pass: start (taps / 2 - 1) rows above the block so the
   * vertical pass has the rows it needs above the first output row. */
  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
                   filter_x, x_step_q4, filter_y, y_step_q4, w,
                   intermediate_height, taps);

  /* Vertical pass: skip the extra leading rows again; convolve_vert_c
   * rewinds by the same amount internally. */
  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
                  x_step_q4, filter_y, y_step_q4, w, h, taps);
}
/*
 * C implementation of the 8-tap horizontal convolution: forwards every
 * argument unchanged to convolve_horiz_c with taps fixed at 8.
 *
 *   src, src_stride      source pixels and row stride
 *   dst, dst_stride      output pixels and row stride
 *   filter_x, x_step_q4  horizontal kernel and step
 *   filter_y, y_step_q4  vertical kernel and step (forwarded as-is)
 *   w, h                 output width and height in pixels
 */
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  convolve_horiz_c(src, src_stride, dst, dst_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
/*
 * C implementation of the 8-tap horizontal convolution that blends into
 * dst: forwards every argument unchanged to convolve_avg_horiz_c with taps
 * fixed at 8.
 *
 *   src, src_stride      source pixels and row stride
 *   dst, dst_stride      output pixels (read and written) and row stride
 *   filter_x, x_step_q4  horizontal kernel and step
 *   filter_y, y_step_q4  vertical kernel and step (forwarded as-is)
 *   w, h                 output width and height in pixels
 */
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
/*
 * C implementation of the 8-tap vertical convolution: forwards every
 * argument unchanged to convolve_vert_c with taps fixed at 8.
 *
 *   src, src_stride      source pixels and row stride
 *   dst, dst_stride      output pixels and row stride
 *   filter_x, x_step_q4  horizontal kernel and step (forwarded as-is)
 *   filter_y, y_step_q4  vertical kernel and step
 *   w, h                 output width and height in pixels
 */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  convolve_vert_c(src, src_stride, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
/*
 * C implementation of the 8-tap vertical convolution that blends into dst:
 * forwards every argument unchanged to convolve_avg_vert_c with taps fixed
 * at 8.
 *
 *   src, src_stride      source pixels and row stride
 *   dst, dst_stride      output pixels (read and written) and row stride
 *   filter_x, x_step_q4  horizontal kernel and step (forwarded as-is)
 *   filter_y, y_step_q4  vertical kernel and step
 *   w, h                 output width and height in pixels
 */
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
/*
 * C implementation of the 8-tap two-dimensional convolution: forwards every
 * argument unchanged to convolve_c with taps fixed at 8.
 *
 *   src, src_stride      source pixels and row stride
 *   dst, dst_stride      output pixels and row stride
 *   filter_x, x_step_q4  horizontal kernel and step
 *   filter_y, y_step_q4  vertical kernel and step
 *   w, h                 output width and height in pixels; convolve_c
 *                        asserts both are at most 64
 */
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  convolve_c(src, src_stride, dst, dst_stride,
             filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
263 uint8_t *dst, ptrdiff_t dst_stride,
264 const int16_t *filter_x, int x_step_q4,
265 const int16_t *filter_y, int y_step_q4,
267 /* Fixed size intermediate buffer places limits on parameters. */
268 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
272 vp9_convolve8(src, src_stride, temp, 64,
273 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
274 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
/*
 * Copies a w x h block of pixels from src to dst, one row at a time.
 *
 *   src, src_stride  source pixels and the distance between source rows
 *   dst, dst_stride  output pixels and the distance between output rows
 *   filter_x, filter_x_stride,
 *   filter_y, filter_y_stride
 *                    not used; no filtering is applied
 *   w, h             block width (bytes copied per row) and height (rows)
 */
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int r;

  for (r = h; r > 0; --r) {
    vpx_memcpy(dst, src, w);
    /* Advance both pointers to the next row */
    src += src_stride;
    dst += dst_stride;
  }
}
/*
 * Blends a w x h block of src into dst: each dst pixel becomes
 * ROUND_POWER_OF_TWO(dst + src, 1).
 *
 *   src, src_stride  source pixels and the distance between source rows
 *   dst, dst_stride  output pixels (read and written) and the distance
 *                    between output rows
 *   filter_x, filter_x_stride,
 *   filter_y, filter_y_stride
 *                    not used; no filtering is applied
 *   w, h             block width and height in pixels
 */
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int x, y;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x)
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

    /* Advance both pointers to the next row */
    src += src_stride;
    dst += dst_stride;
  }
}