Upstream version 11.40.271.0
[platform/framework/web/crosswalk.git] / src / third_party / libyuv / source / rotate_neon64.cc
1 /*
2  *  Copyright 2014 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include "libyuv/row.h"
12
13 #include "libyuv/basic_types.h"
14
15 #ifdef __cplusplus
16 namespace libyuv {
17 extern "C" {
18 #endif
19
20 // This module is for GCC Neon armv8 64 bit.
21 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
22
// TBL byte indices that transpose a 4x4 byte matrix held in one 16-byte
// register: output byte 4*c + r takes input byte 4*r + c.  Used by the
// 4-column residual of TransposeWx8_NEON.  The table is only ever read, so it
// is const.
23 static const uvec8 kVTbl4x4Transpose =
24   { 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15 };
25
26 void TransposeWx8_NEON(const uint8* src, int src_stride,
27                        uint8* dst, int dst_stride,
28                        int width) {
29   const uint8* src_temp = NULL;
30   int64 width64 = (int64) width;  // Work around clang 3.4 warning.
31   asm volatile (
32     // loops are on blocks of 8. loop will stop when
33     // counter gets to or below 0. starting the counter
34     // at w-8 allow for this
35     "sub         %3, %3, #8                      \n"
36
37     // handle 8x8 blocks. this should be the majority of the plane
38     "1:                                          \n"
39       "mov         %0, %1                        \n"
40
41       MEMACCESS(0)
42       "ld1        {v0.8b}, [%0], %5              \n"
43       MEMACCESS(0)
44       "ld1        {v1.8b}, [%0], %5              \n"
45       MEMACCESS(0)
46       "ld1        {v2.8b}, [%0], %5              \n"
47       MEMACCESS(0)
48       "ld1        {v3.8b}, [%0], %5              \n"
49       MEMACCESS(0)
50       "ld1        {v4.8b}, [%0], %5              \n"
51       MEMACCESS(0)
52       "ld1        {v5.8b}, [%0], %5              \n"
53       MEMACCESS(0)
54       "ld1        {v6.8b}, [%0], %5              \n"
55       MEMACCESS(0)
56       "ld1        {v7.8b}, [%0]                  \n"
57
58       "trn2     v16.8b, v0.8b, v1.8b             \n"
59       "trn1     v17.8b, v0.8b, v1.8b             \n"
60       "trn2     v18.8b, v2.8b, v3.8b             \n"
61       "trn1     v19.8b, v2.8b, v3.8b             \n"
62       "trn2     v20.8b, v4.8b, v5.8b             \n"
63       "trn1     v21.8b, v4.8b, v5.8b             \n"
64       "trn2     v22.8b, v6.8b, v7.8b             \n"
65       "trn1     v23.8b, v6.8b, v7.8b             \n"
66
67       "trn2     v3.4h, v17.4h, v19.4h            \n"
68       "trn1     v1.4h, v17.4h, v19.4h            \n"
69       "trn2     v2.4h, v16.4h, v18.4h            \n"
70       "trn1     v0.4h, v16.4h, v18.4h            \n"
71       "trn2     v7.4h, v21.4h, v23.4h            \n"
72       "trn1     v5.4h, v21.4h, v23.4h            \n"
73       "trn2     v6.4h, v20.4h, v22.4h            \n"
74       "trn1     v4.4h, v20.4h, v22.4h            \n"
75
76       "trn2     v21.2s, v1.2s, v5.2s             \n"
77       "trn1     v17.2s, v1.2s, v5.2s             \n"
78       "trn2     v20.2s, v0.2s, v4.2s             \n"
79       "trn1     v16.2s, v0.2s, v4.2s             \n"
80       "trn2     v23.2s, v3.2s, v7.2s             \n"
81       "trn1     v19.2s, v3.2s, v7.2s             \n"
82       "trn2     v22.2s, v2.2s, v6.2s             \n"
83       "trn1     v18.2s, v2.2s, v6.2s             \n"
84
85       "mov         %0, %2                        \n"
86
87     MEMACCESS(0)
88       "st1      {v17.8b}, [%0], %6               \n"
89     MEMACCESS(0)
90       "st1      {v16.8b}, [%0], %6               \n"
91     MEMACCESS(0)
92       "st1      {v19.8b}, [%0], %6               \n"
93     MEMACCESS(0)
94       "st1      {v18.8b}, [%0], %6               \n"
95     MEMACCESS(0)
96       "st1      {v21.8b}, [%0], %6               \n"
97     MEMACCESS(0)
98       "st1      {v20.8b}, [%0], %6               \n"
99     MEMACCESS(0)
100       "st1      {v23.8b}, [%0], %6               \n"
101     MEMACCESS(0)
102       "st1      {v22.8b}, [%0]                   \n"
103
104       "add         %1, %1, #8                    \n"  // src += 8
105       "add         %2, %2, %6, lsl #3            \n"  // dst += 8 * dst_stride
106       "subs        %3, %3, #8                    \n"  // w   -= 8
107       "b.ge        1b                            \n"
108
109     // add 8 back to counter. if the result is 0 there are
110     // no residuals.
111     "adds        %3, %3, #8                      \n"
112     "b.eq        4f                              \n"
113
114     // some residual, so between 1 and 7 lines left to transpose
115     "cmp         %3, #2                          \n"
116     "b.lt        3f                              \n"
117
118     "cmp         %3, #4                          \n"
119     "b.lt        2f                              \n"
120
121     // 4x8 block
122     "mov         %0, %1                          \n"
123     MEMACCESS(0)
124     "ld1     {v0.s}[0], [%0], %5                 \n"
125     MEMACCESS(0)
126     "ld1     {v0.s}[1], [%0], %5                 \n"
127     MEMACCESS(0)
128     "ld1     {v0.s}[2], [%0], %5                 \n"
129     MEMACCESS(0)
130     "ld1     {v0.s}[3], [%0], %5                 \n"
131     MEMACCESS(0)
132     "ld1     {v1.s}[0], [%0], %5                 \n"
133     MEMACCESS(0)
134     "ld1     {v1.s}[1], [%0], %5                 \n"
135     MEMACCESS(0)
136     "ld1     {v1.s}[2], [%0], %5                 \n"
137     MEMACCESS(0)
138     "ld1     {v1.s}[3], [%0]                     \n"
139
140     "mov         %0, %2                          \n"
141
142     MEMACCESS(4)
143     "ld1      {v2.16b}, [%4]                     \n"
144
145     "tbl      v3.16b, {v0.16b}, v2.16b           \n"
146     "tbl      v0.16b, {v1.16b}, v2.16b           \n"
147
148     // TODO(frkoenig): Rework shuffle above to
149     // write out with 4 instead of 8 writes.
150     MEMACCESS(0)
151     "st1 {v3.s}[0], [%0], %6                     \n"
152     MEMACCESS(0)
153     "st1 {v3.s}[1], [%0], %6                     \n"
154     MEMACCESS(0)
155     "st1 {v3.s}[2], [%0], %6                     \n"
156     MEMACCESS(0)
157     "st1 {v3.s}[3], [%0]                         \n"
158
159     "add         %0, %2, #4                      \n"
160     MEMACCESS(0)
161     "st1 {v0.s}[0], [%0], %6                     \n"
162     MEMACCESS(0)
163     "st1 {v0.s}[1], [%0], %6                     \n"
164     MEMACCESS(0)
165     "st1 {v0.s}[2], [%0], %6                     \n"
166     MEMACCESS(0)
167     "st1 {v0.s}[3], [%0]                         \n"
168
169     "add         %1, %1, #4                      \n"  // src += 4
170     "add         %2, %2, %6, lsl #2              \n"  // dst += 4 * dst_stride
171     "subs        %3, %3, #4                      \n"  // w   -= 4
172     "b.eq        4f                              \n"
173
174     // some residual, check to see if it includes a 2x8 block,
175     // or less
176     "cmp         %3, #2                          \n"
177     "b.lt        3f                              \n"
178
179     // 2x8 block
180     "2:                                          \n"
181     "mov         %0, %1                          \n"
182     MEMACCESS(0)
183     "ld1     {v0.h}[0], [%0], %5                 \n"
184     MEMACCESS(0)
185     "ld1     {v1.h}[0], [%0], %5                 \n"
186     MEMACCESS(0)
187     "ld1     {v0.h}[1], [%0], %5                 \n"
188     MEMACCESS(0)
189     "ld1     {v1.h}[1], [%0], %5                 \n"
190     MEMACCESS(0)
191     "ld1     {v0.h}[2], [%0], %5                 \n"
192     MEMACCESS(0)
193     "ld1     {v1.h}[2], [%0], %5                 \n"
194     MEMACCESS(0)
195     "ld1     {v0.h}[3], [%0], %5                 \n"
196     MEMACCESS(0)
197     "ld1     {v1.h}[3], [%0]                     \n"
198
199     "trn2    v2.8b, v0.8b, v1.8b                 \n"
200     "trn1    v3.8b, v0.8b, v1.8b                 \n"
201
202     "mov         %0, %2                          \n"
203
204     MEMACCESS(0)
205     "st1     {v3.8b}, [%0], %6                   \n"
206     MEMACCESS(0)
207     "st1     {v2.8b}, [%0]                       \n"
208
209     "add         %1, %1, #2                      \n"  // src += 2
210     "add         %2, %2, %6, lsl #1              \n"  // dst += 2 * dst_stride
211     "subs        %3, %3,  #2                     \n"  // w   -= 2
212     "b.eq        4f                              \n"
213
214     // 1x8 block
215     "3:                                          \n"
216     MEMACCESS(1)
217     "ld1         {v0.b}[0], [%1], %5             \n"
218     MEMACCESS(1)
219     "ld1         {v0.b}[1], [%1], %5             \n"
220     MEMACCESS(1)
221     "ld1         {v0.b}[2], [%1], %5             \n"
222     MEMACCESS(1)
223     "ld1         {v0.b}[3], [%1], %5             \n"
224     MEMACCESS(1)
225     "ld1         {v0.b}[4], [%1], %5             \n"
226     MEMACCESS(1)
227     "ld1         {v0.b}[5], [%1], %5             \n"
228     MEMACCESS(1)
229     "ld1         {v0.b}[6], [%1], %5             \n"
230     MEMACCESS(1)
231     "ld1         {v0.b}[7], [%1]                 \n"
232
233     MEMACCESS(2)
234     "st1         {v0.8b}, [%2]                   \n"
235
236     "4:                                          \n"
237
238     : "+r"(src_temp),                             // %0
239       "+r"(src),                                  // %1
240       "+r"(dst),                                  // %2
241       "+r"(width64)                               // %3
242     : "r"(&kVTbl4x4Transpose),                    // %4
243       "r"(static_cast<ptrdiff_t>(src_stride)),    // %5
244       "r"(static_cast<ptrdiff_t>(dst_stride))     // %6
245     : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
246       "v17", "v18", "v19", "v20", "v21", "v22", "v23"
247   );
248 }
249
// Two 16-byte TBL index vectors for a 4-register (64-byte) table, used by the
// 4-column residual of TransposeUVWx8_NEON.  The first vector gathers the
// bytes at even offsets 0/2/4/6 of each of the four registers (the bytes that
// go to dst_a); the second gathers the bytes at odd offsets 1/3/5/7 (the
// bytes that go to dst_b).  The table is only ever read, so it is const.
250 static const uint8 kVTbl4x4TransposeDi[32] =
251   { 0,  16, 32, 48,  2, 18, 34, 50,  4, 20, 36, 52,  6, 22, 38, 54,
252     1,  17, 33, 49,  3, 19, 35, 51,  5, 21, 37, 53,  7, 23, 39, 55};
253
254 void TransposeUVWx8_NEON(const uint8* src, int src_stride,
255                          uint8* dst_a, int dst_stride_a,
256                          uint8* dst_b, int dst_stride_b,
257                          int width) {
258   const uint8* src_temp = NULL;
259   int64 width64 = (int64) width;  // Work around clang 3.4 warning.
260   asm volatile (
261     // loops are on blocks of 8. loop will stop when
262     // counter gets to or below 0. starting the counter
263     // at w-8 allow for this
264     "sub       %4, %4, #8                      \n"
265
266     // handle 8x8 blocks. this should be the majority of the plane
267     "1:                                        \n"
268     "mov       %0, %1                          \n"
269
270     MEMACCESS(0)
271     "ld1       {v0.16b}, [%0], %5              \n"
272     MEMACCESS(0)
273     "ld1       {v1.16b}, [%0], %5              \n"
274     MEMACCESS(0)
275     "ld1       {v2.16b}, [%0], %5              \n"
276     MEMACCESS(0)
277     "ld1       {v3.16b}, [%0], %5              \n"
278     MEMACCESS(0)
279     "ld1       {v4.16b}, [%0], %5              \n"
280     MEMACCESS(0)
281     "ld1       {v5.16b}, [%0], %5              \n"
282     MEMACCESS(0)
283     "ld1       {v6.16b}, [%0], %5              \n"
284     MEMACCESS(0)
285     "ld1       {v7.16b}, [%0]                  \n"
286
287     "trn1      v16.16b, v0.16b, v1.16b         \n"
288     "trn2      v17.16b, v0.16b, v1.16b         \n"
289     "trn1      v18.16b, v2.16b, v3.16b         \n"
290     "trn2      v19.16b, v2.16b, v3.16b         \n"
291     "trn1      v20.16b, v4.16b, v5.16b         \n"
292     "trn2      v21.16b, v4.16b, v5.16b         \n"
293     "trn1      v22.16b, v6.16b, v7.16b         \n"
294     "trn2      v23.16b, v6.16b, v7.16b         \n"
295
296     "trn1      v0.8h, v16.8h, v18.8h           \n"
297     "trn2      v1.8h, v16.8h, v18.8h           \n"
298     "trn1      v2.8h, v20.8h, v22.8h           \n"
299     "trn2      v3.8h, v20.8h, v22.8h           \n"
300     "trn1      v4.8h, v17.8h, v19.8h           \n"
301     "trn2      v5.8h, v17.8h, v19.8h           \n"
302     "trn1      v6.8h, v21.8h, v23.8h           \n"
303     "trn2      v7.8h, v21.8h, v23.8h           \n"
304
305     "trn1      v16.4s, v0.4s, v2.4s            \n"
306     "trn2      v17.4s, v0.4s, v2.4s            \n"
307     "trn1      v18.4s, v1.4s, v3.4s            \n"
308     "trn2      v19.4s, v1.4s, v3.4s            \n"
309     "trn1      v20.4s, v4.4s, v6.4s            \n"
310     "trn2      v21.4s, v4.4s, v6.4s            \n"
311     "trn1      v22.4s, v5.4s, v7.4s            \n"
312     "trn2      v23.4s, v5.4s, v7.4s            \n"
313
314     "mov       %0, %2                          \n"
315
316     MEMACCESS(0)
317     "st1       {v16.d}[0], [%0], %6            \n"
318     MEMACCESS(0)
319     "st1       {v18.d}[0], [%0], %6            \n"
320     MEMACCESS(0)
321     "st1       {v17.d}[0], [%0], %6            \n"
322     MEMACCESS(0)
323     "st1       {v19.d}[0], [%0], %6            \n"
324     MEMACCESS(0)
325     "st1       {v16.d}[1], [%0], %6            \n"
326     MEMACCESS(0)
327     "st1       {v18.d}[1], [%0], %6            \n"
328     MEMACCESS(0)
329     "st1       {v17.d}[1], [%0], %6            \n"
330     MEMACCESS(0)
331     "st1       {v19.d}[1], [%0]                \n"
332
333     "mov       %0, %3                          \n"
334
335     MEMACCESS(0)
336     "st1       {v20.d}[0], [%0], %7            \n"
337     MEMACCESS(0)
338     "st1       {v22.d}[0], [%0], %7            \n"
339     MEMACCESS(0)
340     "st1       {v21.d}[0], [%0], %7            \n"
341     MEMACCESS(0)
342     "st1       {v23.d}[0], [%0], %7            \n"
343     MEMACCESS(0)
344     "st1       {v20.d}[1], [%0], %7            \n"
345     MEMACCESS(0)
346     "st1       {v22.d}[1], [%0], %7            \n"
347     MEMACCESS(0)
348     "st1       {v21.d}[1], [%0], %7            \n"
349     MEMACCESS(0)
350     "st1       {v23.d}[1], [%0]                \n"
351
352     "add       %1, %1, #16                     \n"  // src   += 8*2
353     "add       %2, %2, %6, lsl #3              \n"  // dst_a += 8 * dst_stride_a
354     "add       %3, %3, %7, lsl #3              \n"  // dst_b += 8 * dst_stride_b
355     "subs      %4, %4,  #8                     \n"  // w     -= 8
356     "b.ge      1b                              \n"
357
358     // add 8 back to counter. if the result is 0 there are
359     // no residuals.
360     "adds      %4, %4, #8                      \n"
361     "b.eq      4f                              \n"
362
363     // some residual, so between 1 and 7 lines left to transpose
364     "cmp       %4, #2                          \n"
365     "b.lt      3f                              \n"
366
367     "cmp       %4, #4                          \n"
368     "b.lt      2f                              \n"
369
370     // TODO(frkoenig): Clean this up
371     // 4x8 block
372     "mov       %0, %1                          \n"
373     MEMACCESS(0)
374     "ld1       {v0.8b}, [%0], %5               \n"
375     MEMACCESS(0)
376     "ld1       {v1.8b}, [%0], %5               \n"
377     MEMACCESS(0)
378     "ld1       {v2.8b}, [%0], %5               \n"
379     MEMACCESS(0)
380     "ld1       {v3.8b}, [%0], %5               \n"
381     MEMACCESS(0)
382     "ld1       {v4.8b}, [%0], %5               \n"
383     MEMACCESS(0)
384     "ld1       {v5.8b}, [%0], %5               \n"
385     MEMACCESS(0)
386     "ld1       {v6.8b}, [%0], %5               \n"
387     MEMACCESS(0)
388     "ld1       {v7.8b}, [%0]                   \n"
389
390     MEMACCESS(8)
391     "ld1       {v30.16b}, [%8], #16            \n"
392     "ld1       {v31.16b}, [%8]                 \n"
393
394     "tbl       v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b  \n"
395     "tbl       v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b  \n"
396     "tbl       v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b  \n"
397     "tbl       v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b  \n"
398
399     "mov       %0, %2                          \n"
400
401     MEMACCESS(0)
402     "st1       {v16.s}[0],  [%0], %6           \n"
403     MEMACCESS(0)
404     "st1       {v16.s}[1],  [%0], %6           \n"
405     MEMACCESS(0)
406     "st1       {v16.s}[2],  [%0], %6           \n"
407     MEMACCESS(0)
408     "st1       {v16.s}[3],  [%0], %6           \n"
409
410     "add       %0, %2, #4                      \n"
411     MEMACCESS(0)
412     "st1       {v18.s}[0], [%0], %6            \n"
413     MEMACCESS(0)
414     "st1       {v18.s}[1], [%0], %6            \n"
415     MEMACCESS(0)
416     "st1       {v18.s}[2], [%0], %6            \n"
417     MEMACCESS(0)
418     "st1       {v18.s}[3], [%0]                \n"
419
420     "mov       %0, %3                          \n"
421
422     MEMACCESS(0)
423     "st1       {v17.s}[0], [%0], %7            \n"
424     MEMACCESS(0)
425     "st1       {v17.s}[1], [%0], %7            \n"
426     MEMACCESS(0)
427     "st1       {v17.s}[2], [%0], %7            \n"
428     MEMACCESS(0)
429     "st1       {v17.s}[3], [%0], %7            \n"
430
431     "add       %0, %3, #4                      \n"
432     MEMACCESS(0)
433     "st1       {v19.s}[0],  [%0], %7           \n"
434     MEMACCESS(0)
435     "st1       {v19.s}[1],  [%0], %7           \n"
436     MEMACCESS(0)
437     "st1       {v19.s}[2],  [%0], %7           \n"
438     MEMACCESS(0)
439     "st1       {v19.s}[3],  [%0]               \n"
440
441     "add       %1, %1, #8                      \n"  // src   += 4 * 2
442     "add       %2, %2, %6, lsl #2              \n"  // dst_a += 4 * dst_stride_a
443     "add       %3, %3, %7, lsl #2              \n"  // dst_b += 4 * dst_stride_b
444     "subs      %4,  %4,  #4                    \n"  // w     -= 4
445     "b.eq      4f                              \n"
446
447     // some residual, check to see if it includes a 2x8 block,
448     // or less
449     "cmp       %4, #2                          \n"
450     "b.lt      3f                              \n"
451
452     // 2x8 block
453     "2:                                        \n"
454     "mov       %0, %1                          \n"
455     MEMACCESS(0)
456     "ld2       {v0.h, v1.h}[0], [%0], %5       \n"
457     MEMACCESS(0)
458     "ld2       {v2.h, v3.h}[0], [%0], %5       \n"
459     MEMACCESS(0)
460     "ld2       {v0.h, v1.h}[1], [%0], %5       \n"
461     MEMACCESS(0)
462     "ld2       {v2.h, v3.h}[1], [%0], %5       \n"
463     MEMACCESS(0)
464     "ld2       {v0.h, v1.h}[2], [%0], %5       \n"
465     MEMACCESS(0)
466     "ld2       {v2.h, v3.h}[2], [%0], %5       \n"
467     MEMACCESS(0)
468     "ld2       {v0.h, v1.h}[3], [%0], %5       \n"
469     MEMACCESS(0)
470     "ld2       {v2.h, v3.h}[3], [%0]           \n"
471
472     "trn1      v4.8b, v0.8b, v2.8b             \n"
473     "trn2      v5.8b, v0.8b, v2.8b             \n"
474     "trn1      v6.8b, v1.8b, v3.8b             \n"
475     "trn2      v7.8b, v1.8b, v3.8b             \n"
476
477     "mov       %0, %2                          \n"
478
479     MEMACCESS(0)
480     "st1       {v4.d}[0], [%0], %6             \n"
481     MEMACCESS(0)
482     "st1       {v6.d}[0], [%0]                 \n"
483
484     "mov       %0, %3                          \n"
485
486     MEMACCESS(0)
487     "st1       {v5.d}[0], [%0], %7             \n"
488     MEMACCESS(0)
489     "st1       {v7.d}[0], [%0]                 \n"
490
491     "add       %1, %1, #4                      \n"  // src   += 2 * 2
492     "add       %2, %2, %6, lsl #1              \n"  // dst_a += 2 * dst_stride_a
493     "add       %3, %3, %7, lsl #1              \n"  // dst_b += 2 * dst_stride_b
494     "subs      %4,  %4,  #2                    \n"  // w     -= 2
495     "b.eq      4f                              \n"
496
497     // 1x8 block
498     "3:                                        \n"
499     MEMACCESS(1)
500     "ld2       {v0.b, v1.b}[0], [%1], %5       \n"
501     MEMACCESS(1)
502     "ld2       {v0.b, v1.b}[1], [%1], %5       \n"
503     MEMACCESS(1)
504     "ld2       {v0.b, v1.b}[2], [%1], %5       \n"
505     MEMACCESS(1)
506     "ld2       {v0.b, v1.b}[3], [%1], %5       \n"
507     MEMACCESS(1)
508     "ld2       {v0.b, v1.b}[4], [%1], %5       \n"
509     MEMACCESS(1)
510     "ld2       {v0.b, v1.b}[5], [%1], %5       \n"
511     MEMACCESS(1)
512     "ld2       {v0.b, v1.b}[6], [%1], %5       \n"
513     MEMACCESS(1)
514     "ld2       {v0.b, v1.b}[7], [%1]           \n"
515
516     MEMACCESS(2)
517     "st1       {v0.d}[0], [%2]                 \n"
518     MEMACCESS(3)
519     "st1       {v1.d}[0], [%3]                 \n"
520
521     "4:                                        \n"
522
523     : "+r"(src_temp),                             // %0
524       "+r"(src),                                  // %1
525       "+r"(dst_a),                                // %2
526       "+r"(dst_b),                                // %3
527       "+r"(width64)                               // %4
528     : "r"(static_cast<ptrdiff_t>(src_stride)),    // %5
529       "r"(static_cast<ptrdiff_t>(dst_stride_a)),  // %6
530       "r"(static_cast<ptrdiff_t>(dst_stride_b)),  // %7
531       "r"(&kVTbl4x4TransposeDi)                   // %8
532     : "memory", "cc",
533       "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
534       "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
535       "v30", "v31"
536   );
537 }
538 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
539
540 #ifdef __cplusplus
541 }  // extern "C"
542 }  // namespace libyuv
543 #endif