"vadd.f32 d0, d0, d1 \n\t"
"vadd.f32 d2, d2, d3 \n\t"
"vadd.f32 d0, d0, d2 \n\t"
- "vmul.f32 d0, d0, %[quart] \n\t"
+ "vmul.f32 d0, d0, %P[quart] \n\t"
"vst1.32 {d0}, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [quart] "w" (quart) /* input operands */
for (; n >= 2; n -= 2) {
__asm__ __volatile__ (
"vld1.s16 d0, [%[src]]! \n\t"
- "vtbl.8 d0, {d0}, %[t] \n\t"
+ "vtbl.8 d0, {d0}, %P[t] \n\t"
"vst1.s16 d0, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t] "w" (t) /* input operands */
if (n > 0) {
__asm__ __volatile__ (
"vld1.32 d0[0], [%[src]]! \n\t"
- "vtbl.8 d0, {d0}, %[t] \n\t"
+ "vtbl.8 d0, {d0}, %P[t] \n\t"
"vst1.32 d0[0], [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t] "w" (t) /* input operands */
for (; n > 0; n--) {
__asm__ __volatile__ (
"vld1.32 d0[0], [%[src]]! \n\t"
- "vtbl.8 d0, {d0}, %[t] \n\t"
- "vst1.s16 d0, [%[dst]]! \n\t"
+ "vtbl.8 d0, {d0}, %P[t] \n\t"
+ "vst1.s16 d0, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t] "w" (t) /* input operands */
: "memory", "d0" /* clobber list */
for (; n > 0; n--) {
__asm__ __volatile__ (
"vld1.s16 d0, [%[src]]! \n\t"
- "vtbl.8 d0, {d0}, %[t] \n\t"
+ "vtbl.8 d0, {d0}, %P[t] \n\t"
"vst1.s16 d0, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t] "w" (t) /* input operands */
for (; n > 0; n--) {
__asm__ __volatile__ (
"vld1.f32 d0, [%[src]]! \n\t"
- "vtbl.8 d0, {d0}, %[t] \n\t"
+ "vtbl.8 d0, {d0}, %P[t] \n\t"
"vst1.s16 {d0}, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t] "w" (t) /* input operands */
for (; n > 0; n--) {
__asm__ __volatile__ (
"vld1.f32 d0, [%[src]]! \n\t"
- "vtbl.8 d1, {d0}, %[t0] \n\t"
- "vtbl.8 d2, {d0}, %[t1] \n\t"
+ "vtbl.8 d1, {d0}, %P[t0] \n\t"
+ "vtbl.8 d2, {d0}, %P[t1] \n\t"
"vst1.s16 {d1,d2}, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t0] "w" (t0), [t1] "w" (t1) /* input operands */
for (; n > 0; n--) {
__asm__ __volatile__ (
"vld1.f32 {d0,d1}, [%[src]]! \n\t"
- "vtbl.8 d2, {d0,d1}, %[t0] \n\t"
- "vtbl.8 d3, {d0,d1}, %[t1] \n\t"
+ "vtbl.8 d2, {d0,d1}, %P[t0] \n\t"
+ "vtbl.8 d3, {d0,d1}, %P[t1] \n\t"
"vst1.s16 {d2,d3}, [%[dst]]! \n\t"
: [dst] "+r" (dst), [src] "+r" (src) /* output operands */
: [t0] "w" (t0), [t1] "w" (t1) /* input operands */