s/__aarch64__/VPX_ARCH_AARCH64/

author James Zern <jzern@google.com>

Wed, 3 May 2023 01:37:59 +0000 (18:37 -0700)

committer James Zern <jzern@google.com>

Wed, 3 May 2023 17:04:34 +0000 (10:04 -0700)
author James Zern <jzern@google.com>
Wed, 3 May 2023 01:37:59 +0000 (18:37 -0700)
committer James Zern <jzern@google.com>
Wed, 3 May 2023 17:04:34 +0000 (10:04 -0700)
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.c b/vp8/encoder/arm/neon/fastquantizeb_neon.c

index 6fc6080..950c943 100644 (file)
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.c
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.c
@@ -28,11 +28,11 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
                     zig_zag1 = vld1q_u16(inv_zig_zag + 8);
    int16x8_t x0, x1, sz0, sz1, y0, y1;
    uint16x8_t eob0, eob1;
-#ifndef __aarch64__
+#if !VPX_ARCH_AARCH64
    uint16x4_t eob_d16;
    uint32x2_t eob_d32;
    uint32x4_t eob_q32;
-#endif  // __arch64__
+#endif  // !VPX_ARCH_AARCH64
  
    /* sign of z: z >> 15 */
    sz0 = vshrq_n_s16(z0, 15);
@@ -70,7 +70,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
  
    /* select the largest value */
    eob0 = vmaxq_u16(eob0, eob1);
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    *d->eob = (int8_t)vmaxvq_u16(eob0);
  #else
    eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0));
@@ -79,7 +79,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
    eob_d32 = vpmax_u32(eob_d32, eob_d32);
  
    vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
-#endif  // __aarch64__
+#endif  // VPX_ARCH_AARCH64
  
    /* qcoeff = x */
    vst1q_s16(d->qcoeff, x0);
diff --git a/vp9/encoder/arm/neon/vp9_denoiser_neon.c b/vp9/encoder/arm/neon/vp9_denoiser_neon.c

index 53e8c7e..d631cd4 100644 (file)
--- a/vp9/encoder/arm/neon/vp9_denoiser_neon.c
+++ b/vp9/encoder/arm/neon/vp9_denoiser_neon.c
@@ -21,7 +21,7 @@
  
  // Compute the sum of all pixel differences of this MB.
  static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlvq_s8(v_sum_diff_total);
  #else
    const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
diff --git a/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c b/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c

index 255e6fb..b82b3f9 100644 (file)
--- a/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
+++ b/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
@@ -94,7 +94,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
    // Work out the start point for the search
    const uint8_t *best_address = in_what;
    const uint8_t *new_best_address = best_address;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    int64x2_t v_ba_q = vdupq_n_s64((intptr_t)best_address);
  #else
    int32x4_t v_ba_d = vdupq_n_s32((intptr_t)best_address);
@@ -117,7 +117,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
        int8x16_t v_inside_d;
        uint32x4_t v_outside_d;
        int32x4_t v_cost_d, v_sad_d;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
        int64x2_t v_blocka[2];
  #else
        int32x4_t v_blocka[1];
@@ -138,7 +138,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
                      vreinterpretq_s32_s16(v_these_mv_w)));
  
        // If none of them are inside, then move on
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
        horiz_max = vmaxvq_u32(vreinterpretq_u32_s8(v_inside_d));
  #else
        horiz_max_0 = vmax_u32(vget_low_u32(vreinterpretq_u32_s8(v_inside_d)),
@@ -167,7 +167,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
  
        // Compute the SIMD pointer offsets.
        {
-#if defined(__aarch64__)  //  sizeof(intptr_t) == 8
+#if VPX_ARCH_AARCH64  //  sizeof(intptr_t) == 8
          // Load the offsets
          int64x2_t v_bo10_q = vld1q_s64((const int64_t *)&ss_os[i + 0]);
          int64x2_t v_bo32_q = vld1q_s64((const int64_t *)&ss_os[i + 2]);
@@ -234,7 +234,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
        // Find the minimum value and index horizontally in v_sad_d
        {
          uint32_t local_best_sad;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
          local_best_sad = vminvq_u32(vreinterpretq_u32_s32(v_sad_d));
  #else
          uint32x2_t horiz_min_0 =
@@ -256,7 +256,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
            uint32x4_t v_mask_d = vandq_u32(v_sel_d, v_idx_d);
            v_mask_d = vbslq_u32(v_sel_d, v_mask_d, vdupq_n_u32(0xffffffff));
  
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
            local_best_idx = vminvq_u32(v_mask_d);
  #else
            horiz_min_0 =
@@ -280,7 +280,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
      best_address = new_best_address;
  
      v_bmv_w = vreinterpretq_s16_s32(vdupq_n_s32(bmv.as_int));
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
      v_ba_q = vdupq_n_s64((intptr_t)best_address);
  #else
      v_ba_d = vdupq_n_s32((intptr_t)best_address);
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c

index c2b55fc..97ab136 100644 (file)
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -50,7 +50,7 @@ static VPX_FORCE_INLINE int16x8_t get_max_lane_eob(const int16_t *iscan_ptr,
  }
  
  static VPX_FORCE_INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    return (uint16_t)vmaxvq_s16(v_eobmax);
  #else
    const int16x4_t v_eobmax_3210 =
@@ -65,7 +65,7 @@ static VPX_FORCE_INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
        vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
  
    return (uint16_t)vget_lane_s16(v_eobmax_final, 0);
-#endif  // __aarch64__
+#endif  // VPX_ARCH_AARCH64
  }
  
  static VPX_FORCE_INLINE void load_fp_values(const int16_t *round_ptr,
@@ -81,7 +81,7 @@ static VPX_FORCE_INLINE void load_fp_values(const int16_t *round_ptr,
  static VPX_FORCE_INLINE void update_fp_values(int16x8_t *v_round,
                                                int16x8_t *v_quant,
                                                int16x8_t *v_dequant) {
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    *v_round = vdupq_laneq_s16(*v_round, 1);
    *v_quant = vdupq_laneq_s16(*v_quant, 1);
    *v_dequant = vdupq_laneq_s16(*v_dequant, 1);
diff --git a/vpx_dsp/arm/avg_neon.c b/vpx_dsp/arm/avg_neon.c

index d48115d..8c61fc2 100644 (file)
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -210,7 +210,7 @@ void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride, const uint8_t *b,
    const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
    const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
  
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    *min = *max = 0;  // Clear high bits
    *((uint8_t *)max) = vmaxvq_u8(ab07_max);
    *((uint8_t *)min) = vminvq_u8(ab07_min);
diff --git a/vpx_dsp/arm/highbd_avg_neon.c b/vpx_dsp/arm/highbd_avg_neon.c

index fc10197..8939ee1 100644 (file)
--- a/vpx_dsp/arm/highbd_avg_neon.c
+++ b/vpx_dsp/arm/highbd_avg_neon.c
@@ -114,7 +114,7 @@ void vpx_highbd_minmax_8x8_neon(const uint8_t *a, int a_stride,
    const uint16x8_t min4567 = vminq_u16(min45, min67);
    const uint16x8_t min07 = vminq_u16(min0123, min4567);
  
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    *min = *max = 0;  // Clear high bits
    *((uint16_t *)max) = vmaxvq_u16(max07);
    *((uint16_t *)min) = vminvq_u16(min07);
diff --git a/vpx_dsp/arm/highbd_quantize_neon.c b/vpx_dsp/arm/highbd_quantize_neon.c

index 526447a..d2a7add 100644 (file)
--- a/vpx_dsp/arm/highbd_quantize_neon.c
+++ b/vpx_dsp/arm/highbd_quantize_neon.c
@@ -166,7 +166,7 @@ void vpx_highbd_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
      } while (n_coeffs > 0);
    }
  
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    *eob_ptr = vmaxvq_u16(eob_max);
  #else
    {
@@ -176,7 +176,7 @@ void vpx_highbd_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
      const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
      vst1_lane_u16(eob_ptr, eob_max_2, 0);
    }
-#endif  // __aarch64__
+#endif  // VPX_ARCH_AARCH64
    // Need these here, else the compiler complains about mixing declarations and
    // code in C90
    (void)n_coeffs;
@@ -291,7 +291,7 @@ void vpx_highbd_quantize_b_32x32_neon(
      }
    }
  
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    *eob_ptr = vmaxvq_u16(eob_max);
  #else
    {
@@ -301,5 +301,5 @@ void vpx_highbd_quantize_b_32x32_neon(
      const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
      vst1_lane_u16(eob_ptr, eob_max_2, 0);
    }
-#endif  // __aarch64__
+#endif  // VPX_ARCH_AARCH64
  }
diff --git a/vpx_dsp/arm/quantize_neon.c b/vpx_dsp/arm/quantize_neon.c

index cc8f623..35c67f6 100644 (file)
--- a/vpx_dsp/arm/quantize_neon.c
+++ b/vpx_dsp/arm/quantize_neon.c
@@ -134,7 +134,7 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
      } while (n_coeffs > 0);
    }
  
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    *eob_ptr = vmaxvq_u16(eob_max);
  #else
    {
@@ -144,7 +144,7 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
      const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
      vst1_lane_u16(eob_ptr, eob_max_2, 0);
    }
-#endif  // __aarch64__
+#endif  // VPX_ARCH_AARCH64
    // Need these here, else the compiler complains about mixing declarations and
    // code in C90
    (void)scan;
@@ -276,7 +276,7 @@ void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr,
      }
    }
  
-#ifdef __aarch64__
+#if VPX_ARCH_AARCH64
    *eob_ptr = vmaxvq_u16(eob_max);
  #else
    {
@@ -286,5 +286,5 @@ void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr,
      const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
      vst1_lane_u16(eob_ptr, eob_max_2, 0);
    }
-#endif  // __aarch64__
+#endif  // VPX_ARCH_AARCH64
  }
diff --git a/vpx_dsp/arm/sum_neon.h b/vpx_dsp/arm/sum_neon.h

index a0c72f9..48a2fc0 100644 (file)
--- a/vpx_dsp/arm/sum_neon.h
+++ b/vpx_dsp/arm/sum_neon.h
@@ -17,7 +17,7 @@
  #include "vpx/vpx_integer.h"
  
  static INLINE uint16_t horizontal_add_uint8x4(const uint8x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlv_u8(a);
  #else
    const uint16x4_t b = vpaddl_u8(a);
@@ -27,7 +27,7 @@ static INLINE uint16_t horizontal_add_uint8x4(const uint8x8_t a) {
  }
  
  static INLINE uint16_t horizontal_add_uint8x8(const uint8x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlv_u8(a);
  #else
    const uint16x4_t b = vpaddl_u8(a);
@@ -38,7 +38,7 @@ static INLINE uint16_t horizontal_add_uint8x8(const uint8x8_t a) {
  }
  
  static INLINE uint16_t horizontal_add_uint8x16(const uint8x16_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlvq_u8(a);
  #else
    const uint16x8_t b = vpaddlq_u8(a);
@@ -50,7 +50,7 @@ static INLINE uint16_t horizontal_add_uint8x16(const uint8x16_t a) {
  }
  
  static INLINE uint16_t horizontal_add_uint16x4(const uint16x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddv_u16(a);
  #else
    const uint16x4_t b = vpadd_u16(a, a);
@@ -60,7 +60,7 @@ static INLINE uint16_t horizontal_add_uint16x4(const uint16x4_t a) {
  }
  
  static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlvq_s16(a);
  #else
    const int32x4_t b = vpaddlq_s16(a);
@@ -72,7 +72,7 @@ static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) {
  }
  
  static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlvq_u16(a);
  #else
    const uint32x4_t b = vpaddlq_u16(a);
@@ -84,7 +84,7 @@ static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) {
  }
  
  static INLINE uint32x4_t horizontal_add_4d_uint16x8(const uint16x8_t sum[4]) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
    const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
    const uint16x8_t b0 = vpaddq_u16(a0, a1);
@@ -102,7 +102,7 @@ static INLINE uint32x4_t horizontal_add_4d_uint16x8(const uint16x8_t sum[4]) {
  
  static INLINE uint32_t horizontal_long_add_uint16x8(const uint16x8_t vec_lo,
                                                      const uint16x8_t vec_hi) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlvq_u16(vec_lo) + vaddlvq_u16(vec_hi);
  #else
    const uint32x4_t vec_l_lo =
@@ -127,7 +127,7 @@ static INLINE uint32x4_t horizontal_long_add_4d_uint16x8(
    const uint32x4_t b1 = vpadalq_u16(a1, sum_hi[1]);
    const uint32x4_t b2 = vpadalq_u16(a2, sum_hi[2]);
    const uint32x4_t b3 = vpadalq_u16(a3, sum_hi[3]);
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    const uint32x4_t c0 = vpaddq_u32(b0, b1);
    const uint32x4_t c1 = vpaddq_u32(b2, b3);
    return vpaddq_u32(c0, c1);
@@ -143,7 +143,7 @@ static INLINE uint32x4_t horizontal_long_add_4d_uint16x8(
  }
  
  static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddv_s32(a);
  #else
    return vget_lane_s32(a, 0) + vget_lane_s32(a, 1);
@@ -151,7 +151,7 @@ static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) {
  }
  
  static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddv_u32(a);
  #else
    return vget_lane_u32(a, 0) + vget_lane_u32(a, 1);
@@ -159,7 +159,7 @@ static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) {
  }
  
  static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddvq_s32(a);
  #else
    const int64x2_t b = vpaddlq_s32(a);
@@ -170,7 +170,7 @@ static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) {
  }
  
  static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddvq_u32(a);
  #else
    const uint64x2_t b = vpaddlq_u32(a);
@@ -181,7 +181,7 @@ static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
  }
  
  static INLINE uint32x4_t horizontal_add_4d_uint32x4(const uint32x4_t sum[4]) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    uint32x4_t res01 = vpaddq_u32(sum[0], sum[1]);
    uint32x4_t res23 = vpaddq_u32(sum[2], sum[3]);
    return vpaddq_u32(res01, res23);
@@ -196,7 +196,7 @@ static INLINE uint32x4_t horizontal_add_4d_uint32x4(const uint32x4_t sum[4]) {
  }
  
  static INLINE uint64_t horizontal_long_add_uint32x4(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddlvq_u32(a);
  #else
    const uint64x2_t b = vpaddlq_u32(a);
@@ -205,7 +205,7 @@ static INLINE uint64_t horizontal_long_add_uint32x4(const uint32x4_t a) {
  }
  
  static INLINE int64_t horizontal_add_int64x2(const int64x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddvq_s64(a);
  #else
    return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1);
@@ -213,7 +213,7 @@ static INLINE int64_t horizontal_add_int64x2(const int64x2_t a) {
  }
  
  static INLINE uint64_t horizontal_add_uint64x2(const uint64x2_t a) {
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    return vaddvq_u64(a);
  #else
    return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1);
diff --git a/vpx_dsp/arm/transpose_neon.h b/vpx_dsp/arm/transpose_neon.h

index 518278f..74f85a6 100644 (file)
--- a/vpx_dsp/arm/transpose_neon.h
+++ b/vpx_dsp/arm/transpose_neon.h
@@ -23,7 +23,7 @@
  // b0.val[1]: 04 05 06 07 20 21 22 23
  static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
    int16x8x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    b0.val[0] = vreinterpretq_s16_s64(
        vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1)));
    b0.val[1] = vreinterpretq_s16_s64(
@@ -39,7 +39,7 @@ static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
  
  static INLINE int32x4x2_t vpx_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
    int32x4x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    b0.val[0] = vreinterpretq_s32_s64(
        vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1)));
    b0.val[1] = vreinterpretq_s32_s64(
@@ -53,7 +53,7 @@ static INLINE int32x4x2_t vpx_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
  
  static INLINE int64x2x2_t vpx_vtrnq_s64(int32x4_t a0, int32x4_t a1) {
    int64x2x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    b0.val[0] = vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1));
    b0.val[1] = vtrn2q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1));
  #else
@@ -67,7 +67,7 @@ static INLINE int64x2x2_t vpx_vtrnq_s64(int32x4_t a0, int32x4_t a1) {
  
  static INLINE uint8x16x2_t vpx_vtrnq_u64_to_u8(uint32x4_t a0, uint32x4_t a1) {
    uint8x16x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    b0.val[0] = vreinterpretq_u8_u64(
        vtrn1q_u64(vreinterpretq_u64_u32(a0), vreinterpretq_u64_u32(a1)));
    b0.val[1] = vreinterpretq_u8_u64(
@@ -83,7 +83,7 @@ static INLINE uint8x16x2_t vpx_vtrnq_u64_to_u8(uint32x4_t a0, uint32x4_t a1) {
  
  static INLINE uint16x8x2_t vpx_vtrnq_u64_to_u16(uint32x4_t a0, uint32x4_t a1) {
    uint16x8x2_t b0;
-#if defined(__aarch64__)
+#if VPX_ARCH_AARCH64
    b0.val[0] = vreinterpretq_u16_u64(
        vtrn1q_u64(vreinterpretq_u64_u32(a0), vreinterpretq_u64_u32(a1)));
    b0.val[1] = vreinterpretq_u16_u64(
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c

index b4cdd58..b312cc7 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -31,7 +31,7 @@
  // instructions. This optimization is much faster in speed unit test, but slowed
  // down the whole decoder by 5%.
  
-#if defined(__aarch64__) && \
+#if VPX_ARCH_AARCH64 && \
      (defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8))
  
  DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = {
@@ -1261,7 +1261,7 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
  
  #endif  // defined(__ARM_FEATURE_MATMUL_INT8)
  
-#else  // !(defined(__aarch64__) &&
+#else  // !(VPX_ARCH_AARCH64 &&
         //   (defined(__ARM_FEATURE_DOTPROD) ||
         //    defined(__ARM_FEATURE_MATMUL_INT8)))
  
@@ -2105,6 +2105,6 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
    }
  }
  
-#endif  // #if defined(__aarch64__) &&
+#endif  // #if VPX_ARCH_AARCH64 &&
          //     (defined(__ARM_FEATURE_DOTPROD) ||
          //      defined(__ARM_FEATURE_MATMUL_INT8))
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.h b/vpx_dsp/arm/vpx_convolve8_neon.h

index ed7f180..07cf824 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve8_neon.h
+++ b/vpx_dsp/arm/vpx_convolve8_neon.h
@@ -16,7 +16,7 @@
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
  
  static INLINE int32x4_t convolve8_4_sdot_partial(const int8x16_t samples_lo,
                                                   const int8x16_t samples_hi,
@@ -114,9 +114,9 @@ static INLINE uint8x8_t convolve8_8_sdot(uint8x16_t samples,
    return vqrshrun_n_s16(sum, 7);
  }
  
-#endif  // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif  // VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
  
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
  
  static INLINE int32x4_t convolve8_4_usdot_partial(const uint8x16_t samples_lo,
                                                    const uint8x16_t samples_hi,
@@ -199,7 +199,7 @@ static INLINE uint8x8_t convolve8_8_usdot(uint8x16_t samples,
    return vqrshrun_n_s16(sum, 7);
  }
  
-#endif  // defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#endif  // VPX_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
  
  static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1,
                                      const int16x4_t s2, const int16x4_t s3,
author	James Zern <jzern@google.com>
	Wed, 3 May 2023 01:37:59 +0000 (18:37 -0700)
committer	James Zern <jzern@google.com>
	Wed, 3 May 2023 17:04:34 +0000 (10:04 -0700)
vp8/encoder/arm/neon/fastquantizeb_neon.c		patch | blob | history
vp9/encoder/arm/neon/vp9_denoiser_neon.c		patch | blob | history
vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c		patch | blob | history
vp9/encoder/arm/neon/vp9_quantize_neon.c		patch | blob | history
vpx_dsp/arm/avg_neon.c		patch | blob | history
vpx_dsp/arm/highbd_avg_neon.c		patch | blob | history
vpx_dsp/arm/highbd_quantize_neon.c		patch | blob | history
vpx_dsp/arm/quantize_neon.c		patch | blob | history
vpx_dsp/arm/sum_neon.h		patch | blob | history
vpx_dsp/arm/transpose_neon.h		patch | blob | history
vpx_dsp/arm/vpx_convolve8_neon.c		patch | blob | history
vpx_dsp/arm/vpx_convolve8_neon.h		patch | blob | history