From d341f843e2a5b03fd4a10ee83237c1e32a2b8671 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Wed, 22 Jul 2015 10:52:07 -0700 Subject: [PATCH] Refactor forward/inverse transform msa implementations This commit factors out common macro definitions from the forward and inverse transform implementations into vpx_dsp. It removes the duplicate macro definitions from encoder and decoder folders. Change-Id: I92301acbd3317075e9c5f03328a25abb123bca78 --- vp9/common/mips/msa/vp9_idct16x16_msa.c | 76 ++++++++--------- vp9/common/mips/msa/vp9_idct32x32_msa.c | 132 ++++++++++++++--------------- vp9/common/mips/msa/vp9_idct_msa.h | 145 ++++++++------------------------ vp9/encoder/mips/msa/vp9_fdct_msa.h | 79 +---------------- vpx_dsp/mips/txfm_macros_msa.h | 91 ++++++++++++++++++++ vpx_dsp/vpx_dsp.mk | 1 + 6 files changed, 231 insertions(+), 293 deletions(-) create mode 100644 vpx_dsp/mips/txfm_macros_msa.h diff --git a/vp9/common/mips/msa/vp9_idct16x16_msa.c b/vp9/common/mips/msa/vp9_idct16x16_msa.c index dd7ca35..d0ffdf6 100644 --- a/vp9/common/mips/msa/vp9_idct16x16_msa.c +++ b/vp9/common/mips/msa/vp9_idct16x16_msa.c @@ -25,12 +25,12 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); TRANSPOSE8x8_SH_SH(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); - VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); - VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); + DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); + DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); - VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); - VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); - VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); + DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); + DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); SUB4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg0, reg12, reg4, reg8); @@ -38,16 +38,16 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { reg10); /* stage 2 */ - VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); - VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); + DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); + DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); reg9 = reg1 - loc2; reg1 = reg1 + loc2; reg7 = reg15 - loc3; reg15 = reg15 + loc3; - VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); - VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); + DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); + DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); loc1 = reg15 + reg3; @@ -63,8 +63,8 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { tmp7 = loc1; reg0 = loc2; - VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); - VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); + DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); + DOTP_CONST_PAIR((-reg5), (-reg11), 
cospi_8_64, cospi_24_64, reg5, reg11); loc0 = reg9 + reg5; reg5 = reg9 - reg5; @@ -77,13 +77,13 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { loc2 = reg4 - loc0; tmp5 = loc1; - VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); + DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); reg10 = loc0; reg11 = loc1; - VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); + DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); reg13 = loc2; @@ -117,12 +117,12 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, /* load bottom 8x8 */ LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); - VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); - VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); + DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); + DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); - VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); - VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); - VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); + DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); + DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); reg0 = reg2 - loc1; @@ -135,16 +135,16 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, reg10 = reg10 + loc2; /* stage 2 */ - VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); - VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); + DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); + DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); reg9 = reg1 - loc2; reg1 = reg1 + loc2; reg7 = reg15 - loc3; reg15 = reg15 + loc3; - VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); - VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); + DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); + DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); loc1 = reg15 + reg3; @@ -160,8 +160,8 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, tmp7 = loc1; reg0 = loc2; - VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); - VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); + DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); + DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); loc0 = reg9 + reg5; reg5 = reg9 - reg5; @@ -174,13 +174,13 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, loc2 = reg4 - loc0; tmp5 = loc1; - VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); + DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); reg10 = loc0; reg11 = loc1; - VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); + DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); 
BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); reg13 = loc2; @@ -350,17 +350,17 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); - VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); + MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); - VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); + MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0); k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); - VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); + MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); r1 = LD_SH(input + 1 * 16); r2 = LD_SH(input + 2 * 16); @@ -375,12 +375,12 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); - VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7); + MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7); k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); - VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15); + MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15); BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4); BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10); out1 = -out1; @@ -397,7 +397,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); - VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); + MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); out8 = -out8; @@ -414,7 +414,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); - VP9_MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7); + MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7); out4 = -out4; SRARI_H2_SH(out4, out5, 6); dst4 = LD_UB(dst + 3 * dst_stride); @@ -426,7 +426,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, ST8x1_UB(res4, dst + 3 * dst_stride); ST8x1_UB(res5, dst + 12 * dst_stride); - VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); + MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); out13 = -out13; SRARI_H2_SH(out12, out13, 6); dst12 = LD_UB(dst + 2 * dst_stride); @@ -440,7 +440,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); - VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7); 
+ MADD_SHORT(out6, out7, k0, k3, out6, out7); SRARI_H2_SH(out6, out7, 6); dst6 = LD_UB(dst + 4 * dst_stride); dst7 = LD_UB(dst + 11 * dst_stride); @@ -451,7 +451,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, ST8x1_UB(res6, dst + 4 * dst_stride); ST8x1_UB(res7, dst + 11 * dst_stride); - VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11); + MADD_SHORT(out10, out11, k0, k3, out10, out11); SRARI_H2_SH(out10, out11, 6); dst10 = LD_UB(dst + 6 * dst_stride); dst11 = LD_UB(dst + 9 * dst_stride); @@ -464,7 +464,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); - VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3); + MADD_SHORT(h10, h11, k1, k2, out2, out3); SRARI_H2_SH(out2, out3, 6); dst2 = LD_UB(dst + 7 * dst_stride); dst3 = LD_UB(dst + 8 * dst_stride); @@ -475,7 +475,7 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, ST8x1_UB(res2, dst + 7 * dst_stride); ST8x1_UB(res3, dst + 8 * dst_stride); - VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15); + MADD_SHORT(out14, out15, k1, k2, out14, out15); SRARI_H2_SH(out14, out15, 6); dst14 = LD_UB(dst + 5 * dst_stride); dst15 = LD_UB(dst + 10 * dst_stride); diff --git a/vp9/common/mips/msa/vp9_idct32x32_msa.c b/vp9/common/mips/msa/vp9_idct32x32_msa.c index 77d53a4..d9af1d0 100644 --- a/vp9/common/mips/msa/vp9_idct32x32_msa.c +++ b/vp9/common/mips/msa/vp9_idct32x32_msa.c @@ -47,26 +47,26 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, /* Even stage 1 */ LD_SH8(tmp_buf, 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); - VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); + DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); + DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0); - VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); loc1 = vec3; loc0 = vec1; - VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); - VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); + DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); + DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0); BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4); BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5); /* Even stage 2 */ LD_SH8((tmp_buf + 16), 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); - VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); - VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); - VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); + DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); + DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); + DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); + DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); vec0 = reg0 + reg4; reg0 = reg0 - reg4; @@ -84,16 +84,16 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf, reg4 = reg5 - vec1; reg5 = reg5 + vec1; - VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, 
reg0, reg7); - VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); + DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); + DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); vec0 = reg0 - reg6; reg0 = reg0 + reg6; vec1 = reg7 - reg1; reg7 = reg7 + reg1; - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); - VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); + DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */ BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0); @@ -137,10 +137,10 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, reg6 = LD_SH(tmp_buf + 25 * 8); reg7 = LD_SH(tmp_buf + 31 * 8); - VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); - VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); - VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); - VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); + DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); + DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); + DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); + DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); vec0 = reg0 + reg3; reg0 = reg0 - reg3; @@ -157,16 +157,16 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8); SUB2(reg5, reg4, reg3, reg2, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); + DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf), 8); /* 4 Stores */ - VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); - VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); + DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); + DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3); ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8); - VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); + DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8); /* Odd stage 2 */ @@ -180,21 +180,21 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, reg6 = LD_SH(tmp_buf + 27 * 8); reg7 = LD_SH(tmp_buf + 29 * 8); - VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); - VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); - VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); - VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); + DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); + DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); + DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); + DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); /* 4 Stores */ SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec0, vec1, vec2, vec3); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); - VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); + DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); + DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); BUTTERFLY_4(loc3, loc2, loc0, 
loc1, vec1, vec0, vec2, vec3); ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8); - VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); + DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8); /* 4 Stores */ @@ -204,7 +204,7 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, ST_SH(reg0, (tmp_odd_buf + 13 * 8)); ST_SH(reg1, (tmp_odd_buf + 14 * 8)); - VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); + DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8); /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ @@ -218,10 +218,10 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8); SUB2(reg0, reg4, reg1, reg5, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg2, reg6, reg3, reg7, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8); /* Load 8 & Store 8 */ @@ -233,10 +233,10 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf, ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8); SUB2(reg0, reg4, reg3, reg7, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg1, reg5, reg2, reg6, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } @@ -363,16 +363,16 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); tmp_buf += (2 * 32); - VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); - VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); + DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); + DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0); - VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); loc1 = vec3; loc0 = vec1; - VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); - VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); + DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); + DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0); BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4); BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5); @@ -381,10 +381,10 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, /* Load 8 */ LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); - VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); - VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); - VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); - VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); + DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); + 
DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); + DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); + DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); vec0 = reg0 + reg4; reg0 = reg0 - reg4; @@ -402,16 +402,16 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf, reg4 = reg5 - vec1; reg5 = reg5 + vec1; - VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); - VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); + DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); + DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); vec0 = reg0 - reg6; reg0 = reg0 + reg6; vec1 = reg7 - reg1; reg7 = reg7 + reg1; - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); - VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); + DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */ /* Store 8 */ @@ -448,10 +448,10 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, reg6 = LD_SH(tmp_buf + 25 * 32); reg7 = LD_SH(tmp_buf + 31 * 32); - VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); - VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); - VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); - VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); + DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); + DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); + DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); + DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); vec0 = reg0 + reg3; reg0 = reg0 - reg3; @@ -467,15 +467,15 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, ADD2(reg5, reg4, reg3, reg2, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8); SUB2(reg5, reg4, reg3, reg2, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); + DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); ST_SH2(vec0, vec1, tmp_odd_buf, 8); /* 4 Stores */ - VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); - VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); + DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); + DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3); ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8); - VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); + DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8); /* Odd stage 2 */ @@ -489,25 +489,25 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, reg6 = LD_SH(tmp_buf + 27 * 32); reg7 = LD_SH(tmp_buf + 29 * 32); - VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); - VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); - VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); - VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); + DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); + DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); + DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); + DOTP_CONST_PAIR(reg7, reg0, 
cospi_3_64, cospi_29_64, reg0, reg7); /* 4 Stores */ SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec0, vec1, vec2, vec3); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); - VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); + DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); + DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); BUTTERFLY_4(loc2, loc3, loc1, loc0, vec0, vec1, vec3, vec2); ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8); - VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); + DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8); /* 4 Stores */ ADD4(reg0, reg3, reg1, reg2, reg5, reg6, reg4, reg7, vec0, vec1, vec2, vec3); BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2); ST_SH2(reg0, reg1, (tmp_odd_buf + 13 * 8), 8); - VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); + DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8); /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ @@ -519,10 +519,10 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8); SUB2(reg0, reg4, reg1, reg5, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg2, reg6, reg3, reg7, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8); /* Load 8 & Store 8 */ @@ -533,10 +533,10 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf, ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8); SUB2(reg0, reg4, reg3, reg7, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg1, reg5, reg2, reg6, vec0, vec1); - VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); + DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } diff --git a/vp9/common/mips/msa/vp9_idct_msa.h b/vp9/common/mips/msa/vp9_idct_msa.h index c86e65a..1d44aa9 100644 --- a/vp9/common/mips/msa/vp9_idct_msa.h +++ b/vp9/common/mips/msa/vp9_idct_msa.h @@ -14,52 +14,7 @@ #include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/mips/macros_msa.h" - -#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \ - v8i16 k0_m = __msa_fill_h(cnst0); \ - v4i32 s0_m, s1_m, s2_m, s3_m; \ - \ - s0_m = (v4i32)__msa_fill_h(cnst1); \ - k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \ - \ - ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \ - ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \ - DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \ - SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ - out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ - \ - DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \ - SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ - out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ -} - -#define VP9_DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \ - dst0, dst1, dst2, dst3) { \ - v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \ - v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \ - \ - DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \ - tp0_m, tp2_m, tp3_m, 
tp4_m); \ - DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \ - tp5_m, tp6_m, tp7_m, tp8_m); \ - BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \ - BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \ - SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \ - SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \ - PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \ - dst0, dst1, dst2, dst3); \ -} - -#define VP9_DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \ - v8i16 dst_m; \ - v4i32 tp0_m, tp1_m; \ - \ - DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \ - SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \ - dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \ - \ - dst_m; \ -}) +#include "vpx_dsp/mips/txfm_macros_msa.h" #define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \ out0, out1, out2, out3, out4, out5, out6, out7) { \ @@ -79,9 +34,9 @@ \ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ - VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ - cnst1_m, cnst2_m, cnst3_m, in7, in0, \ - in4, in3); \ + DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst1_m, cnst2_m, cnst3_m, in7, in0, \ + in4, in3); \ \ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ cnst2_m = -cnst0_m; \ @@ -93,9 +48,9 @@ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ \ - VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ - cnst1_m, cnst2_m, cnst3_m, in5, in2, \ - in6, in1); \ + DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst1_m, cnst2_m, cnst3_m, in5, in2, \ + in6, in1); \ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ out7 = -s0_m; \ out0 = s1_m; \ @@ -109,57 +64,25 @@ \ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ - VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ - cnst2_m, cnst3_m, cnst1_m, out1, out6, \ - s0_m, s1_m); \ + DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \ + cnst2_m, cnst3_m, cnst1_m, out1, out6, \ + s0_m, s1_m); \ \ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ \ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ - out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ - out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ - out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ - out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ + out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ + out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ + out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ + out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ \ out1 = -out1; \ out3 = -out3; \ out5 = -out5; \ } -#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \ - v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ - v8i16 madd_s0_m, madd_s1_m; \ - \ - ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \ - DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \ - c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \ - SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \ - PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \ -} - -#define VP9_MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \ - out0, out1, out2, out3) { \ - v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ - v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \ - \ - ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); 
\ - ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \ - DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ - cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ - BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ - m4_m, m5_m, tmp3_m, tmp2_m); \ - SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ - PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ - DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ - cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ - BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ - m4_m, m5_m, tmp3_m, tmp2_m); \ - SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ - PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ -} - #define VP9_SET_COSPI_PAIR(c0_h, c1_h) ({ \ v8i16 out0_m, r0_m, r1_m; \ \ @@ -422,38 +345,38 @@ k1_m = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); \ - VP9_MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, \ - g0_m, g1_m, g2_m, g3_m); \ + MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, \ + g0_m, g1_m, g2_m, g3_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \ - VP9_MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, \ - g4_m, g5_m, g6_m, g7_m); \ + MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, \ + g4_m, g5_m, g6_m, g7_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \ - VP9_MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, \ - g8_m, g9_m, g10_m, g11_m); \ + MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, \ + g8_m, g9_m, g10_m, g11_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \ - VP9_MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, \ - g12_m, g13_m, g14_m, g15_m); \ + MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, \ + g12_m, g13_m, g14_m, g15_m); \ \ /* stage 2 */ \ k0_m = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \ k2_m = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \ - VP9_MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, \ - h0_m, h1_m, h2_m, h3_m); \ + MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, \ + h0_m, h1_m, h2_m, h3_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \ k1_m = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \ - VP9_MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, \ - h4_m, h5_m, h6_m, h7_m); \ + MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, \ + h4_m, h5_m, h6_m, h7_m); \ BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \ BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, \ h8_m, h9_m, h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \ @@ -463,19 +386,19 @@ k0_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ k2_m = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \ - VP9_MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, \ - out4, out6, out5, out7); \ - VP9_MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, \ - 
out12, out14, out13, out15); \ + MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, \ + out4, out6, out5, out7); \ + MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, \ + out12, out14, out13, out15); \ \ /* stage 4 */ \ k0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \ k1_m = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \ k3_m = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \ - VP9_MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \ - VP9_MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \ - VP9_MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \ - VP9_MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \ + MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \ + MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \ + MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \ + MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \ } #endif /* VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_ */ diff --git a/vp9/encoder/mips/msa/vp9_fdct_msa.h b/vp9/encoder/mips/msa/vp9_fdct_msa.h index 8480008..da7565b 100644 --- a/vp9/encoder/mips/msa/vp9_fdct_msa.h +++ b/vp9/encoder/mips/msa/vp9_fdct_msa.h @@ -14,52 +14,7 @@ #include "vpx_ports/mem.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/mips/macros_msa.h" - -#define DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \ - v8i16 k0_m = __msa_fill_h(cnst0); \ - v4i32 s0_m, s1_m, s2_m, s3_m; \ - \ - s0_m = (v4i32)__msa_fill_h(cnst1); \ - k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \ - \ - ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \ - ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \ - DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \ - SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ - out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ - \ - DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \ - SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ - out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ -} - -#define DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \ - dst0, dst1, dst2, dst3) { \ - v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \ - v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \ - \ - DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \ - tp0_m, tp2_m, tp3_m, tp4_m); \ - DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \ - tp5_m, tp6_m, tp7_m, tp8_m); \ - BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \ - BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \ - SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \ - SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \ - PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \ - dst0, dst1, dst2, dst3); \ -} - -#define DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \ - v8i16 dst_m; \ - v4i32 tp0_m, tp1_m; \ - \ - DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \ - SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \ - dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \ - \ - dst_m; \ -}) +#include "vpx_dsp/mips/txfm_macros_msa.h" #define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \ out0, out1, out2, out3, out4, out5, out6, out7) { \ @@ -127,38 +82,6 @@ out5 = -out5; \ } -#define MADD_SHORT(m0, m1, c0, c1, res0, res1) { \ - v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ - v8i16 madd_s0_m, madd_s1_m; \ - \ - ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \ - DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \ - c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \ - SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \ - PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \ -} - 
-#define MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \ - out0, out1, out2, out3) { \ - v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ - v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \ - \ - ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \ - ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \ - DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ - cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ - BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ - m4_m, m5_m, tmp3_m, tmp2_m); \ - SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ - PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ - DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ - cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ - BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ - m4_m, m5_m, tmp3_m, tmp2_m); \ - SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ - PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ -} - #define LD_HADD(psrc, stride) ({ \ v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m; \ v4i32 vec_w_m; \ diff --git a/vpx_dsp/mips/txfm_macros_msa.h b/vpx_dsp/mips/txfm_macros_msa.h new file mode 100644 index 0000000..9975f0c --- /dev/null +++ b/vpx_dsp/mips/txfm_macros_msa.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_ +#define VPX_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_ + +#define DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \ + v8i16 k0_m = __msa_fill_h(cnst0); \ + v4i32 s0_m, s1_m, s2_m, s3_m; \ + \ + s0_m = (v4i32)__msa_fill_h(cnst1); \ + k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \ + \ + ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \ + ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \ + DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \ + SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ + out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ + \ + DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \ + SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ + out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ +} + +#define DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \ + dst0, dst1, dst2, dst3) { \ + v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \ + v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \ + \ + DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \ + tp0_m, tp2_m, tp3_m, tp4_m); \ + DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \ + tp5_m, tp6_m, tp7_m, tp8_m); \ + BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \ + BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \ + SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \ + SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \ + PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \ + dst0, dst1, dst2, dst3); \ +} + +#define DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \ + v8i16 dst_m; \ + v4i32 tp0_m, tp1_m; \ + \ + DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \ + SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \ + dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \ + \ + dst_m; \ +}) + +#define MADD_SHORT(m0, m1, c0, c1, res0, res1) { \ + v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ + v8i16 madd_s0_m, madd_s1_m; \ + \ + 
ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \ + c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \ + SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \ +} + +#define MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \ + out0, out1, out2, out3) { \ + v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ + v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \ + \ + ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \ + ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ + cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ + m4_m, m5_m, tmp3_m, tmp2_m); \ + SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ + DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \ + cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ + BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \ + m4_m, m5_m, tmp3_m, tmp2_m); \ + SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ + PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ +} +#endif // VPX_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_ diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk index 07f2d6a..560b470 100644 --- a/vpx_dsp/vpx_dsp.mk +++ b/vpx_dsp/vpx_dsp.mk @@ -72,6 +72,7 @@ DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3.asm endif DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c endif # CONFIG_VP9_ENCODER +DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h # quantization ifeq ($(CONFIG_VP9_ENCODER),yes) -- 2.7.4
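
Note (illustration, not part of the patch): after this refactor, the encoder's
fdct and the decoder's idct MSA files both compile against the single shared
definition of DOTP_CONST_PAIR, DOT_ADD_SUB_SRARI_PCK, DOT_SHIFT_RIGHT_PCK_H,
MADD_SHORT and MADD_BF in vpx_dsp/mips/txfm_macros_msa.h, so the constant-pair
rotation and its DCT_CONST_BITS rounding stay bit-identical on both sides.
Below is a minimal consumer sketch, assuming a MIPS toolchain with MSA enabled
(-mmsa); rotate_even_stage1() is a hypothetical helper name used only for this
example and does not appear in the patch:

    #include "vpx_dsp/mips/macros_msa.h"       /* ILVRL_H2_SW, DOTP_SH2_SW, ... */
    #include "vpx_dsp/mips/txfm_macros_msa.h"  /* shared DOTP_CONST_PAIR etc. */
    #include "vp9/common/vp9_idct.h"           /* cospi_*_64, DCT_CONST_BITS */

    /* Hypothetical example: rotate one even-half pair of the idct16 the way
     * the call sites above do after dropping the VP9_ prefix. The macro
     * computes the butterfly rotation
     *   out0 = in0*c0 - in1*c1,  out1 = in1*c0 + in0*c1,
     * rounded back to 16 bits by DCT_CONST_BITS. */
    static void rotate_even_stage1(v8i16 *in0, v8i16 *in1) {
      DOTP_CONST_PAIR(*in0, *in1, cospi_28_64, cospi_4_64, *in0, *in1);
    }

Like the original per-folder copies, the shared header leaves the primitive
vector macros (ILVRL_H2_SW, SRARI_W2_SW, ...) to vpx_dsp/mips/macros_msa.h,
so any consumer is expected to include both headers, as vp9_idct_msa.h and
vp9_fdct_msa.h do in the hunks above.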