void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
int /*tx_type*/) {
- vp9_idct16x16_256_add_c(in, dest, stride);
+ vpx_idct16x16_256_add_c(in, dest, stride);
}
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
#if CONFIG_VP9_HIGHBITDEPTH
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
+ vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
}
void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
+ vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
}
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
}
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
+ vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
}
void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
+ vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
}
#if HAVE_SSE2
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
+ vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
}
void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
+ vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
}
void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
+ vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
}
void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
+ vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
::testing::Values(
make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
- make_tuple(&vpx_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans16x16DCT,
::testing::Values(
- make_tuple(&vpx_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
NEON, Trans16x16DCT,
::testing::Values(
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
+ &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
SSE2, Trans16x16DCT,
::testing::Values(
make_tuple(&vpx_fdct16x16_sse2,
- &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
+ &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
make_tuple(&vpx_highbd_fdct16x16_c,
&idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
make_tuple(&vpx_fdct16x16_sse2,
- &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+ &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
MSA, Trans16x16DCT,
::testing::Values(
make_tuple(&vpx_fdct16x16_msa,
- &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
+ &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
MSA, Trans16x16HT,
::testing::Values(
#if CONFIG_VP9_HIGHBITDEPTH
void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
+ vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);
}
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
+ vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
}
void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
+ vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
make_tuple(&vpx_highbd_fdct32x32_rd_c,
&idct32x32_12, 1, VPX_BITS_12),
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
+ &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c,
- &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+ &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans32x32Test,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
+ &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c,
- &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+ &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
NEON, Trans32x32Test,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
+ &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c,
- &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
+ &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
SSE2, Trans32x32Test,
::testing::Values(
make_tuple(&vpx_fdct32x32_sse2,
- &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+ &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_sse2,
- &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+ &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
VPX_BITS_12),
- make_tuple(&vpx_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
+ make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
VPX_BITS_8),
- make_tuple(&vpx_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
+ make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
AVX2, Trans32x32Test,
::testing::Values(
make_tuple(&vpx_fdct32x32_avx2,
- &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+ &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_avx2,
- &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+ &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
MSA, Trans32x32Test,
::testing::Values(
make_tuple(&vpx_fdct32x32_msa,
- &vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),
+ &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_msa,
- &vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+ &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
#if CONFIG_VP9_HIGHBITDEPTH
void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct4x4_16_add_c(in, out, stride, 10);
+ vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
}
void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct4x4_16_add_c(in, out, stride, 12);
+ vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
}
void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
}
void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+ vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
}
void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+ vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
}
#if HAVE_SSE2
void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+ vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
}
void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+ vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
::testing::Values(
make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
- make_tuple(&vpx_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans4x4DCT,
::testing::Values(
- make_tuple(&vpx_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
::testing::Values(
make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
- make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
NEON, Trans4x4DCT,
::testing::Values(
make_tuple(&vpx_fdct4x4_c,
- &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
+ &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8)));
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MMX, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
#endif
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4WHT,
::testing::Values(
- make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
+ make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
SSE2, Trans4x4DCT,
::testing::Values(
make_tuple(&vpx_fdct4x4_sse2,
- &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
+ &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
- make_tuple(&vpx_fdct4x4_sse2, &vp9_idct4x4_16_add_c, 0,
+ make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0,
VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4DCT,
::testing::Values(
- make_tuple(&vpx_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4HT,
::testing::Values(
#if CONFIG_VP9_HIGHBITDEPTH
void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_64_add_c(in, out, stride, 10);
+ vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
}
void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_64_add_c(in, out, stride, 12);
+ vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
}
void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
}
void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
+ vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
}
void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
+ vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
}
#if HAVE_SSE2
void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+ vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
}
void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+ vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
}
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
+ vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
}
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
- vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
+ vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
+ make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
+ make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0,
VPX_BITS_8)));
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,
+ make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0,
VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
+ make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
make_tuple(&vpx_highbd_fdct8x8_c,
&idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
make_tuple(&vpx_highbd_fdct8x8_sse2,
INSTANTIATE_TEST_CASE_P(
SSSE3, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
+ make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0,
VPX_BITS_8)));
#endif
INSTANTIATE_TEST_CASE_P(
MSA, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vpx_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));
+ make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
MSA, FwdTrans8x8HT,
::testing::Values(
reference_dct_2d(input, output_r);
for (int j = 0; j < 64; ++j)
coeff[j] = round(output_r[j]);
- vp9_idct8x8_64_add_c(coeff, dst, 8);
+ vpx_idct8x8_64_add_c(coeff, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
C, PartialIDctTest,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_34_add_c,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_34_add_c,
TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_1_add_c,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_1_add_c,
TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_10_add_c,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_10_add_c,
TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_1_add_c,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_1_add_c,
TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_12_add_c,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_12_add_c,
TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_1_add_c,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_1_add_c,
TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c,
- &vp9_idct4x4_16_add_c,
- &vp9_idct4x4_1_add_c,
+ &vpx_idct4x4_16_add_c,
+ &vpx_idct4x4_1_add_c,
TX_4X4, 1)));
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
NEON, PartialIDctTest,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_1_add_neon,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_1_add_neon,
TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_10_add_neon,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_10_add_neon,
TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_1_add_neon,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_1_add_neon,
TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_12_add_neon,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_12_add_neon,
TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_1_add_neon,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_1_add_neon,
TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c,
- &vp9_idct4x4_16_add_c,
- &vp9_idct4x4_1_add_neon,
+ &vpx_idct4x4_16_add_c,
+ &vpx_idct4x4_1_add_neon,
TX_4X4, 1)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
SSE2, PartialIDctTest,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_34_add_sse2,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_34_add_sse2,
TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_1_add_sse2,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_1_add_sse2,
TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_10_add_sse2,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_10_add_sse2,
TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_1_add_sse2,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_1_add_sse2,
TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_12_add_sse2,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_12_add_sse2,
TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_1_add_sse2,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_1_add_sse2,
TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c,
- &vp9_idct4x4_16_add_c,
- &vp9_idct4x4_1_add_sse2,
+ &vpx_idct4x4_16_add_c,
+ &vpx_idct4x4_1_add_sse2,
TX_4X4, 1)));
#endif
SSSE3_64, PartialIDctTest,
::testing::Values(
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_12_add_ssse3,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_12_add_ssse3,
TX_8X8, 12)));
#endif
MSA, PartialIDctTest,
::testing::Values(
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_34_add_msa,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_34_add_msa,
TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c,
- &vp9_idct32x32_1024_add_c,
- &vp9_idct32x32_1_add_msa,
+ &vpx_idct32x32_1024_add_c,
+ &vpx_idct32x32_1_add_msa,
TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_10_add_msa,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_10_add_msa,
TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c,
- &vp9_idct16x16_256_add_c,
- &vp9_idct16x16_1_add_msa,
+ &vpx_idct16x16_256_add_c,
+ &vpx_idct16x16_1_add_msa,
TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_12_add_msa,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_12_add_msa,
TX_8X8, 10),
make_tuple(&vpx_fdct8x8_c,
- &vp9_idct8x8_64_add_c,
- &vp9_idct8x8_1_add_msa,
+ &vpx_idct8x8_64_add_c,
+ &vpx_idct8x8_1_add_msa,
TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c,
- &vp9_idct4x4_16_add_c,
- &vp9_idct4x4_1_add_msa,
+ &vpx_idct4x4_16_add_c,
+ &vpx_idct4x4_1_add_msa,
TX_4X4, 1)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
extern "C" {
// Save the d8-d15 registers into store.
-void vp9_push_neon(int64_t *store);
+void vpx_push_neon(int64_t *store);
}
namespace libvpx_test {
private:
static bool StoreRegisters(int64_t store[8]) {
- vp9_push_neon(store);
+ vpx_push_neon(store);
return true;
}
bool Check() const {
if (!initialized_) return false;
int64_t post_store[8];
- vp9_push_neon(post_store);
+ vpx_push_neon(post_store);
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(pre_store_[i], post_store[i]) << "d"
<< i + 8 << " has been modified";
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- vp9_idct4_rows_dspr2(input, outptr);
- vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vpx_idct4_rows_dspr2(input, outptr);
+ vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- vp9_idct4_rows_dspr2(input, outptr);
+ vpx_idct4_rows_dspr2(input, outptr);
outptr = out;
temp_in[i * 4 + j] = out[j * 4 + i];
}
}
- vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 4; ++i) {
/* transform rows */
for (i = 0; i < 2; ++i) {
/* process 16 * 8 block */
- vp9_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
}
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
dst_stride);
}
break;
/* transform rows */
for (i = 0; i < 2; ++i) {
/* process 16 * 8 block */
- vp9_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
}
/* transform columns */
for (i = 0; i < 2; ++i) {
- vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+ vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
(dst + (i << 3)), dst_stride);
}
break;
/* transform rows */
for (i = 0; i < 2; ++i) {
/* process 16 * 8 block */
- vp9_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
}
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
dst_stride);
}
break;
/* transform rows */
for (i = 0; i < 2; ++i) {
/* process 16 * 8 block */
- vp9_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
}
/* transform columns */
for (i = 0; i < 2; ++i) {
- vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+ vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
(dst + (i << 3)), dst_stride);
}
break;
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
if (eob > 1)
- vp9_idct4x4_16_add(input, dest, stride);
+ vpx_idct4x4_16_add(input, dest, stride);
else
- vp9_idct4x4_1_add(input, dest, stride);
+ vpx_idct4x4_1_add(input, dest, stride);
}
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
if (eob > 1)
- vp9_iwht4x4_16_add(input, dest, stride);
+ vpx_iwht4x4_16_add(input, dest, stride);
else
- vp9_iwht4x4_1_add(input, dest, stride);
+ vpx_iwht4x4_1_add(input, dest, stride);
}
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
// Combine that with code here.
if (eob == 1)
// DC only DCT coefficient
- vp9_idct8x8_1_add(input, dest, stride);
+ vpx_idct8x8_1_add(input, dest, stride);
else if (eob <= 12)
- vp9_idct8x8_12_add(input, dest, stride);
+ vpx_idct8x8_12_add(input, dest, stride);
else
- vp9_idct8x8_64_add(input, dest, stride);
+ vpx_idct8x8_64_add(input, dest, stride);
}
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
* coefficients. Use eobs to separate different cases. */
if (eob == 1)
/* DC only DCT coefficient. */
- vp9_idct16x16_1_add(input, dest, stride);
+ vpx_idct16x16_1_add(input, dest, stride);
else if (eob <= 10)
- vp9_idct16x16_10_add(input, dest, stride);
+ vpx_idct16x16_10_add(input, dest, stride);
else
- vp9_idct16x16_256_add(input, dest, stride);
+ vpx_idct16x16_256_add(input, dest, stride);
}
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
if (eob == 1)
- vp9_idct32x32_1_add(input, dest, stride);
+ vpx_idct32x32_1_add(input, dest, stride);
else if (eob <= 34)
// non-zero coeff only in upper-left 8x8
- vp9_idct32x32_34_add(input, dest, stride);
+ vpx_idct32x32_34_add(input, dest, stride);
else
- vp9_idct32x32_1024_add(input, dest, stride);
+ vpx_idct32x32_1024_add(input, dest, stride);
}
// iht
void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
const highbd_transform_2d IHT_4[] = {
- { vp9_highbd_idct4_c, vp9_highbd_idct4_c }, // DCT_DCT = 0
- { highbd_iadst4_c, vp9_highbd_idct4_c }, // ADST_DCT = 1
- { vp9_highbd_idct4_c, highbd_iadst4_c }, // DCT_ADST = 2
- { highbd_iadst4_c, highbd_iadst4_c } // ADST_ADST = 3
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
}
static const highbd_transform_2d HIGH_IHT_8[] = {
- { vp9_highbd_idct8_c, vp9_highbd_idct8_c }, // DCT_DCT = 0
- { highbd_iadst8_c, vp9_highbd_idct8_c }, // ADST_DCT = 1
- { vp9_highbd_idct8_c, highbd_iadst8_c }, // DCT_ADST = 2
- { highbd_iadst8_c, highbd_iadst8_c } // ADST_ADST = 3
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
};
void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
}
static const highbd_transform_2d HIGH_IHT_16[] = {
- { vp9_highbd_idct16_c, vp9_highbd_idct16_c }, // DCT_DCT = 0
- { highbd_iadst16_c, vp9_highbd_idct16_c }, // ADST_DCT = 1
- { vp9_highbd_idct16_c, highbd_iadst16_c }, // DCT_ADST = 2
- { highbd_iadst16_c, highbd_iadst16_c } // ADST_ADST = 3
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
};
void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd) {
if (eob > 1)
- vp9_highbd_idct4x4_16_add(input, dest, stride, bd);
+ vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
else
- vp9_highbd_idct4x4_1_add(input, dest, stride, bd);
+ vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
}
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd) {
if (eob > 1)
- vp9_highbd_iwht4x4_16_add(input, dest, stride, bd);
+ vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
else
- vp9_highbd_iwht4x4_1_add(input, dest, stride, bd);
+ vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
}
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
// Combine that with code here.
// DC only DCT coefficient
if (eob == 1) {
- vp9_highbd_idct8x8_1_add(input, dest, stride, bd);
+ vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
} else if (eob <= 10) {
- vp9_highbd_idct8x8_10_add(input, dest, stride, bd);
+ vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
} else {
- vp9_highbd_idct8x8_64_add(input, dest, stride, bd);
+ vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
}
}
// coefficients. Use eobs to separate different cases.
// DC only DCT coefficient.
if (eob == 1) {
- vp9_highbd_idct16x16_1_add(input, dest, stride, bd);
+ vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
} else if (eob <= 10) {
- vp9_highbd_idct16x16_10_add(input, dest, stride, bd);
+ vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
} else {
- vp9_highbd_idct16x16_256_add(input, dest, stride, bd);
+ vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
}
}
int stride, int eob, int bd) {
// Non-zero coeff only in upper-left 8x8
if (eob == 1) {
- vp9_highbd_idct32x32_1_add(input, dest, stride, bd);
+ vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
} else if (eob <= 34) {
- vp9_highbd_idct32x32_34_add(input, dest, stride, bd);
+ vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
} else {
- vp9_highbd_idct32x32_1024_add(input, dest, stride, bd);
+ vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
}
}
int eob);
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
-void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
- eob);
+void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
-VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
-
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
}
} // for
{
- // from vp9_dct_sse2.c
+ // from vpx_dct_sse2.c
// Post-condition (division by two)
// division of two 16 bits signed numbers using shifts
// n / 2 = (n - (n >> 15)) >> 1
;
- EXPORT |vp9_idct16x16_1_add_neon|
+ EXPORT |vpx_idct16x16_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
+;void vpx_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct16x16_1_add_neon| PROC
+|vpx_idct16x16_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
vst1.64 {d31}, [r12], r2
bx lr
- ENDP ; |vp9_idct16x16_1_add_neon|
+ ENDP ; |vpx_idct16x16_1_add_neon|
END
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
-void vp9_idct16x16_1_add_neon(
+void vpx_idct16x16_1_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_idct16x16_256_add_neon_pass1|
- EXPORT |vp9_idct16x16_256_add_neon_pass2|
- EXPORT |vp9_idct16x16_10_add_neon_pass1|
- EXPORT |vp9_idct16x16_10_add_neon_pass2|
+ EXPORT |vpx_idct16x16_256_add_neon_pass1|
+ EXPORT |vpx_idct16x16_256_add_neon_pass2|
+ EXPORT |vpx_idct16x16_10_add_neon_pass1|
+ EXPORT |vpx_idct16x16_10_add_neon_pass2|
ARM
REQUIRE8
PRESERVE8
MEND
AREA Block, CODE, READONLY ; name this block of code
-;void |vp9_idct16x16_256_add_neon_pass1|(int16_t *input,
+;void |vpx_idct16x16_256_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_idct16x16_256_add_neon_pass1| PROC
+|vpx_idct16x16_256_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_idct16x16_256_add_neon_pass1|
+ ENDP ; |vpx_idct16x16_256_add_neon_pass1|
-;void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
+;void vpx_idct16x16_256_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_idct16x16_256_add_neon_pass2| PROC
+|vpx_idct16x16_256_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
end_idct16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_idct16x16_256_add_neon_pass2|
+ ENDP ; |vpx_idct16x16_256_add_neon_pass2|
-;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input,
+;void |vpx_idct16x16_10_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_idct16x16_10_add_neon_pass1| PROC
+|vpx_idct16x16_10_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_idct16x16_10_add_neon_pass1|
+ ENDP ; |vpx_idct16x16_10_add_neon_pass1|
-;void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
+;void vpx_idct16x16_10_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_idct16x16_10_add_neon_pass2| PROC
+|vpx_idct16x16_10_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
end_idct10_16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_idct16x16_10_add_neon_pass2|
+ ENDP ; |vpx_idct16x16_10_add_neon_pass2|
END
return;
}
-void vp9_idct16x16_256_add_neon_pass1(
+void vpx_idct16x16_256_add_neon_pass1(
int16_t *in,
int16_t *out,
int output_stride) {
return;
}
-void vp9_idct16x16_256_add_neon_pass2(
+void vpx_idct16x16_256_add_neon_pass2(
int16_t *src,
int16_t *out,
int16_t *pass1Output,
return;
}
-void vp9_idct16x16_10_add_neon_pass1(
+void vpx_idct16x16_10_add_neon_pass1(
int16_t *in,
int16_t *out,
int output_stride) {
return;
}
-void vp9_idct16x16_10_add_neon_pass2(
+void vpx_idct16x16_10_add_neon_pass2(
int16_t *src,
int16_t *out,
int16_t *pass1Output,
#include "vpx_dsp/vpx_dsp_common.h"
-void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,
+void vpx_idct16x16_256_add_neon_pass1(const int16_t *input,
int16_t *output,
int output_stride);
-void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,
+void vpx_idct16x16_256_add_neon_pass2(const int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,
+void vpx_idct16x16_10_add_neon_pass1(const int16_t *input,
int16_t *output,
int output_stride);
-void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
+void vpx_idct16x16_10_add_neon_pass2(const int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
#if HAVE_NEON_ASM
/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
-extern void vp9_push_neon(int64_t *store);
-extern void vp9_pop_neon(int64_t *store);
+extern void vpx_push_neon(int64_t *store);
+extern void vpx_pop_neon(int64_t *store);
#endif // HAVE_NEON_ASM
-void vp9_idct16x16_256_add_neon(const int16_t *input,
+void vpx_idct16x16_256_add_neon(const int16_t *input,
uint8_t *dest, int dest_stride) {
#if HAVE_NEON_ASM
int64_t store_reg[8];
#if HAVE_NEON_ASM
// save d8-d15 register values.
- vp9_push_neon(store_reg);
+ vpx_push_neon(store_reg);
#endif
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
+ vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_idct16x16_256_add_neon_pass2(input+1,
+ vpx_idct16x16_256_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
/* Parallel idct on the lower 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);
+ vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_idct16x16_256_add_neon_pass2(input+8*16+1,
+ vpx_idct16x16_256_add_neon_pass2(input+8*16+1,
row_idct_output+8,
pass1_output,
0,
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
+ vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
+ vpx_idct16x16_256_add_neon_pass2(row_idct_output+1,
row_idct_output,
pass1_output,
1,
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
+ vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
+ vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
row_idct_output+8,
pass1_output,
1,
#if HAVE_NEON_ASM
// restore d8-d15 register values.
- vp9_pop_neon(store_reg);
+ vpx_pop_neon(store_reg);
#endif
return;
}
-void vp9_idct16x16_10_add_neon(const int16_t *input,
+void vpx_idct16x16_10_add_neon(const int16_t *input,
uint8_t *dest, int dest_stride) {
#if HAVE_NEON_ASM
int64_t store_reg[8];
#if HAVE_NEON_ASM
// save d8-d15 register values.
- vp9_push_neon(store_reg);
+ vpx_push_neon(store_reg);
#endif
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
+ vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_idct16x16_10_add_neon_pass2(input+1,
+ vpx_idct16x16_10_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
/* Parallel idct on the left 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
+ vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
+ vpx_idct16x16_256_add_neon_pass2(row_idct_output+1,
row_idct_output,
pass1_output,
1,
/* Parallel idct on the right 8 columns */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
+ vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7.
// Then add the result to the destination data.
- vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
+ vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
row_idct_output+8,
pass1_output,
1,
#if HAVE_NEON_ASM
// restore d8-d15 register values.
- vp9_pop_neon(store_reg);
+ vpx_pop_neon(store_reg);
#endif
return;
; file in the root of the source tree.
;
- EXPORT |vp9_idct32x32_1_add_neon|
+ EXPORT |vpx_idct32x32_1_add_neon|
ARM
REQUIRE8
PRESERVE8
vst1.8 {q15},[$dst], $stride
MEND
-;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,
+;void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride
-|vp9_idct32x32_1_add_neon| PROC
+|vpx_idct32x32_1_add_neon| PROC
push {lr}
pld [r1]
add r3, r1, #16 ; r3 dest + 16 for second loop
bne diff_positive_32_32_loop
pop {pc}
- ENDP ; |vp9_idct32x32_1_add_neon|
+ ENDP ; |vpx_idct32x32_1_add_neon|
END
return;
}
-void vp9_idct32x32_1_add_neon(
+void vpx_idct32x32_1_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
cospi_31_64 EQU 804
- EXPORT |vp9_idct32x32_1024_add_neon|
+ EXPORT |vpx_idct32x32_1024_add_neon|
ARM
REQUIRE8
PRESERVE8
MEND
; --------------------------------------------------------------------------
-;void vp9_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
+;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
;
; r0 int16_t *input,
; r1 uint8_t *dest,
; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)
; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)
-|vp9_idct32x32_1024_add_neon| PROC
+|vpx_idct32x32_1024_add_neon| PROC
; This function does one pass of idct32x32 transform.
;
; This is done by transposing the input and then doing a 1d transform on
vpop {d8-d15}
pop {r4-r11}
bx lr
- ENDP ; |vp9_idct32x32_1024_add_neon|
+ ENDP ; |vpx_idct32x32_1024_add_neon|
END
return;
}
-void vp9_idct32x32_1024_add_neon(
+void vpx_idct32x32_1024_add_neon(
int16_t *input,
uint8_t *dest,
int stride) {
;
- EXPORT |vp9_idct4x4_1_add_neon|
+ EXPORT |vpx_idct4x4_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
+;void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct4x4_1_add_neon| PROC
+|vpx_idct4x4_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
vst1.32 {d7[1]}, [r12]
bx lr
- ENDP ; |vp9_idct4x4_1_add_neon|
+ ENDP ; |vpx_idct4x4_1_add_neon|
END
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
-void vp9_idct4x4_1_add_neon(
+void vpx_idct4x4_1_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_idct4x4_16_add_neon|
+ EXPORT |vpx_idct4x4_16_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
AREA Block, CODE, READONLY ; name this block of code
-;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct4x4_16_add_neon| PROC
+|vpx_idct4x4_16_add_neon| PROC
; The 2D transform is done with two passes which are actually pretty
; similar. We first transform the rows. This is done by transposing
vst1.32 {d26[1]}, [r1], r2
vst1.32 {d26[0]}, [r1] ; no post-increment
bx lr
- ENDP ; |vp9_idct4x4_16_add_neon|
+ ENDP ; |vpx_idct4x4_16_add_neon|
END
#include <arm_neon.h>
-void vp9_idct4x4_16_add_neon(
+void vpx_idct4x4_16_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
;
- EXPORT |vp9_idct8x8_1_add_neon|
+ EXPORT |vpx_idct8x8_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
+;void vpx_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct8x8_1_add_neon| PROC
+|vpx_idct8x8_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
vst1.64 {d31}, [r12], r2
bx lr
- ENDP ; |vp9_idct8x8_1_add_neon|
+ ENDP ; |vpx_idct8x8_1_add_neon|
END
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
-void vp9_idct8x8_1_add_neon(
+void vpx_idct8x8_1_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_idct8x8_64_add_neon|
- EXPORT |vp9_idct8x8_12_add_neon|
+ EXPORT |vpx_idct8x8_64_add_neon|
+ EXPORT |vpx_idct8x8_12_add_neon|
ARM
REQUIRE8
PRESERVE8
MEND
AREA Block, CODE, READONLY ; name this block of code
-;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vpx_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct8x8_64_add_neon| PROC
+|vpx_idct8x8_64_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_idct8x8_64_add_neon|
+ ENDP ; |vpx_idct8x8_64_add_neon|
-;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vpx_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_idct8x8_12_add_neon| PROC
+|vpx_idct8x8_12_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_idct8x8_12_add_neon|
+ ENDP ; |vpx_idct8x8_12_add_neon|
END
return;
}
-void vp9_idct8x8_64_add_neon(
+void vpx_idct8x8_64_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
return;
}
-void vp9_idct8x8_12_add_neon(
+void vpx_idct8x8_12_add_neon(
int16_t *input,
uint8_t *dest,
int dest_stride) {
;
- EXPORT |vp9_push_neon|
- EXPORT |vp9_pop_neon|
+ EXPORT |vpx_push_neon|
+ EXPORT |vpx_pop_neon|
ARM
REQUIRE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-|vp9_push_neon| PROC
+|vpx_push_neon| PROC
vst1.i64 {d8, d9, d10, d11}, [r0]!
vst1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
ENDP
-|vp9_pop_neon| PROC
+|vpx_pop_neon| PROC
vld1.i64 {d8, d9, d10, d11}, [r0]!
vld1.i64 {d12, d13, d14, d15}, [r0]!
bx lr
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
// output[j * 32 + i] = (temp_out[j] + 1) >> 2;
- // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c
+ // PS: also change code in vpx_dsp/x86/vpx_dct_sse2.c
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
#include "vpx_dsp/inv_txfm.h"
-void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
0.5 shifts per pixel. */
int i;
}
}
-void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
+void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
int i;
tran_high_t a1, e1;
tran_low_t tmp[4];
output[3] = WRAPLOW(step[0] - step[3], 8);
}
-void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
int i, j;
}
}
-void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
+void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
int dest_stride) {
int i;
tran_high_t a1;
output[7] = WRAPLOW(step1[0] - step1[7], 8);
}
-void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
int i, j;
}
}
-void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
output[7] = WRAPLOW(-x1, 8);
}
-void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
int i, j;
output[15] = WRAPLOW(step2[0] - step2[15], 8);
}
-void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
+void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
output[15] = WRAPLOW(-x1, 8);
}
-void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
+void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
}
}
-void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
output[31] = WRAPLOW(step1[0] - step1[31], 8);
}
-void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
tran_low_t out[32 * 32];
tran_low_t *outptr = out;
}
}
-void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
+void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
tran_low_t out[32 * 32] = {0};
tran_low_t *outptr = out;
}
}
-void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
0.5 shifts per pixel. */
}
}
-void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
+void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
int dest_stride, int bd) {
int i;
tran_high_t a1, e1;
}
}
-void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step[4];
tran_high_t temp1, temp2;
(void) bd;
output[3] = WRAPLOW(step[0] - step[3], bd);
}
-void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
// Rows
for (i = 0; i < 4; ++i) {
- vp9_highbd_idct4_c(input, outptr, bd);
+ vpx_highbd_idct4_c(input, outptr, bd);
input += 4;
outptr += 4;
}
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- vp9_highbd_idct4_c(temp_in, temp_out, bd);
+ vpx_highbd_idct4_c(temp_in, temp_out, bd);
for (j = 0; j < 4; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
}
}
-void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
int dest_stride, int bd) {
int i;
tran_high_t a1;
}
}
-void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2;
// stage 1
step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
// stage 2 & stage 3 - even half
- vp9_highbd_idct4_c(step1, step1, bd);
+ vpx_highbd_idct4_c(step1, step1, bd);
// stage 2 - odd half
step2[4] = WRAPLOW(step1[4] + step1[5], bd);
output[7] = WRAPLOW(step1[0] - step1[7], bd);
}
-void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
// First transform rows.
for (i = 0; i < 8; ++i) {
- vp9_highbd_idct8_c(input, outptr, bd);
+ vpx_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- vp9_highbd_idct8_c(temp_in, temp_out, bd);
+ vpx_highbd_idct8_c(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
}
}
-void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i, j;
tran_high_t a1;
}
}
-void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
tran_low_t x0 = input[0];
output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd);
}
-void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
tran_low_t x0 = input[7];
output[7] = WRAPLOW(-x1, bd);
}
-void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
// First transform rows.
// Only first 4 row has non-zero coefs.
for (i = 0; i < 4; ++i) {
- vp9_highbd_idct8_c(input, outptr, bd);
+ vpx_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- vp9_highbd_idct8_c(temp_in, temp_out, bd);
+ vpx_highbd_idct8_c(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
}
}
-void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2;
(void) bd;
output[15] = WRAPLOW(step2[0] - step2[15], bd);
}
-void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
// First transform rows.
for (i = 0; i < 16; ++i) {
- vp9_highbd_idct16_c(input, outptr, bd);
+ vpx_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- vp9_highbd_idct16_c(temp_in, temp_out, bd);
+ vpx_highbd_idct16_c(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
}
}
-void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
output[15] = WRAPLOW(-x1, bd);
}
-void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
for (i = 0; i < 4; ++i) {
- vp9_highbd_idct16_c(input, outptr, bd);
+ vpx_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j*16 + i];
- vp9_highbd_idct16_c(temp_in, temp_out, bd);
+ vpx_highbd_idct16_c(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
}
}
-void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i, j;
tran_high_t a1;
}
}
-void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) {
+static void highbd_idct32_c(const tran_low_t *input,
+ tran_low_t *output, int bd) {
tran_low_t step1[32], step2[32];
tran_high_t temp1, temp2;
(void) bd;
output[31] = WRAPLOW(step1[0] - step1[31], bd);
}
-void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[32 * 32];
tran_low_t *outptr = out;
}
}
-void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[32 * 32] = {0};
tran_low_t *outptr = out;
}
}
-void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i, j;
int a1;
void iadst16_c(const tran_low_t *input, tran_low_t *output);
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
-void vp9_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
-void vp9_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
-void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
-void highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
-void highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
-void highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
int bd) {
#include "vpx_dsp/mips/inv_txfm_msa.h"
-void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
+void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
v8i16 loc0, loc1, loc2, loc3;
v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;
v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15;
ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16);
}
-void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
int32_t dst_stride) {
v8i16 loc0, loc1, loc2, loc3;
v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;
VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
}
-void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]);
/* transform rows */
for (i = 0; i < 2; ++i) {
/* process 16 * 8 block */
- vp9_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7)));
+ vpx_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7)));
}
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
+ vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
dst_stride);
}
}
-void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
uint8_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]);
int16_t *out = out_arr;
/* process 16 * 8 block */
- vp9_idct16_1d_rows_msa(input, out);
+ vpx_idct16_1d_rows_msa(input, out);
/* short case just considers top 4 rows as valid output */
out += 4 * 16;
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
+ vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
dst_stride);
}
}
-void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
uint8_t i;
int16_t out;
}
}
-void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {
+void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {
v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16);
}
-void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
int32_t dst_stride) {
v8i16 v0, v2, v4, v6, k0, k1, k2, k3;
v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
#include "vpx_dsp/mips/inv_txfm_msa.h"
-static void vp9_idct32x8_row_transpose_store(const int16_t *input,
- int16_t *tmp_buf) {
+static void idct32x8_row_transpose_store(const int16_t *input,
+ int16_t *tmp_buf) {
v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
/* 1st & 2nd 8x8 */
ST_SH4(m6, n6, m7, n7, (tmp_buf + 28 * 8), 8);
}
-static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf,
- int16_t *tmp_eve_buf) {
+static void idct32x8_row_even_process_store(int16_t *tmp_buf,
+ int16_t *tmp_eve_buf) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;
ST_SH(loc3, (tmp_eve_buf + 7 * 8));
}
-static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf,
- int16_t *tmp_odd_buf) {
+static void idct32x8_row_odd_process_store(int16_t *tmp_buf,
+ int16_t *tmp_odd_buf) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);
}
-static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf,
- int16_t *tmp_eve_buf,
- int16_t *tmp_odd_buf,
- int16_t *dst) {
+static void idct_butterfly_transpose_store(int16_t *tmp_buf,
+ int16_t *tmp_eve_buf,
+ int16_t *tmp_odd_buf,
+ int16_t *dst) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32);
}
-static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) {
+static void idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) {
DECLARE_ALIGNED(32, int16_t, tmp_buf[8 * 32]);
DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]);
DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]);
- vp9_idct32x8_row_transpose_store(input, &tmp_buf[0]);
- vp9_idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]);
- vp9_idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]);
- vp9_idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0],
- &tmp_odd_buf[0], output);
+ idct32x8_row_transpose_store(input, &tmp_buf[0]);
+ idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]);
+ idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]);
+ idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0],
+ &tmp_odd_buf[0], output);
}
-static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf,
- int16_t *tmp_eve_buf) {
+static void idct8x32_column_even_process_store(int16_t *tmp_buf,
+ int16_t *tmp_eve_buf) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;
ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8);
}
-static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf,
- int16_t *tmp_odd_buf) {
+static void idct8x32_column_odd_process_store(int16_t *tmp_buf,
+ int16_t *tmp_odd_buf) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);
}
-static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
- int16_t *tmp_odd_buf,
- uint8_t *dst,
- int32_t dst_stride) {
+static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
+ int16_t *tmp_odd_buf,
+ uint8_t *dst,
+ int32_t dst_stride) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
n1, n3, n5, n7);
}
-static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
- int32_t dst_stride) {
+static void idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]);
DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]);
- vp9_idct8x32_column_even_process_store(input, &tmp_eve_buf[0]);
- vp9_idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]);
- vp9_idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0],
- dst, dst_stride);
+ idct8x32_column_even_process_store(input, &tmp_eve_buf[0]);
+ idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]);
+ idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0],
+ dst, dst_stride);
}
-void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]);
/* transform rows */
for (i = 0; i < 4; ++i) {
/* process 32 * 8 block */
- vp9_idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8)));
+ idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8)));
}
/* transform columns */
for (i = 0; i < 4; ++i) {
/* process 8 * 32 block */
- vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
- dst_stride);
+ idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
}
-void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]);
out_ptr = out_arr;
/* rows: only upper-left 8x8 has non-zero coeff */
- vp9_idct32x8_1d_rows_msa(input, out_ptr);
+ idct32x8_1d_rows_msa(input, out_ptr);
/* transform columns */
for (i = 0; i < 4; ++i) {
/* process 8 * 32 block */
- vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
- dst_stride);
+ idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
}
-void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int32_t i;
int16_t out;
#include "vpx_dsp/mips/inv_txfm_msa.h"
-void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
v8i16 in0, in1, in2, in3;
v4i32 in0_r, in1_r, in2_r, in3_r, in4_r;
ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride);
}
-void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int16_t a1, e1;
v8i16 in1, in0 = { 0 };
ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride);
}
-void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
v8i16 in0, in1, in2, in3;
ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
}
-void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int16_t out;
v8i16 vec;
#include "vpx_dsp/mips/inv_txfm_msa.h"
-void vp9_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
-void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3;
VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
-void vp9_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
+void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride) {
int16_t out;
int32_t val;
); \
out; })
-void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride);
-void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output);
-void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output);
+void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride);
void iadst4_dspr2(const int16_t *input, int16_t *output);
void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows);
MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \
}
-void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
int32_t dst_stride);
-void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output);
-void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output);
+void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
int32_t dst_stride);
-void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output);
+void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output);
#endif // VPX_DSP_MIPS_INV_TXFM_MSA_H_
}
}
-void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
uint32_t pos = 45;
idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
-void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
int16_t *outptr = out;
idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
-void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
uint32_t pos = 45;
int32_t out;
#include "vpx_dsp/txfm_common.h"
#if HAVE_DSPR2
-void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
}
}
-void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
int16_t *outptr = out;
idct32_rows_dspr2(input, outptr, 32);
// Columns
- vp9_idct32_cols_add_blk_dspr2(out, dest, dest_stride);
+ vpx_idct32_cols_add_blk_dspr2(out, dest, dest_stride);
}
-void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
int stride) {
DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
int16_t *outptr = out;
}
// Columns
- vp9_idct32_cols_add_blk_dspr2(out, dest, stride);
+ vpx_idct32_cols_add_blk_dspr2(out, dest, stride);
}
-void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
int stride) {
int r, out;
int32_t a1, absa1;
#include "vpx_dsp/txfm_common.h"
#if HAVE_DSPR2
-void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) {
+void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
const int const_2_power_13 = 8192;
}
}
-void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
}
}
-void vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
int16_t *outptr = out;
);
// Rows
- vp9_idct4_rows_dspr2(input, outptr);
+ vpx_idct4_rows_dspr2(input, outptr);
// Columns
- vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
-void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
int a1, absa1;
int r;
}
}
-void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
int16_t *outptr = out;
idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
-void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
int16_t *outptr = out;
idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
-void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
+void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
int dest_stride) {
uint32_t pos = 45;
int32_t out;
endif # CONFIG_USE_X86INC
ifeq ($(HAVE_NEON_ASM),yes)
+DSP_SRCS-yes += arm/save_reg_neon$(ASM)
DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)
DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM)
DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
- add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_1_add/;
+ add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_1_add/;
- add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_16_add/;
+ add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_16_add/;
- add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_1_add/;
+ add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_1_add/;
- add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_64_add/;
+ add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_64_add/;
- add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_12_add/;
+ add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_12_add/;
- add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_1_add/;
+ add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_1_add/;
- add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_256_add/;
+ add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_256_add/;
- add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_10_add/;
+ add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_10_add/;
- add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_1024_add/;
+ add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1024_add/;
- add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_34_add/;
+ add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_34_add/;
- add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_1_add/;
+ add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1_add/;
- add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_1_add/;
+ add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_iwht4x4_1_add/;
- add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_16_add/;
+ add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_iwht4x4_16_add/;
- add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct4x4_1_add/;
+ add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct4x4_1_add/;
- add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct8x8_1_add/;
+ add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_1_add/;
- add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct16x16_1_add/;
+ add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct16x16_1_add/;
- add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct32x32_1024_add/;
+ add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct32x32_1024_add/;
- add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct32x32_34_add/;
+ add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct32x32_34_add/;
- add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct32x32_1_add/;
+ add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct32x32_1_add/;
- add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_iwht4x4_1_add/;
+ add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_iwht4x4_1_add/;
- add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_iwht4x4_16_add/;
+ add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_iwht4x4_16_add/;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
- add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct4x4_16_add/;
+ add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct4x4_16_add/;
- add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct8x8_64_add/;
+ add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_64_add/;
- add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct8x8_10_add/;
+ add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_10_add/;
- add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct16x16_256_add/;
+ add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct16x16_256_add/;
- add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct16x16_10_add/;
+ add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct16x16_10_add/;
} else {
- add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct4x4_16_add sse2/;
+ add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct4x4_16_add sse2/;
- add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct8x8_64_add sse2/;
+ add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_64_add sse2/;
- add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct8x8_10_add sse2/;
+ add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_10_add sse2/;
- add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct16x16_256_add sse2/;
+ add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct16x16_256_add sse2/;
- add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vp9_highbd_idct16x16_10_add sse2/;
+ add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct16x16_10_add sse2/;
} # CONFIG_EMULATE_HARDWARE
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
- add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_1_add/;
+ add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_1_add/;
- add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_16_add/;
+ add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_16_add/;
- add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_1_add/;
+ add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_1_add/;
- add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_64_add/;
+ add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_64_add/;
- add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_12_add/;
+ add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_12_add/;
- add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_1_add/;
+ add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_1_add/;
- add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_256_add/;
+ add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_256_add/;
- add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_10_add/;
+ add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_10_add/;
- add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_1024_add/;
+ add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1024_add/;
- add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_34_add/;
+ add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_34_add/;
- add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_1_add/;
+ add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1_add/;
- add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_1_add/;
+ add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_iwht4x4_1_add/;
- add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_16_add/;
+ add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_iwht4x4_16_add/;
} else {
- add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
- add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
- add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
+ add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_34_add sse2 neon_asm dspr2 msa/;
# Need to add 34 eob idct32x32 neon implementation.
- $vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
+ $vpx_idct32x32_34_add_neon_asm=vpx_idct32x32_1024_add_neon;
- add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
+ add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/;
- add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_1_add msa/;
+ add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_iwht4x4_1_add msa/;
- add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_16_add msa/, "$sse2_x86inc";
+ add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_iwht4x4_16_add msa/, "$sse2_x86inc";
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
+
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
SECTION .text
*(int *)(dest) = _mm_cvtsi128_si32(d0); \
}
-void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
const __m128i cst = _mm_setr_epi16(
}
}
-void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
out7 = _mm_subs_epi16(stp1_0, stp2_7); \
}
-void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 4);
// 2-D
for (i = 0; i < 2; i++) {
- // 8x8 Transpose is copied from vp9_fdct8x8_sse2()
+ // 8x8 Transpose is copied from vpx_fdct8x8_sse2()
TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
RECON_AND_STORE(dest + 7 * stride, in7);
}
-void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
__m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- // 8x8 Transpose is copied from vp9_fdct8x8_sse2()
+ // 8x8 Transpose is copied from vpx_fdct8x8_sse2()
TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
in0, in1, in2, in3, in4, in5, in6, in7);
in[7] = _mm_sub_epi16(k__const_0, s1);
}
-void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 4);
stp2_10, stp2_13, stp2_11, stp2_12) \
}
-void vp9_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
}
}
-void vp9_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a, i;
iadst16_8col(in1);
}
-void vp9_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
}
// Only upper-left 8x8 has non-zero coeff
-void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
}
}
-void vp9_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
}
}
-void vp9_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a, i;
return retval;
}
-void vp9_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
} else {
// Run the un-optimised row transform
for (i = 0; i < 4; ++i) {
- vp9_highbd_idct4_c(input, outptr, bd);
+ vpx_highbd_idct4_c(input, outptr, bd);
input += 4;
outptr += 4;
}
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- vp9_highbd_idct4_c(temp_in, temp_out, bd);
+ vpx_highbd_idct4_c(temp_in, temp_out, bd);
for (j = 0; j < 4; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
}
}
-void vp9_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
} else {
// Run the un-optimised row transform
for (i = 0; i < 8; ++i) {
- vp9_highbd_idct8_c(input, outptr, bd);
+ vpx_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- vp9_highbd_idct8_c(temp_in, temp_out, bd);
+ vpx_highbd_idct8_c(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
}
}
-void vp9_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
} else {
// Run the un-optimised row transform
for (i = 0; i < 4; ++i) {
- vp9_highbd_idct8_c(input, outptr, bd);
+ vpx_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- vp9_highbd_idct8_c(temp_in, temp_out, bd);
+ vpx_highbd_idct8_c(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
}
}
-void vp9_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
} else {
// Run the un-optimised row transform
for (i = 0; i < 16; ++i) {
- vp9_highbd_idct16_c(input, outptr, bd);
+ vpx_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- vp9_highbd_idct16_c(temp_in, temp_out, bd);
+ vpx_highbd_idct16_c(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
}
}
-void vp9_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
} else {
// Run the un-optimised row transform
for (i = 0; i < 4; ++i) {
- vp9_highbd_idct16_c(input, outptr, bd);
+ vpx_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- vp9_highbd_idct16_c(temp_in, temp_out, bd);
+ vpx_highbd_idct16_c(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
+
+%define program_name vpx
+
%include "third_party/x86inc/x86inc.asm"
; This file provides SSSE3 version of the inverse transformation. Part