From: Mans Rullgard Date: Sat, 21 Apr 2012 14:31:10 +0000 (+0100) Subject: ARM: allow runtime masking of CPU features X-Git-Tag: v9_beta1~1957 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d526c5338d50d12a54fd95130030c60070707d3e;p=platform%2Fupstream%2Flibav.git ARM: allow runtime masking of CPU features This allows masking CPU features with the -cpuflags avconv option, which is useful for testing different optimisations without rebuilding. Signed-off-by: Mans Rullgard --- diff --git a/avconv.c b/avconv.c index 6c3e6a9..851d8dd 100644 --- a/avconv.c +++ b/avconv.c @@ -4865,6 +4865,14 @@ static int opt_cpuflags(const char *opt, const char *arg) { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4 }, .unit = "flags" }, { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW }, .unit = "flags" }, { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT }, .unit = "flags" }, + + { "armv5te", NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ARMV5TE }, .unit = "flags" }, + { "armv6", NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ARMV6 }, .unit = "flags" }, + { "armv6t2", NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ARMV6T2 }, .unit = "flags" }, + { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_VFP }, .unit = "flags" }, + { "vfpv3", NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_VFPV3 }, .unit = "flags" }, + { "neon", NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_NEON }, .unit = "flags" }, + { NULL }, }; static const AVClass class = { diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index aed11f4..d7cb95b 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -19,6 +19,8 @@ */ #include + +#include "libavutil/arm/cpu.h" #include "libavutil/attributes.h" #include "libavcodec/ac3dsp.h" #include "config.h" @@ -39,13 +41,15 @@ void ff_ac3_update_bap_counts_arm(uint16_t mant_cnt[16], uint8_t *bap, int len); av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) { + int cpu_flags = av_get_cpu_flags(); + 
c->update_bap_counts = ff_ac3_update_bap_counts_arm; - if (HAVE_ARMV6) { + if (have_armv6(cpu_flags)) { c->bit_alloc_calc_bap = ff_ac3_bit_alloc_calc_bap_armv6; } - if (HAVE_NEON) { + if (have_neon(cpu_flags)) { c->ac3_exponent_min = ff_ac3_exponent_min_neon; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon; c->ac3_lshift_int16 = ff_ac3_lshift_int16_neon; diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c index ec7016e..f0375c9 100644 --- a/libavcodec/arm/dcadsp_init_arm.c +++ b/libavcodec/arm/dcadsp_init_arm.c @@ -19,6 +19,8 @@ */ #include "config.h" + +#include "libavutil/arm/cpu.h" #include "libavutil/attributes.h" #include "libavcodec/dcadsp.h" @@ -27,6 +29,8 @@ void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, av_cold void ff_dcadsp_init_arm(DCADSPContext *s) { - if (HAVE_NEON) + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) s->lfe_fir = ff_dca_lfe_fir_neon; } diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index bc94b08..0c1563d 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/arm/cpu.h" #include "libavcodec/dsputil.h" #include "dsputil_arm.h" @@ -76,6 +77,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block) void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) { const int high_bit_depth = avctx->bits_per_raw_sample > 8; + int cpu_flags = av_get_cpu_flags(); ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; @@ -117,8 +119,8 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm; } - if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx); - if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx); - if (HAVE_ARMVFP) ff_dsputil_init_vfp(c, avctx); 
- if (HAVE_NEON) ff_dsputil_init_neon(c, avctx); + if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx); + if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx); + if (have_vfp(cpu_flags)) ff_dsputil_init_vfp(c, avctx); + if (have_neon(cpu_flags)) ff_dsputil_init_neon(c, avctx); } diff --git a/libavcodec/arm/dsputil_init_vfp.c b/libavcodec/arm/dsputil_init_vfp.c index 9cda890..d5e2d3b 100644 --- a/libavcodec/arm/dsputil_init_vfp.c +++ b/libavcodec/arm/dsputil_init_vfp.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/arm/cpu.h" #include "libavcodec/dsputil.h" #include "dsputil_arm.h" @@ -28,7 +29,9 @@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) { - if (!HAVE_VFPV3) + int cpu_flags = av_get_cpu_flags(); + + if (!have_vfpv3(cpu_flags)) c->vector_fmul = ff_vector_fmul_vfp; c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; } diff --git a/libavcodec/arm/fft_fixed_init_arm.c b/libavcodec/arm/fft_fixed_init_arm.c index be412cd..5601ba1 100644 --- a/libavcodec/arm/fft_fixed_init_arm.c +++ b/libavcodec/arm/fft_fixed_init_arm.c @@ -18,6 +18,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/arm/cpu.h" + #define CONFIG_FFT_FLOAT 0 #include "libavcodec/fft.h" @@ -27,7 +29,9 @@ void ff_mdct_fixed_calcw_neon(FFTContext *s, FFTDouble *o, const FFTSample *i); av_cold void ff_fft_fixed_init_arm(FFTContext *s) { - if (HAVE_NEON) { + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; s->fft_calc = ff_fft_fixed_calc_neon; diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c index b2c3b72..9ec620f 100644 --- a/libavcodec/arm/fft_init_arm.c +++ b/libavcodec/arm/fft_init_arm.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ 
+#include "libavutil/arm/cpu.h" #include "libavcodec/fft.h" #include "libavcodec/rdft.h" #include "libavcodec/synth_filter.h" @@ -39,7 +40,9 @@ void ff_synth_filter_float_neon(FFTContext *imdct, av_cold void ff_fft_init_arm(FFTContext *s) { - if (HAVE_NEON) { + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { s->fft_permute = ff_fft_permute_neon; s->fft_calc = ff_fft_calc_neon; #if CONFIG_MDCT @@ -54,7 +57,9 @@ av_cold void ff_fft_init_arm(FFTContext *s) #if CONFIG_RDFT av_cold void ff_rdft_init_arm(RDFTContext *s) { - if (HAVE_NEON) + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) s->rdft_calc = ff_rdft_calc_neon; } #endif @@ -62,7 +67,9 @@ av_cold void ff_rdft_init_arm(RDFTContext *s) #if CONFIG_DCA_DECODER av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) { - if (HAVE_NEON) + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) s->synth_filter_float = ff_synth_filter_float_neon; } #endif diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c index 92e07f1..9435263 100644 --- a/libavcodec/arm/fmtconvert_init_arm.c +++ b/libavcodec/arm/fmtconvert_init_arm.c @@ -20,6 +20,7 @@ #include +#include "libavutil/arm/cpu.h" #include "libavcodec/avcodec.h" #include "libavcodec/fmtconvert.h" @@ -33,11 +34,13 @@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len); void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx) { - if (HAVE_ARMVFP && HAVE_ARMV6) { + int cpu_flags = av_get_cpu_flags(); + + if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { c->float_to_int16 = ff_float_to_int16_vfp; } - if (HAVE_NEON) { + if (have_neon(cpu_flags)) { c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon; if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c index 1c331a4..b4277a5 100644 --- a/libavcodec/arm/h264dsp_init_arm.c +++ b/libavcodec/arm/h264dsp_init_arm.c @@ 
-20,6 +20,7 @@ #include +#include "libavutil/arm/cpu.h" #include "libavcodec/dsputil.h" #include "libavcodec/h264dsp.h" @@ -97,5 +98,8 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { - if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) + ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); } diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c index 5fc07bc..808cc54 100644 --- a/libavcodec/arm/h264pred_init_arm.c +++ b/libavcodec/arm/h264pred_init_arm.c @@ -20,6 +20,7 @@ #include +#include "libavutil/arm/cpu.h" #include "libavcodec/h264pred.h" void ff_pred16x16_vert_neon(uint8_t *src, int stride); @@ -76,5 +77,8 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc) { - if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc); + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) + ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc); } diff --git a/libavcodec/arm/mpegaudiodsp_init_arm.c b/libavcodec/arm/mpegaudiodsp_init_arm.c index 94a5578..a9804e9 100644 --- a/libavcodec/arm/mpegaudiodsp_init_arm.c +++ b/libavcodec/arm/mpegaudiodsp_init_arm.c @@ -19,6 +19,8 @@ */ #include + +#include "libavutil/arm/cpu.h" #include "libavcodec/mpegaudiodsp.h" #include "config.h" @@ -27,7 +29,9 @@ void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf, int32_t *window, void ff_mpadsp_init_arm(MPADSPContext *s) { - if (HAVE_ARMV6) { + int cpu_flags = av_get_cpu_flags(); + + if (have_armv6(cpu_flags)) { s->apply_window_fixed = ff_mpadsp_apply_window_fixed_armv6; } } diff --git a/libavcodec/arm/mpegvideo_arm.c b/libavcodec/arm/mpegvideo_arm.c index 
26eed89..cce90c7 100644 --- a/libavcodec/arm/mpegvideo_arm.c +++ b/libavcodec/arm/mpegvideo_arm.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/arm/cpu.h" #include "libavcodec/avcodec.h" #include "libavcodec/dsputil.h" #include "libavcodec/mpegvideo.h" @@ -40,11 +41,12 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block, void ff_MPV_common_init_arm(MpegEncContext *s) { -#if HAVE_ARMV5TE - ff_MPV_common_init_armv5te(s); -#endif + int cpu_flags = av_get_cpu_flags(); + + if (have_armv5te(cpu_flags)) + ff_MPV_common_init_armv5te(s); - if (HAVE_NEON) { + if (have_neon(cpu_flags)) { s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon; s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_neon; } diff --git a/libavcodec/arm/sbrdsp_init_arm.c b/libavcodec/arm/sbrdsp_init_arm.c index 04294cc..4da7967 100644 --- a/libavcodec/arm/sbrdsp_init_arm.c +++ b/libavcodec/arm/sbrdsp_init_arm.c @@ -19,6 +19,7 @@ */ #include "config.h" +#include "libavutil/arm/cpu.h" #include "libavutil/attributes.h" #include "libavcodec/sbrdsp.h" @@ -51,7 +52,9 @@ void ff_sbr_hf_apply_noise_3_neon(float Y[64][2], const float *s_m, av_cold void ff_sbrdsp_init_arm(SBRDSPContext *s) { - if (HAVE_NEON) { + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { s->sum64x5 = ff_sbr_sum64x5_neon; s->sum_square = ff_sbr_sum_square_neon; s->neg_odd_64 = ff_sbr_neg_odd_64_neon; diff --git a/libavcodec/arm/vp56dsp_init_arm.c b/libavcodec/arm/vp56dsp_init_arm.c index 5989602..691db58 100644 --- a/libavcodec/arm/vp56dsp_init_arm.c +++ b/libavcodec/arm/vp56dsp_init_arm.c @@ -19,6 +19,8 @@ */ #include + +#include "libavutil/arm/cpu.h" #include "libavcodec/avcodec.h" #include "libavcodec/vp56dsp.h" @@ -27,7 +29,9 @@ void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, int stride, int t); void ff_vp56dsp_init_arm(VP56DSPContext *s, enum CodecID codec) { - if (codec != CODEC_ID_VP5 && HAVE_NEON) 
{ + int cpu_flags = av_get_cpu_flags(); + + if (codec != CODEC_ID_VP5 && have_neon(cpu_flags)) { s->edge_filter_hor = ff_vp6_edge_filter_hor_neon; s->edge_filter_ver = ff_vp6_edge_filter_ver_neon; } diff --git a/libavcodec/arm/vp8dsp_init_arm.c b/libavcodec/arm/vp8dsp_init_arm.c index 83f3634..29c64d4 100644 --- a/libavcodec/arm/vp8dsp_init_arm.c +++ b/libavcodec/arm/vp8dsp_init_arm.c @@ -17,6 +17,8 @@ */ #include + +#include "libavutil/arm/cpu.h" #include "libavcodec/vp8dsp.h" void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]); @@ -83,7 +85,9 @@ VP8_MC(bilin4_hv); av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp) { - if (HAVE_NEON) { + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_neon; dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_neon; diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile new file mode 100644 index 0000000..246f73a --- /dev/null +++ b/libavutil/arm/Makefile @@ -0,0 +1 @@ +OBJS += arm/cpu.o diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c new file mode 100644 index 0000000..33dca1c --- /dev/null +++ b/libavutil/arm/cpu.c @@ -0,0 +1,30 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/cpu.h" +#include "config.h" + +int ff_get_cpu_flags_arm(void) +{ + return AV_CPU_FLAG_ARMV5TE * HAVE_ARMV5TE | + AV_CPU_FLAG_ARMV6 * HAVE_ARMV6 | + AV_CPU_FLAG_ARMV6T2 * HAVE_ARMV6T2 | + AV_CPU_FLAG_VFP * HAVE_ARMVFP | + AV_CPU_FLAG_VFPV3 * HAVE_VFPV3 | + AV_CPU_FLAG_NEON * HAVE_NEON; +} diff --git a/libavutil/arm/cpu.h b/libavutil/arm/cpu.h new file mode 100644 index 0000000..72e16d4 --- /dev/null +++ b/libavutil/arm/cpu.h @@ -0,0 +1,32 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ARM_CPU_H +#define AVUTIL_ARM_CPU_H + +#include "config.h" +#include "libavutil/cpu.h" + +#define have_armv5te(flags) (HAVE_ARMV5TE && ((flags) & AV_CPU_FLAG_ARMV5TE)) +#define have_armv6(flags) (HAVE_ARMV6 && ((flags) & AV_CPU_FLAG_ARMV6)) +#define have_armv6t2(flags) (HAVE_ARMV6T2 && ((flags) & AV_CPU_FLAG_ARMV6T2)) +#define have_vfp(flags) (HAVE_ARMVFP && ((flags) & AV_CPU_FLAG_VFP)) +#define have_vfpv3(flags) (HAVE_VFPV3 && ((flags) & AV_CPU_FLAG_VFPV3)) +#define have_neon(flags) (HAVE_NEON && ((flags) & AV_CPU_FLAG_NEON)) + +#endif diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 6fc13ad..e23d401 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -28,6 +28,7 @@ int av_get_cpu_flags(void) if (checked) return flags; + if (ARCH_ARM) flags = ff_get_cpu_flags_arm(); if (ARCH_PPC) flags = ff_get_cpu_flags_ppc(); if (ARCH_X86) flags = ff_get_cpu_flags_x86(); @@ -52,7 +53,14 @@ static const struct { int flag; const char *name; } cpu_flag_tab[] = { -#if ARCH_PPC +#if ARCH_ARM + { AV_CPU_FLAG_ARMV5TE, "armv5te" }, + { AV_CPU_FLAG_ARMV6, "armv6" }, + { AV_CPU_FLAG_ARMV6T2, "armv6t2" }, + { AV_CPU_FLAG_VFP, "vfp" }, + { AV_CPU_FLAG_VFPV3, "vfpv3" }, + { AV_CPU_FLAG_NEON, "neon" }, +#elif ARCH_PPC { AV_CPU_FLAG_ALTIVEC, "altivec" }, #elif ARCH_X86 { AV_CPU_FLAG_MMX, "mmx" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 55ad7d1..0c0b1de 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -42,6 +42,13 @@ #define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard +#define AV_CPU_FLAG_ARMV5TE (1 << 0) +#define AV_CPU_FLAG_ARMV6 (1 << 1) +#define AV_CPU_FLAG_ARMV6T2 (1 << 2) +#define AV_CPU_FLAG_VFP (1 << 3) +#define AV_CPU_FLAG_VFPV3 (1 << 4) +#define AV_CPU_FLAG_NEON (1 
<< 5) + /** * Return the flags which specify extensions supported by the CPU. */ @@ -56,6 +63,7 @@ int av_get_cpu_flags(void); void av_set_cpu_flags_mask(int mask); /* The following CPU-specific functions shall not be called directly. */ +int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void);