aandcttables
ac3dsp
audio_frame_queue
+ blockdsp
cabac
dsputil
gcrypt
rdft_select="fft"
mpegaudio_select="mpegaudiodsp"
mpegaudiodsp_select="dct"
-mpegvideo_select="dsputil hpeldsp videodsp"
+mpegvideo_select="blockdsp dsputil hpeldsp videodsp"
mpegvideoenc_select="dsputil mpegvideo qpeldsp"
# decoders / encoders
amrwb_decoder_select="lsp"
amv_decoder_select="sp5x_decoder"
ape_decoder_select="dsputil"
-asv1_decoder_select="dsputil"
+asv1_decoder_select="blockdsp dsputil"
asv1_encoder_select="dsputil"
-asv2_decoder_select="dsputil"
+asv2_decoder_select="blockdsp dsputil"
asv2_encoder_select="dsputil"
atrac1_decoder_select="mdct sinewin"
atrac3_decoder_select="mdct"
atrac3p_decoder_select="mdct sinewin"
-bink_decoder_select="dsputil hpeldsp"
+bink_decoder_select="blockdsp hpeldsp"
binkaudio_dct_decoder_select="mdct rdft dct sinewin"
binkaudio_rdft_decoder_select="mdct rdft sinewin"
-cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp"
+cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
cllc_decoder_select="dsputil"
comfortnoise_encoder_select="lpc"
cook_decoder_select="dsputil mdct sinewin"
cscd_decoder_select="lzo"
cscd_decoder_suggest="zlib"
dca_decoder_select="mdct"
-dnxhd_decoder_select="dsputil"
-dnxhd_encoder_select="aandcttables dsputil mpegvideoenc"
+dnxhd_decoder_select="blockdsp dsputil"
+dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc"
dvvideo_decoder_select="dsputil"
dvvideo_encoder_select="dsputil"
dxa_decoder_deps="zlib"
eac3_decoder_select="ac3_decoder"
eac3_encoder_select="ac3_encoder"
-eamad_decoder_select="aandcttables dsputil mpegvideo"
+eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo"
eatgq_decoder_select="aandcttables dsputil"
-eatqi_decoder_select="aandcttables dsputil error_resilience mpegvideo"
+eatqi_decoder_select="aandcttables blockdsp dsputil error_resilience mpegvideo"
exr_decoder_deps="zlib"
ffv1_decoder_select="golomb rangecoder"
ffv1_encoder_select="rangecoder"
flashsv2_decoder_deps="zlib"
flv_decoder_select="h263_decoder"
flv_encoder_select="h263_encoder"
-fourxm_decoder_select="dsputil"
+fourxm_decoder_select="blockdsp dsputil"
fraps_decoder_select="dsputil huffman"
g2m_decoder_deps="zlib"
-g2m_decoder_select="dsputil"
+g2m_decoder_select="blockdsp dsputil"
h261_decoder_select="error_resilience mpegvideo"
h261_encoder_select="aandcttables mpegvideoenc"
h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp"
interplay_video_decoder_select="hpeldsp"
jpegls_decoder_select="golomb mjpeg_decoder"
jpegls_encoder_select="golomb"
-jv_decoder_select="dsputil"
+jv_decoder_select="blockdsp"
lagarith_decoder_select="huffyuvdsp"
ljpeg_encoder_select="aandcttables mpegvideoenc"
loco_decoder_select="golomb"
-mdec_decoder_select="dsputil error_resilience mpegvideo"
+mdec_decoder_select="blockdsp dsputil error_resilience mpegvideo"
metasound_decoder_select="lsp mdct sinewin"
-mimic_decoder_select="dsputil hpeldsp"
-mjpeg_decoder_select="dsputil hpeldsp"
+mimic_decoder_select="blockdsp dsputil hpeldsp"
+mjpeg_decoder_select="blockdsp dsputil hpeldsp"
mjpeg_encoder_select="aandcttables mpegvideoenc"
mjpegb_decoder_select="mjpeg_decoder"
mlp_decoder_select="mlp_parser"
utvideo_decoder_select="dsputil"
utvideo_encoder_select="dsputil huffman huffyuvencdsp"
vble_decoder_select="huffyuvdsp"
-vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
+vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
vc1image_decoder_select="vc1_decoder"
vorbis_decoder_select="mdct"
vorbis_encoder_select="mdct"
wmavoice_decoder_select="lsp rdft dct mdct sinewin"
wmv1_decoder_select="h263_decoder"
wmv1_encoder_select="h263_encoder"
-wmv2_decoder_select="h263_decoder intrax8 videodsp"
+wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp"
wmv2_encoder_select="h263_encoder"
wmv3_decoder_select="vc1_decoder"
wmv3image_decoder_select="wmv3_decoder"
#include "libavutil/imgutils.h"
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "bytestream.h"
#include "dsputil.h"
#include "get_bits.h"
typedef struct FourXContext {
AVCodecContext *avctx;
DSPContext dsp;
+ BlockDSPContext bdsp;
uint16_t *frame_buffer;
uint16_t *last_frame_buffer;
GetBitContext pre_gb; ///< ac/dc prefix
int ret;
int i;
- f->dsp.clear_blocks(f->block[0]);
+ f->bdsp.clear_blocks(f->block[0]);
for (i = 0; i < 6; i++)
if ((ret = decode_i_block(f, f->block[i])) < 0)
}
f->version = AV_RL32(avctx->extradata) >> 16;
+ ff_blockdsp_init(&f->bdsp, avctx);
ff_dsputil_init(&f->dsp, avctx);
f->avctx = avctx;
init_vlcs(f);
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o
OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o
+OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
OBJS-$(CONFIG_CABAC) += cabac.o
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
OBJS-$(CONFIG_DXVA2) += dxva2.o
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
arm/ac3dsp_arm.o
+OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o
OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \
arm/dsputil_arm.o \
arm/jrevdct_arm.o \
NEON-OBJS += arm/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
+NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \
+ arm/blockdsp_neon.o
NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \
arm/dsputil_neon.o \
arm/int_neon.o \
--- /dev/null
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_BLOCKDSP_ARM_H
+#define AVCODEC_ARM_BLOCKDSP_ARM_H
+
+#include "libavcodec/blockdsp.h"
+
+void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth);
+
+#endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */
--- /dev/null
+/*
+ * ARM optimized block operations
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/blockdsp.h"
+#include "blockdsp_arm.h"
+
+av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth) /* ARM entry point: installs the NEON routines into *c when the CPU reports NEON */
+{
+ int cpu_flags = av_get_cpu_flags(); /* CPU feature flags, queried once per init call */
+
+ if (have_neon(cpu_flags)) /* without NEON, *c is left exactly as the caller set it up */
+ ff_blockdsp_init_neon(c, high_bit_depth); /* high_bit_depth is forwarded unchanged */
+}
--- /dev/null
+/*
+ * ARM NEON optimised block operations
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/blockdsp.h"
+#include "blockdsp_arm.h"
+
+void ff_clear_block_neon(int16_t *block);
+void ff_clear_blocks_neon(int16_t *blocks);
+
+av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth) /* point the clear_block(s) hooks of *c at the NEON assembly versions */
+{
+ if (!high_bit_depth) { /* nothing is replaced when high_bit_depth is non-zero */
+ c->clear_block = ff_clear_block_neon; /* single-block clear */
+ c->clear_blocks = ff_clear_blocks_neon; /* multi-block clear (6 blocks per the .rept 8*6 in the assembly) */
+ }
+}
--- /dev/null
+/*
+ * ARM NEON optimised block functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_clear_block_neon, export=1 /* void ff_clear_block_neon(int16_t *block): zero 64 coefficients (8 stores x 16 bytes) */
+ vmov.i16 q0, #0 /* q0 = eight 16-bit zeros */
+ .rept 8 /* emit the store below 8 times */
+ vst1.16 {q0}, [r0,:128]! /* store 16 bytes at r0 (address must be 128-bit aligned), then r0 += 16 */
+ .endr
+ bx lr /* return to caller */
+endfunc
+
+function ff_clear_blocks_neon, export=1 /* void ff_clear_blocks_neon(int16_t *blocks): zero 6 * 64 coefficients (48 stores x 16 bytes) */
+ vmov.i16 q0, #0 /* q0 = eight 16-bit zeros */
+ .rept 8*6 /* 8 stores per block, 6 consecutive blocks */
+ vst1.16 {q0}, [r0,:128]! /* store 16 bytes at r0 (address must be 128-bit aligned), then r0 += 16 */
+ .endr
+ bx lr /* return to caller */
+endfunc
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
-void ff_clear_block_neon(int16_t *block);
-void ff_clear_blocks_neon(int16_t *blocks);
-
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
c->put_pixels_clamped = ff_put_pixels_clamped_neon;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
- if (!high_bit_depth) {
- c->clear_block = ff_clear_block_neon;
- c->clear_blocks = ff_clear_blocks_neon;
- }
-
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
#include "libavutil/arm/asm.S"
-function ff_clear_block_neon, export=1
- vmov.i16 q0, #0
- .rept 8
- vst1.16 {q0}, [r0,:128]!
- .endr
- bx lr
-endfunc
-
-function ff_clear_blocks_neon, export=1
- vmov.i16 q0, #0
- .rept 8*6
- vst1.16 {q0}, [r0,:128]!
- .endr
- bx lr
-endfunc
-
function ff_put_pixels_clamped_neon, export=1
vld1.16 {d16-d19}, [r0,:128]!
vqmovun.s16 d0, q8
#include "libavutil/mem.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "dsputil.h"
#include "get_bits.h"
#include "put_bits.h"
typedef struct ASV1Context{
AVCodecContext *avctx;
+ BlockDSPContext bdsp;
DSPContext dsp;
PutBitContext pb;
GetBitContext gb;
#include "asv.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "put_bits.h"
#include "internal.h"
#include "mathops.h"
{
int i;
- a->dsp.clear_blocks(block[0]);
+ a->bdsp.clear_blocks(block[0]);
if (a->avctx->codec_id == AV_CODEC_ID_ASV1) {
for (i = 0; i < 6; i++) {
}
ff_asv_common_init(avctx);
+ ff_blockdsp_init(&a->bdsp, avctx);
init_vlcs(a);
ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab);
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
#include "libavutil/imgutils.h"
#include "libavutil/internal.h"
#include "avcodec.h"
-#include "dsputil.h"
#include "binkdata.h"
#include "binkdsp.h"
+#include "blockdsp.h"
#include "hpeldsp.h"
#include "internal.h"
#include "mathops.h"
*/
typedef struct BinkContext {
AVCodecContext *avctx;
- DSPContext dsp;
+ BlockDSPContext bdsp;
HpelDSPContext hdsp;
BinkDSPContext binkdsp;
AVFrame *last;
} else {
put_pixels8x8_overlapped(dst, ref, stride);
}
- c->dsp.clear_block(block);
+ c->bdsp.clear_block(block);
v = binkb_get_value(c, BINKB_SRC_INTER_COEFS);
read_residue(gb, block, v);
c->binkdsp.add_pixels8(dst, block, stride);
break;
case 5:
v = binkb_get_value(c, BINKB_SRC_COLORS);
- c->dsp.fill_block_tab[1](dst, v, stride, 8);
+ c->bdsp.fill_block_tab[1](dst, v, stride, 8);
break;
case 6:
for (i = 0; i < 2; i++)
break;
case FILL_BLOCK:
v = get_value(c, BINK_SRC_COLORS);
- c->dsp.fill_block_tab[0](dst, v, stride, 16);
+ c->bdsp.fill_block_tab[0](dst, v, stride, 16);
break;
case PATTERN_BLOCK:
for (i = 0; i < 2; i++)
return AVERROR_INVALIDDATA;
}
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
- c->dsp.clear_block(block);
+ c->bdsp.clear_block(block);
v = get_bits(gb, 7);
read_residue(gb, block, v);
c->binkdsp.add_pixels8(dst, block, stride);
break;
case FILL_BLOCK:
v = get_value(c, BINK_SRC_COLORS);
- c->dsp.fill_block_tab[1](dst, v, stride, 8);
+ c->bdsp.fill_block_tab[1](dst, v, stride, 8);
break;
case INTER_BLOCK:
xoff = get_value(c, BINK_SRC_X_OFF);
avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
- ff_dsputil_init(&c->dsp, avctx);
+ ff_blockdsp_init(&c->bdsp, avctx);
ff_hpeldsp_init(&c->hdsp, avctx->flags);
ff_binkdsp_init(&c->binkdsp);
--- /dev/null
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "avcodec.h"
+#include "blockdsp.h"
+#include "version.h"
+
+static void clear_block_8_c(int16_t *block) /* portable C fallback: zero one block of 64 int16_t coefficients */
+{
+ memset(block, 0, sizeof(int16_t) * 64); /* 64 coefficients = 128 bytes */
+}
+
+static void clear_blocks_8_c(int16_t *blocks) /* portable C fallback: zero 6 consecutive blocks of 64 int16_t coefficients */
+{
+ memset(blocks, 0, sizeof(int16_t) * 6 * 64); /* the 6 blocks must be contiguous in memory; 768 bytes in total */
+}
+
+static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h) /* set h rows of 16 bytes each to value */
+{
+ int i;
+
+ for (i = 0; i < h; i++) { /* one iteration per row; h <= 0 writes nothing */
+ memset(block, value, 16); /* fill the 16 bytes of the current row */
+ block += line_size; /* line_size is the byte distance between row starts */
+ }
+}
+
+static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h) /* set h rows of 8 bytes each to value */
+{
+ int i;
+
+ for (i = 0; i < h; i++) { /* one iteration per row; h <= 0 writes nothing */
+ memset(block, value, 8); /* fill the 8 bytes of the current row */
+ block += line_size; /* line_size is the byte distance between row starts */
+ }
+}
+
+av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx) /* fill every hook of *c: C defaults first, then the per-architecture init */
+{
+ const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; /* 1 when the codec declares more than 8 bits per sample, else 0 */
+
+ c->clear_block = clear_block_8_c; /* C defaults; an architecture init below may replace them */
+ c->clear_blocks = clear_blocks_8_c;
+
+ c->fill_block_tab[0] = fill_block16_c; /* index 0: rows of 16 bytes */
+ c->fill_block_tab[1] = fill_block8_c; /* index 1: rows of 8 bytes */
+
+ if (ARCH_ARM) /* NOTE(review): ARCH_* are presumably 0/1 constants from config.h - confirm */
+ ff_blockdsp_init_arm(c, high_bit_depth);
+ if (ARCH_PPC)
+ ff_blockdsp_init_ppc(c, high_bit_depth);
+ if (ARCH_X86) /* exactly one of the two calls below survives preprocessing, so the unbraced if stays well-formed */
+#if FF_API_XVMC
+ ff_blockdsp_init_x86(c, high_bit_depth, avctx); /* this variant additionally receives avctx */
+#else
+ ff_blockdsp_init_x86(c, high_bit_depth);
+#endif /* FF_API_XVMC */
+}
--- /dev/null
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BLOCKDSP_H
+#define AVCODEC_BLOCKDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "version.h"
+
+/* Fill a block of pixels with a single byte value (decoding).
+ * The C implementations (fill_block8_c / fill_block16_c) write 8 or 16
+ * bytes per row for h rows, advancing by line_size after each row.
+ * NOTE(review): the limits below were carried over unchanged from the
+ * op_pixels_func documentation; confirm that they also hold for
+ * op_fill_func:
+ * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
+ * h for op_pixels_func is limited to { width / 2, width },
+ * but never larger than 16 and never smaller than 4. */
+typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
+ uint8_t value, int line_size, int h);
+
+typedef struct BlockDSPContext { /* function-pointer table for block clear/fill routines; populated by ff_blockdsp_init() */
+ void (*clear_block)(int16_t *block /* align 16 */); /* zero one block (64 int16_t in the C version) */
+ void (*clear_blocks)(int16_t *blocks /* align 16 */); /* zero 6 consecutive blocks (6 * 64 int16_t in the C version) */
+
+ op_fill_func fill_block_tab[2]; /* [0] fills 16-byte-wide rows, [1] fills 8-byte-wide rows */
+} BlockDSPContext;
+
+void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
+
+void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth);
+void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth);
+#if FF_API_XVMC
+void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+ AVCodecContext *avctx);
+#else
+void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth);
+#endif /* FF_API_XVMC */
+
+#endif /* AVCODEC_BLOCKDSP_H */
{
AVSContext *h = avctx->priv_data;
+ ff_blockdsp_init(&h->bdsp, avctx);
ff_dsputil_init(&h->dsp, avctx);
ff_h264chroma_init(&h->h264chroma, 8);
ff_videodsp_init(&h->vdsp, 8);
#define AVCODEC_CAVS_H
#include "cavsdsp.h"
+#include "blockdsp.h"
#include "dsputil.h"
#include "h264chroma.h"
#include "get_bits.h"
typedef struct AVSContext {
AVCodecContext *avctx;
DSPContext dsp;
+ BlockDSPContext bdsp;
H264ChromaContext h264chroma;
VideoDSPContext vdsp;
CAVSDSPContext cdsp;
dequant_shift[qp], i)) < 0)
return ret;
h->cdsp.cavs_idct8_add(dst, block, stride);
- h->dsp.clear_block(block);
+ h->bdsp.clear_block(block);
return 0;
}
#include "libavutil/imgutils.h"
#include "libavutil/timer.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "get_bits.h"
#include "dnxhddata.h"
#include "dsputil.h"
typedef struct DNXHDContext {
AVCodecContext *avctx;
GetBitContext gb;
+ BlockDSPContext bdsp;
int cid; ///< compression id
unsigned int width, height;
unsigned int mb_width, mb_height;
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) {
+ ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
ff_dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10_444;
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) {
+ ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
ff_dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10;
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P;
ctx->avctx->bits_per_raw_sample = 8;
if (ctx->bit_depth != 8) {
+ ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
ff_dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 8;
ctx->decode_dct_block = dnxhd_decode_dct_block_8;
skip_bits1(&ctx->gb);
for (i = 0; i < 8; i++) {
- ctx->dsp.clear_block(ctx->blocks[i]);
+ ctx->bdsp.clear_block(ctx->blocks[i]);
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
}
if (ctx->is_444) {
for (; i < 12; i++) {
- ctx->dsp.clear_block(ctx->blocks[i]);
+ ctx->bdsp.clear_block(ctx->blocks[i]);
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
}
}
#include "libavutil/timer.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "dsputil.h"
#include "internal.h"
#include "mpegvideo.h"
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
+ ff_blockdsp_init(&ctx->bdsp, avctx);
ff_dsputil_init(&ctx->m.dsp, avctx);
ff_dct_common_init(&ctx->m);
if (!ctx->m.dct_quantize)
ptr_v + ctx->dct_uv_offset,
ctx->m.uvlinesize);
} else {
- dsp->clear_block(ctx->blocks[4]);
- dsp->clear_block(ctx->blocks[5]);
- dsp->clear_block(ctx->blocks[6]);
- dsp->clear_block(ctx->blocks[7]);
+ ctx->bdsp.clear_block(ctx->blocks[4]);
+ ctx->bdsp.clear_block(ctx->blocks[5]);
+ ctx->bdsp.clear_block(ctx->blocks[6]);
+ ctx->bdsp.clear_block(ctx->blocks[7]);
}
} else {
dsp->get_pixels(ctx->blocks[4],
typedef struct DNXHDEncContext {
AVClass *class;
+ BlockDSPContext bdsp;
MpegEncContext m; ///< Used for quantization dsp functions
int cid;
return sum;
}
-static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
-{
- int i;
-
- for (i = 0; i < h; i++) {
- memset(block, value, 16);
- block += line_size;
- }
-}
-
-static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
-{
- int i;
-
- for (i = 0; i < h; i++) {
- memset(block, value, 8);
- block += line_size;
- }
-}
-
#define avg2(a, b) ((a + b + 1) >> 1)
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)
memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
}
-static void clear_block_8_c(int16_t *block)
-{
- memset(block, 0, sizeof(int16_t) * 64);
-}
-
-static void clear_blocks_8_c(int16_t *blocks)
-{
- memset(blocks, 0, sizeof(int16_t) * 6 * 64);
-}
-
/* init static data */
av_cold void ff_dsputil_static_init(void)
{
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
- c->fill_block_tab[0] = fill_block16_c;
- c->fill_block_tab[1] = fill_block8_c;
-
/* TODO [0] 16 [1] 8 */
c->pix_abs[0][0] = pix_abs16_c;
c->pix_abs[0][1] = pix_abs16_x2_c;
c->draw_edges = draw_edges_8_c;
- c->clear_block = clear_block_8_c;
- c->clear_blocks = clear_blocks_8_c;
-
switch (avctx->bits_per_raw_sample) {
case 9:
case 10:
int dxx, int dxy, int dyx, int dyy, int shift, int r,
int width, int height);
-/* minimum alignment rules ;)
- * If you notice errors in the align stuff, need more alignment for some ASM code
- * for some CPU or need to use a function with less aligned data then send a mail
- * to the libav-devel mailing list, ...
- *
- * !warning These alignments might not match reality, (missing attribute((align))
- * stuff somewhere possible).
- * I (Michael) did not check them, these are just the alignments which I think
- * could be reached easily ...
- *
- * !future video codecs might need functions with less strict alignment
- */
-
-/* add and put pixel (decoding)
- * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
- * h for op_pixels_func is limited to { width / 2, width },
- * but never larger than 16 and never smaller than 4. */
-typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
- uint8_t value, int line_size, int h);
-
struct MpegEncContext;
/* Motion estimation:
* h is limited to { width / 2, width, 2 * width },
int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy,
int shift, int r, int width, int height);
- void (*clear_block)(int16_t *block /* align 16 */);
- void (*clear_blocks)(int16_t *blocks /* align 16 */);
+
int (*pix_sum)(uint8_t *pix, int line_size);
int (*pix_norm1)(uint8_t *pix, int line_size);
*/
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
-
- op_fill_func fill_block_tab[2];
} DSPContext;
void ff_dsputil_static_init(void);
typedef struct MadContext {
AVCodecContext *avctx;
+ BlockDSPContext bdsp;
DSPContext dsp;
AVFrame *last_frame;
GetBitContext gb;
MadContext *s = avctx->priv_data;
s->avctx = avctx;
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+ ff_blockdsp_init(&s->bdsp, avctx);
ff_dsputil_init(&s->dsp, avctx);
ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
int add = 2*decode_motion(&s->gb);
comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add);
} else {
- s->dsp.clear_block(s->block);
+ s->bdsp.clear_block(s->block);
decode_block_intra(s, s->block);
idct_put(s, frame, s->block, s->mb_x, s->mb_y, j);
}
*/
#include "avcodec.h"
+#include "blockdsp.h"
#include "get_bits.h"
#include "aandcttab.h"
#include "eaidct.h"
TqiContext *t = avctx->priv_data;
MpegEncContext *s = &t->s;
s->avctx = avctx;
+ ff_blockdsp_init(&s->bdsp, avctx);
ff_dsputil_init(&s->dsp, avctx);
ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64])
{
int n;
- s->dsp.clear_blocks(block[0]);
+ s->bdsp.clear_blocks(block[0]);
for (n=0; n<6; n++)
if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0)
return -1;
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "bytestream.h"
#include "dsputil.h"
#include "get_bits.h"
};
typedef struct JPGContext {
+ BlockDSPContext bdsp;
DSPContext dsp;
ScanTable scantable;
if (ret)
return ret;
+ ff_blockdsp_init(&c->bdsp, avctx);
ff_dsputil_init(&c->dsp, avctx);
ff_init_scantable(c->dsp.idct_permutation, &c->scantable,
ff_zigzag_direct);
const int is_chroma = !!plane;
const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant;
- c->dsp.clear_block(block);
+ c->bdsp.clear_block(block);
dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3);
if (dc < 0)
return AVERROR_INVALIDDATA;
for (i = 0; i < 3; i++)
c->prev_dc[i] = 1024;
bx = by = 0;
- c->dsp.clear_blocks(c->block[0]);
+ c->bdsp.clear_blocks(c->block[0]);
for (mb_y = 0; mb_y < mb_h; mb_y++) {
for (mb_x = 0; mb_x < mb_w; mb_x++) {
if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] &&
intra:
/* decode each block */
if (s->mb_intra || HAS_CBP(h->mtype)) {
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) {
if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0)
return SLICE_ERROR;
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
s->block_last_index[i]= -1;
- s->dsp.clear_block(s->block[i]);
+ s->bdsp.clear_block(s->block[i]);
}
}
}else{
int sign;
assert(w->orient<12);
- s->dsp.clear_block(s->block[0]);
+ s->bdsp.clear_block(s->block[0]);
if(chroma){
dc_mode=2;
rl = &ff_rl_intra_aic;
i = 0;
s->gb= gb;
- s->dsp.clear_block(block);
+ s->bdsp.clear_block(block);
goto retry;
}
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
}
}while(cbpc == 20);
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
dquant = cbpc & 8;
s->mb_intra = ((cbpc & 4) != 0);
s->mb_intra = IS_INTRA(mb_type);
if(HAS_CBP(mb_type)){
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1);
if(s->mb_intra){
dquant = IS_QUANT(mb_type);
}
}while(cbpc == 8);
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
dquant = cbpc & 4;
s->mb_intra = 1;
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
-#include "dsputil.h"
+#include "blockdsp.h"
#include "get_bits.h"
#include "internal.h"
typedef struct JvContext {
- DSPContext dsp;
+ BlockDSPContext bdsp;
AVFrame *frame;
uint32_t palette[AVPALETTE_COUNT];
int palette_has_changed;
return AVERROR(ENOMEM);
avctx->pix_fmt = AV_PIX_FMT_PAL8;
- ff_dsputil_init(&s->dsp, avctx);
+ ff_blockdsp_init(&s->bdsp, avctx);
return 0;
}
* Decode 8x8 block
*/
static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize,
- DSPContext *dsp)
+ BlockDSPContext *bdsp)
{
int i, j, v[2];
switch (get_bits(gb, 2)) {
case 1:
v[0] = get_bits(gb, 8);
- dsp->fill_block_tab[1](dst, v[0], linesize, 8);
+ bdsp->fill_block_tab[1](dst, v[0], linesize, 8);
break;
case 2:
v[0] = get_bits(gb, 8);
for (i = 0; i < avctx->width; i += 8)
decode8x8(&gb,
s->frame->data[0] + j * s->frame->linesize[0] + i,
- s->frame->linesize[0], &s->dsp);
+ s->frame->linesize[0], &s->bdsp);
buf += video_size;
} else if (video_type == 2) {
*/
#include "avcodec.h"
+#include "blockdsp.h"
#include "mpegvideo.h"
#include "mpeg12.h"
#include "thread.h"
typedef struct MDECContext {
AVCodecContext *avctx;
+ BlockDSPContext bdsp;
DSPContext dsp;
ThreadFrame frame;
GetBitContext gb;
int i, ret;
const int block_index[6] = { 5, 4, 0, 1, 2, 3 };
- a->dsp.clear_blocks(block[0]);
+ a->bdsp.clear_blocks(block[0]);
for (i = 0; i < 6; i++) {
if ((ret = mdec_decode_block_intra(a, block[block_index[i]],
a->avctx = avctx;
+ ff_blockdsp_init(&a->bdsp, avctx);
ff_dsputil_init(&a->dsp, avctx);
ff_mpeg12_init_vlcs();
ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);
#include <stdint.h>
#include "avcodec.h"
+#include "blockdsp.h"
#include "internal.h"
#include "get_bits.h"
#include "bytestream.h"
GetBitContext gb;
ScanTable scantable;
+ BlockDSPContext bdsp;
DSPContext dsp;
HpelDSPContext hdsp;
VLC vlc;
av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n");
return ret;
}
+ ff_blockdsp_init(&ctx->bdsp, avctx);
ff_dsputil_init(&ctx->dsp, avctx);
ff_hpeldsp_init(&ctx->hdsp, avctx->flags);
ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag);
int16_t *block = ctx->dct_block;
unsigned int pos;
- ctx->dsp.clear_block(block);
+ ctx->bdsp.clear_block(block);
block[0] = get_bits(&ctx->gb, 8) << 3;
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "internal.h"
#include "mjpeg.h"
#include "mjpegdec.h"
}
s->avctx = avctx;
+ ff_blockdsp_init(&s->bdsp, avctx);
ff_hpeldsp_init(&s->hdsp, avctx->flags);
ff_dsputil_init(&s->dsp, avctx);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
int16_t *quant_matrix, int Al)
{
int val;
- s->dsp.clear_block(block);
+ s->bdsp.clear_block(block);
val = mjpeg_decode_dc(s, dc_index);
if (val == 0xffff) {
av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
reference_data[c] + block_offset,
linesize[c], 8);
else {
- s->dsp.clear_block(s->block);
+ s->bdsp.clear_block(s->block);
if (decode_block(s, s->block, i,
s->dc_index[i], s->ac_index[i],
s->quant_matrixes[s->quant_index[c]]) < 0) {
#include "libavutil/pixdesc.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "get_bits.h"
#include "dsputil.h"
#include "hpeldsp.h"
uint8_t *last_nnz[MAX_COMPONENTS];
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
ScanTable scantable;
+ BlockDSPContext bdsp;
DSPContext dsp;
HpelDSPContext hdsp;
av_dlog(s->avctx, "mb_type=%x\n", mb_type);
// motion_type = 0; /* avoid warning */
if (IS_INTRA(mb_type)) {
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
if (!s->chroma_y_shift)
- s->dsp.clear_blocks(s->block[6]);
+ s->bdsp.clear_blocks(s->block[6]);
/* compute DCT type */
// FIXME: add an interlaced_dct coded var?
s->mb_intra = 0;
if (HAS_CBP(mb_type)) {
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1);
if (mb_block_count > 6) {
cbp <<= mb_block_count - 6;
cbp |= get_bits(&s->gb, mb_block_count - 6);
- s->dsp.clear_blocks(s->block[6]);
+ s->bdsp.clear_blocks(s->block[6]);
}
if (cbp <= 0) {
av_log(s->avctx, AV_LOG_ERROR,
if (!IS_SKIP(mb_type)) {
int i;
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
/* decode each block */
for (i = 0; i < 6; i++) {
if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) {
}
} while (cbpc == 20);
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
dquant = cbpc & 8;
s->mb_intra = ((cbpc & 4) != 0);
if (s->mb_intra)
if (modb2) {
cbp = 0;
} else {
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
cbp = get_bits(&s->gb, 6);
}
if (!s->progressive_sequence)
s->interlaced_dct = get_bits1(&s->gb);
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
/* decode each block */
for (i = 0; i < 6; i++) {
if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0)
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) {
s->block_last_index[i] = -1;
- s->dsp.clear_block(s->block[i]);
+ s->bdsp.clear_block(s->block[i]);
}
}
} else {
#include "libavutil/internal.h"
#include "libavutil/timer.h"
#include "avcodec.h"
+#include "blockdsp.h"
#include "dsputil.h"
#include "internal.h"
#include "mathops.h"
ff_init_block_index(s);
ff_update_block_index(s);
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
s->dest[0] = s->current_picture.f->data[0] + (s->mb_y * 16 * s->linesize) + s->mb_x * 16;
s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
/* init common dct for both encoder and decoder */
av_cold int ff_dct_common_init(MpegEncContext *s)
{
+ ff_blockdsp_init(&s->bdsp, s->avctx);
ff_dsputil_init(&s->dsp, s->avctx);
ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
#define AVCODEC_MPEGVIDEO_H
#include "avcodec.h"
+#include "blockdsp.h"
#include "dsputil.h"
#include "error_resilience.h"
#include "get_bits.h"
int unrestricted_mv; ///< mv can point outside of the coded picture
int h263_long_vectors; ///< use horrible h263v1 long vector mode
+ BlockDSPContext bdsp;
DSPContext dsp; ///< pointers for accelerated dsp functions
HpelDSPContext hdsp;
QpelDSPContext qdsp;
}
}
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) {
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
}
}
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) {
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
OBJS += ppc/fmtconvert_altivec.o \
+OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o
OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o
OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o
OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o
--- /dev/null
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <string.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavcodec/blockdsp.h"
+
+/* ***** WARNING ***** WARNING ***** WARNING ***** */
+/*
+ * clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with
+ * a cache line size not equal to 32 bytes. Fortunately all processors used
+ * by Apple up to at least the 7450 (AKA second generation G4) use 32-byte
+ * cache lines. This is due to the use of the 'dcbz' instruction. It simply
+ * clears a single cache line to zero, so you need to know the cache line
+ * size to use it! It's absurd, but it's fast...
+ *
+ * update 24/06/2003: Apple released the G5 yesterday, with a PPC970.
+ * cache line size: 128 bytes. Oops.
+ * The semantics of dcbz was changed, it always clears 32 bytes. So the function
+ * below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
+ * which is defined to clear a cache line (as dcbz before). So we can still
+ * distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
+ *
+ * see <http://developer.apple.com/technotes/tn/tn2087.html>
+ * and <http://developer.apple.com/technotes/tn/tn2086.html>
+ */
+static void clear_blocks_dcbz32_ppc(int16_t *blocks)
+{
+    /* Clears sizeof(int16_t) * 6 * 64 = 768 bytes starting at blocks, using
+     * dcbz on 32-byte lines. Bit 4 of the address tells whether the buffer
+     * starts in the middle of a 32-byte line. */
+    register int misal = (unsigned long) blocks & 0x00000010, i = 0;
+
+    if (misal) {
+        /* Zero the leading 16 bytes by hand so that the dcbz loop below
+         * starts on a 32-byte boundary. A fixed-size memset is used instead
+         * of stores through an unsigned long pointer: those stores cover
+         * 16 bytes only when long is 4 bytes wide. */
+        memset(blocks, 0, 16);
+        i += 16;
+    }
+    for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
+        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
+    if (misal) {
+        /* The loop stopped 16 bytes short of the end; zero bytes 752..767.
+         * With 8-byte longs the former indices 188..191 would have written
+         * well past the end of the buffer. */
+        memset((char *) blocks + sizeof(int16_t) * 6 * 64 - 16, 0, 16);
+    }
+}
+
+/* Variant for CPUs where dcbzl clears a whole 128-byte cache line,
+ * i.e. the PPC970 AKA G5. Zeroes sizeof(int16_t) * 6 * 64 bytes at blocks.
+ * Falls back to memset() when dcbzl is not available at build time or when
+ * blocks is not 128-byte aligned (that case is deliberately not optimized). */
+static void clear_blocks_dcbz128_ppc(int16_t *blocks)
+{
+#if HAVE_DCBZL
+    if (!((unsigned long) blocks & 0x0000007f)) {
+        register int i;
+
+        for (i = 0; i < sizeof(int16_t) * 6 * 64; i += 128)
+            __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory");
+        return;
+    }
+#endif
+    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
+}
+
+/* Report how many bytes a single dcbzl instruction sets to zero.
+ * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is issued on its
+ * midpoint, and the zero bytes are counted afterwards.
+ * Returns 0 when HAVE_DCBZL is unset (the assembler does not know dcbzl) or
+ * when the scratch buffer cannot be allocated.
+ * update 24/06/2003: dcbzl replaced dcbz here to get the intended effect
+ * (Apple "fixed" dcbz). */
+static long check_dcbzl_effect(void)
+{
+    long count = 0;
+#if HAVE_DCBZL
+    register char *fakedata = av_malloc(1024);
+    register char *fakedata_middle;
+    register long zero = 0, i = 0;
+
+    if (!fakedata)
+        return 0L;
+
+    fakedata_middle = fakedata + 512;
+
+    memset(fakedata, 0xFF, 1024);
+
+    /* The "b" constraint seems to mean "address base register" in
+     * gcc-3.3 / RS/6000 terms; it seems to avoid using r0.
+     * The "memory" clobber tells the compiler that the instruction modifies
+     * the buffer, so the counting loop below must re-read it instead of
+     * assuming it still holds the 0xFF pattern written by memset(). */
+    __asm__ volatile ("dcbzl %0, %1"
+                      :: "b" (fakedata_middle), "r" (zero)
+                      : "memory");
+
+    for (i = 0; i < 1024; i++)
+        if (fakedata[i] == (char) 0)
+            count++;
+
+    av_free(fakedata);
+#endif
+
+    return count;
+}
+
+#if HAVE_ALTIVEC
+/* Zero one block: eight 16-byte vector stores covering byte offsets
+ * 0..127 of block. */
+static void clear_block_altivec(int16_t *block)
+{
+    LOAD_ZERO;
+    int offset;
+
+    for (offset = 0; offset < 128; offset += 16)
+        vec_st(zero_s16v, offset, block);
+}
+#endif /* HAVE_ALTIVEC */
+
+/* Install the PowerPC block-clearing routines into c.
+ * Nothing is overridden when high_bit_depth is non-zero. */
+av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth)
+{
+    const int low_depth = !high_bit_depth;
+
+    /* The dcbz-based routines do not depend on AltiVec; choose one from
+     * the number of bytes dcbzl was measured to clear. Any other value
+     * leaves c->clear_blocks as it was. */
+    if (low_depth) {
+        const long cleared = check_dcbzl_effect();
+
+        if (cleared == 32)
+            c->clear_blocks = clear_blocks_dcbz32_ppc;
+        else if (cleared == 128)
+            c->clear_blocks = clear_blocks_dcbz128_ppc;
+    }
+
+#if HAVE_ALTIVEC
+    if (PPC_ALTIVEC(av_get_cpu_flags()) && low_depth)
+        c->clear_block = clear_block_altivec;
+#endif /* HAVE_ALTIVEC */
+}
}
}
-static void clear_block_altivec(int16_t *block)
-{
- LOAD_ZERO;
- vec_st(zero_s16v, 0, block);
- vec_st(zero_s16v, 16, block);
- vec_st(zero_s16v, 32, block);
- vec_st(zero_s16v, 48, block);
- vec_st(zero_s16v, 64, block);
- vec_st(zero_s16v, 80, block);
- vec_st(zero_s16v, 96, block);
- vec_st(zero_s16v, 112, block);
-}
-
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
uint8_t *src, int stride, int h)
{
if (!high_bit_depth) {
c->get_pixels = get_pixels_altivec;
- c->clear_block = clear_block_altivec;
}
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
-#include "libavutil/mem.h"
#include "libavutil/ppc/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "dsputil_altivec.h"
-/* ***** WARNING ***** WARNING ***** WARNING ***** */
-/*
- * clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with
- * a cache line size not equal to 32 bytes. Fortunately all processors used
- * by Apple up to at least the 7450 (AKA second generation G4) use 32-byte
- * cache lines. This is due to the use of the 'dcbz' instruction. It simply
- * clears a single cache line to zero, so you need to know the cache line
- * size to use it! It's absurd, but it's fast...
- *
- * update 24/06/2003: Apple released the G5 yesterday, with a PPC970.
- * cache line size: 128 bytes. Oups.
- * The semantics of dcbz was changed, it always clears 32 bytes. So the function
- * below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
- * which is defined to clear a cache line (as dcbz before). So we can still
- * distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
- *
- * see <http://developer.apple.com/technotes/tn/tn2087.html>
- * and <http://developer.apple.com/technotes/tn/tn2086.html>
- */
-static void clear_blocks_dcbz32_ppc(int16_t *blocks)
-{
- register int misal = (unsigned long) blocks & 0x00000010, i = 0;
-
- if (misal) {
- ((unsigned long *) blocks)[0] = 0L;
- ((unsigned long *) blocks)[1] = 0L;
- ((unsigned long *) blocks)[2] = 0L;
- ((unsigned long *) blocks)[3] = 0L;
- i += 16;
- }
- for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
- __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
- if (misal) {
- ((unsigned long *) blocks)[188] = 0L;
- ((unsigned long *) blocks)[189] = 0L;
- ((unsigned long *) blocks)[190] = 0L;
- ((unsigned long *) blocks)[191] = 0L;
- i += 16;
- }
-}
-
-/* Same as above, when dcbzl clears a whole 128 bytes cache line
- * i.e. the PPC970 AKA G5. */
-static void clear_blocks_dcbz128_ppc(int16_t *blocks)
-{
-#if HAVE_DCBZL
- register int misal = (unsigned long) blocks & 0x0000007f, i = 0;
-
- if (misal) {
- /* We could probably also optimize this case,
- * but there's not much point as the machines
- * aren't available yet (2003-06-26). */
- memset(blocks, 0, sizeof(int16_t) * 6 * 64);
- } else {
- for (; i < sizeof(int16_t) * 6 * 64; i += 128)
- __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory");
- }
-#else
- memset(blocks, 0, sizeof(int16_t) * 6 * 64);
-#endif
-}
-
-/* Check dcbz report how many bytes are set to 0 by dcbz. */
-/* update 24/06/2003: Replace dcbz by dcbzl to get the intended effect
- * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
- * assembler knows about dcbzl ... */
-static long check_dcbzl_effect(void)
-{
- long count = 0;
-#if HAVE_DCBZL
- register char *fakedata = av_malloc(1024);
- register char *fakedata_middle;
- register long zero = 0, i = 0;
-
- if (!fakedata)
- return 0L;
-
- fakedata_middle = fakedata + 512;
-
- memset(fakedata, 0xFF, 1024);
-
- /* Below the constraint "b" seems to mean "address base register"
- * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */
- __asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero));
-
- for (i = 0; i < 1024; i++)
- if (fakedata[i] == (char) 0)
- count++;
-
- av_free(fakedata);
-#endif
-
- return count;
-}
-
av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
- // common optimizations whether AltiVec is available or not
- if (!high_bit_depth) {
- switch (check_dcbzl_effect()) {
- case 32:
- c->clear_blocks = clear_blocks_dcbz32_ppc;
- break;
- case 128:
- c->clear_blocks = clear_blocks_dcbz128_ppc;
- break;
- default:
- break;
- }
- }
-
if (PPC_ALTIVEC(av_get_cpu_flags())) {
ff_dsputil_init_altivec(c, avctx, high_bit_depth);
ff_int_init_altivec(c, avctx);
int scale;
int q1, q2 = 0;
- s->dsp.clear_block(block);
+ s->bdsp.clear_block(block);
/* XXX: Guard against dumb values of mquant */
mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant);
int ttblk = ttmb & 7;
int pat = 0;
- s->dsp.clear_block(block);
+ s->bdsp.clear_block(block);
if (ttmb == -1) {
ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)];
dst[3] = dst[2] + 8;
dst[4] = s->dest[1];
dst[5] = s->dest[2];
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_width;
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
s->current_picture.qscale_table[mb_pos] = v->pq;
for (;s->mb_x < s->mb_width; s->mb_x++) {
int16_t (*block)[64] = v->block[v->cur_blk_idx];
ff_update_block_index(s);
- s->dsp.clear_blocks(block[0]);
+ s->bdsp.clear_blocks(block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_stride;
s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA;
s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
if (ff_vc1_init_common(v) < 0)
return -1;
+ ff_blockdsp_init(&s->bdsp, avctx);
ff_h264chroma_init(&v->h264chroma, 8);
ff_qpeldsp_init(&s->qdsp);
ff_vc1dsp_init(&v->vc1dsp);
av_cold void ff_wmv2_common_init(Wmv2Context * w){
MpegEncContext * const s= &w->s;
+ ff_blockdsp_init(&s->bdsp, s->avctx);
ff_wmv2dsp_init(&w->wdsp);
s->dsp.idct_permutation_type = w->wdsp.idct_perm;
ff_init_scantable_permutation(s->dsp.idct_permutation,
case 1:
ff_simple_idct84_add(dst , stride, block1);
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
- s->dsp.clear_block(w->abt_block2[n]);
+ s->bdsp.clear_block(w->abt_block2[n]);
break;
case 2:
ff_simple_idct48_add(dst , stride, block1);
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]);
- s->dsp.clear_block(w->abt_block2[n]);
+ s->bdsp.clear_block(w->abt_block2[n]);
break;
default:
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");
wmv2_pred_motion(w, &mx, &my);
if(cbp){
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
if(s->per_mb_rl_table){
s->rl_table_index = decode012(&s->gb);
s->rl_chroma_table_index = s->rl_table_index;
s->rl_chroma_table_index = s->rl_table_index;
}
- s->dsp.clear_blocks(s->block[0]);
+ s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) {
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
+MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/idct_mmx_xvid.o \
x86/idct_sse2_xvid.o \
--- /dev/null
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/blockdsp.h"
+#include "libavcodec/version.h"
+
+#if HAVE_INLINE_ASM
+
+/* Define a static MMX routine called name that zeroes 128 * n bytes
+ * starting at blocks, i.e. n blocks of 64 int16_t each.
+ * The index register counts from -128 * n up to 0 in steps of 32 bytes
+ * (four 8-byte movq stores per iteration); "js" loops while it is negative.
+ * The asm stores to memory that is not listed as an operand, hence the
+ * "memory" clobber: without it the compiler may reorder or cache accesses
+ * to the cleared buffer across the statement. */
+#define CLEAR_BLOCKS(name, n)                           \
+static void name(int16_t *blocks)                       \
+{                                                       \
+    __asm__ volatile (                                  \
+        "pxor %%mm7, %%mm7              \n\t"           \
+        "mov     %1,        %%"REG_a"   \n\t"           \
+        "1:                             \n\t"           \
+        "movq %%mm7,   (%0, %%"REG_a")  \n\t"           \
+        "movq %%mm7,  8(%0, %%"REG_a")  \n\t"           \
+        "movq %%mm7, 16(%0, %%"REG_a")  \n\t"           \
+        "movq %%mm7, 24(%0, %%"REG_a")  \n\t"           \
+        "add    $32, %%"REG_a"          \n\t"           \
+        "js      1b                     \n\t"           \
+        :: "r"(((uint8_t *) blocks) + 128 * n),         \
+           "i"(-128 * n)                                \
+        : "%"REG_a, "memory");                          \
+}
+CLEAR_BLOCKS(clear_blocks_mmx, 6)
+CLEAR_BLOCKS(clear_block_mmx, 1)
+
+/* Zero one block with SSE: eight 16-byte movaps stores at byte offsets
+ * 0, 16, ..., 112 from block (128 bytes in total). movaps is an aligned
+ * store, so block has to be 16-byte aligned. */
+static void clear_block_sse(int16_t *block)
+{
+ __asm__ volatile (
+ "xorps %%xmm0, %%xmm0 \n"
+ "movaps %%xmm0, (%0) \n"
+ "movaps %%xmm0, 16(%0) \n"
+ "movaps %%xmm0, 32(%0) \n"
+ "movaps %%xmm0, 48(%0) \n"
+ "movaps %%xmm0, 64(%0) \n"
+ "movaps %%xmm0, 80(%0) \n"
+ "movaps %%xmm0, 96(%0) \n"
+ "movaps %%xmm0, 112(%0) \n"
+ :: "r" (block)
+ : "memory");
+}
+
+/* Zero 128 * 6 bytes starting at blocks (six blocks) with SSE.
+ * The index register counts from -128 * 6 up to 0 in steps of 128 bytes
+ * (eight 16-byte movaps stores per iteration); "js" loops while it is
+ * negative. movaps is an aligned store, so blocks has to be 16-byte aligned.
+ * As in clear_block_sse(), the "memory" clobber tells the compiler that the
+ * asm writes memory which is not listed as an operand. */
+static void clear_blocks_sse(int16_t *blocks)
+{
+    __asm__ volatile (
+        "xorps  %%xmm0, %%xmm0              \n"
+        "mov        %1,         %%"REG_a"   \n"
+        "1:                                 \n"
+        "movaps %%xmm0,    (%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  16(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  32(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  48(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  64(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  80(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  96(%0, %%"REG_a")  \n"
+        "movaps %%xmm0, 112(%0, %%"REG_a")  \n"
+        "add      $128,         %%"REG_a"   \n"
+        "js         1b                      \n"
+        :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
+        : "%"REG_a, "memory");
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+/* Install the x86 inline-asm block-clearing routines into c.
+ * The signature takes an extra avctx argument while FF_API_XVMC is enabled,
+ * because the XvMC check below reads avctx->xvmc_acceleration.
+ * Nothing is overridden when high_bit_depth is non-zero or when inline asm
+ * is unavailable. */
+#if FF_API_XVMC
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+ AVCodecContext *avctx)
+#else
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
+#endif /* FF_API_XVMC */
+{
+#if HAVE_INLINE_ASM
+ int cpu_flags = av_get_cpu_flags();
+
+ if (!high_bit_depth) {
+ if (INLINE_MMX(cpu_flags)) {
+ c->clear_block = clear_block_mmx;
+ c->clear_blocks = clear_blocks_mmx;
+ }
+
+#if FF_API_XVMC
+FF_DISABLE_DEPRECATION_WARNINGS
+ /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
+ /* Returning here keeps the MMX routines chosen above and skips the SSE
+ * ones, which use aligned (movaps) stores. */
+ if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
+ return;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_XVMC */
+
+ /* When available, the SSE routines replace the MMX ones set above. */
+ if (INLINE_SSE(cpu_flags)) {
+ c->clear_block = clear_block_sse;
+ c->clear_blocks = clear_blocks_sse;
+ }
+ }
+#endif /* HAVE_INLINE_ASM */
+}
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
-#include "libavutil/internal.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/simple_idct.h"
-#include "libavcodec/version.h"
#include "dsputil_x86.h"
#include "idct_xvid.h"
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (!high_bit_depth) {
- c->clear_block = ff_clear_block_mmx;
- c->clear_blocks = ff_clear_blocks_mmx;
c->draw_edges = ff_draw_edges_mmx;
switch (avctx->idct_algo) {
{
#if HAVE_SSE_INLINE
c->vector_clipf = ff_vector_clipf_sse;
-
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
- /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
- if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
- return;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
- if (!high_bit_depth) {
- c->clear_block = ff_clear_block_sse;
- c->clear_blocks = ff_clear_blocks_sse;
- }
#endif /* HAVE_SSE_INLINE */
}
} while (--i);
}
-#define CLEAR_BLOCKS(name, n) \
-void name(int16_t *blocks) \
-{ \
- __asm__ volatile ( \
- "pxor %%mm7, %%mm7 \n\t" \
- "mov %1, %%"REG_a" \n\t" \
- "1: \n\t" \
- "movq %%mm7, (%0, %%"REG_a") \n\t" \
- "movq %%mm7, 8(%0, %%"REG_a") \n\t" \
- "movq %%mm7, 16(%0, %%"REG_a") \n\t" \
- "movq %%mm7, 24(%0, %%"REG_a") \n\t" \
- "add $32, %%"REG_a" \n\t" \
- "js 1b \n\t" \
- :: "r"(((uint8_t *) blocks) + 128 * n), \
- "i"(-128 * n) \
- : "%"REG_a); \
-}
-CLEAR_BLOCKS(ff_clear_blocks_mmx, 6)
-CLEAR_BLOCKS(ff_clear_block_mmx, 1)
-
-void ff_clear_block_sse(int16_t *block)
-{
- __asm__ volatile (
- "xorps %%xmm0, %%xmm0 \n"
- "movaps %%xmm0, (%0) \n"
- "movaps %%xmm0, 16(%0) \n"
- "movaps %%xmm0, 32(%0) \n"
- "movaps %%xmm0, 48(%0) \n"
- "movaps %%xmm0, 64(%0) \n"
- "movaps %%xmm0, 80(%0) \n"
- "movaps %%xmm0, 96(%0) \n"
- "movaps %%xmm0, 112(%0) \n"
- :: "r" (block)
- : "memory");
-}
-
-void ff_clear_blocks_sse(int16_t *blocks)
-{
- __asm__ volatile (
- "xorps %%xmm0, %%xmm0 \n"
- "mov %1, %%"REG_a" \n"
- "1: \n"
- "movaps %%xmm0, (%0, %%"REG_a") \n"
- "movaps %%xmm0, 16(%0, %%"REG_a") \n"
- "movaps %%xmm0, 32(%0, %%"REG_a") \n"
- "movaps %%xmm0, 48(%0, %%"REG_a") \n"
- "movaps %%xmm0, 64(%0, %%"REG_a") \n"
- "movaps %%xmm0, 80(%0, %%"REG_a") \n"
- "movaps %%xmm0, 96(%0, %%"REG_a") \n"
- "movaps %%xmm0, 112(%0, %%"REG_a") \n"
- "add $128, %%"REG_a" \n"
- "js 1b \n"
- :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
- : "%"REG_a);
-}
-
/* Draw the edges of width 'w' of an image of size width, height
* this MMX version can only handle w == 8 || w == 16. */
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
int line_size);
-void ff_clear_block_mmx(int16_t *block);
-void ff_clear_block_sse(int16_t *block);
-void ff_clear_blocks_mmx(int16_t *blocks);
-void ff_clear_blocks_sse(int16_t *blocks);
-
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides);