dsputil: Split clear_block*/fill_block* off into a separate context

author Diego Biurrun <diego@biurrun.de>

Tue, 14 Jan 2014 09:33:47 +0000 (10:33 +0100)

committer Diego Biurrun <diego@biurrun.de>

Wed, 18 Jun 2014 21:07:23 +0000 (14:07 -0700)
author Diego Biurrun <diego@biurrun.de>
Tue, 14 Jan 2014 09:33:47 +0000 (10:33 +0100)
committer Diego Biurrun <diego@biurrun.de>
Wed, 18 Jun 2014 21:07:23 +0000 (14:07 -0700)
diff --git a/configure b/configure

index cc47c8a..c538632 100755 (executable)
--- a/configure
+++ b/configure
@@ -1530,6 +1530,7 @@ CONFIG_EXTRA="
      aandcttables
      ac3dsp
      audio_frame_queue
+    blockdsp
      cabac
      dsputil
      gcrypt
@@ -1705,7 +1706,7 @@ mdct_select="fft"
  rdft_select="fft"
  mpegaudio_select="mpegaudiodsp"
  mpegaudiodsp_select="dct"
-mpegvideo_select="dsputil hpeldsp videodsp"
+mpegvideo_select="blockdsp dsputil hpeldsp videodsp"
  mpegvideoenc_select="dsputil mpegvideo qpeldsp"
  
  # decoders / encoders
@@ -1722,33 +1723,33 @@ amrnb_decoder_select="lsp"
  amrwb_decoder_select="lsp"
  amv_decoder_select="sp5x_decoder"
  ape_decoder_select="dsputil"
-asv1_decoder_select="dsputil"
+asv1_decoder_select="blockdsp dsputil"
  asv1_encoder_select="dsputil"
-asv2_decoder_select="dsputil"
+asv2_decoder_select="blockdsp dsputil"
  asv2_encoder_select="dsputil"
  atrac1_decoder_select="mdct sinewin"
  atrac3_decoder_select="mdct"
  atrac3p_decoder_select="mdct sinewin"
-bink_decoder_select="dsputil hpeldsp"
+bink_decoder_select="blockdsp hpeldsp"
  binkaudio_dct_decoder_select="mdct rdft dct sinewin"
  binkaudio_rdft_decoder_select="mdct rdft sinewin"
-cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp"
+cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
  cllc_decoder_select="dsputil"
  comfortnoise_encoder_select="lpc"
  cook_decoder_select="dsputil mdct sinewin"
  cscd_decoder_select="lzo"
  cscd_decoder_suggest="zlib"
  dca_decoder_select="mdct"
-dnxhd_decoder_select="dsputil"
-dnxhd_encoder_select="aandcttables dsputil mpegvideoenc"
+dnxhd_decoder_select="blockdsp dsputil"
+dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc"
  dvvideo_decoder_select="dsputil"
  dvvideo_encoder_select="dsputil"
  dxa_decoder_deps="zlib"
  eac3_decoder_select="ac3_decoder"
  eac3_encoder_select="ac3_encoder"
-eamad_decoder_select="aandcttables dsputil mpegvideo"
+eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo"
  eatgq_decoder_select="aandcttables dsputil"
-eatqi_decoder_select="aandcttables dsputil error_resilience mpegvideo"
+eatqi_decoder_select="aandcttables blockdsp dsputil error_resilience mpegvideo"
  exr_decoder_deps="zlib"
  ffv1_decoder_select="golomb rangecoder"
  ffv1_encoder_select="rangecoder"
@@ -1762,10 +1763,10 @@ flashsv_encoder_deps="zlib"
  flashsv2_decoder_deps="zlib"
  flv_decoder_select="h263_decoder"
  flv_encoder_select="h263_encoder"
-fourxm_decoder_select="dsputil"
+fourxm_decoder_select="blockdsp dsputil"
  fraps_decoder_select="dsputil huffman"
  g2m_decoder_deps="zlib"
-g2m_decoder_select="dsputil"
+g2m_decoder_select="blockdsp dsputil"
  h261_decoder_select="error_resilience mpegvideo"
  h261_encoder_select="aandcttables mpegvideoenc"
  h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp"
@@ -1783,14 +1784,14 @@ indeo3_decoder_select="hpeldsp"
  interplay_video_decoder_select="hpeldsp"
  jpegls_decoder_select="golomb mjpeg_decoder"
  jpegls_encoder_select="golomb"
-jv_decoder_select="dsputil"
+jv_decoder_select="blockdsp"
  lagarith_decoder_select="huffyuvdsp"
  ljpeg_encoder_select="aandcttables mpegvideoenc"
  loco_decoder_select="golomb"
-mdec_decoder_select="dsputil error_resilience mpegvideo"
+mdec_decoder_select="blockdsp dsputil error_resilience mpegvideo"
  metasound_decoder_select="lsp mdct sinewin"
-mimic_decoder_select="dsputil hpeldsp"
-mjpeg_decoder_select="dsputil hpeldsp"
+mimic_decoder_select="blockdsp dsputil hpeldsp"
+mjpeg_decoder_select="blockdsp dsputil hpeldsp"
  mjpeg_encoder_select="aandcttables mpegvideoenc"
  mjpegb_decoder_select="mjpeg_decoder"
  mlp_decoder_select="mlp_parser"
@@ -1862,7 +1863,7 @@ twinvq_decoder_select="mdct lsp sinewin"
  utvideo_decoder_select="dsputil"
  utvideo_encoder_select="dsputil huffman huffyuvencdsp"
  vble_decoder_select="huffyuvdsp"
-vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
+vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
  vc1image_decoder_select="vc1_decoder"
  vorbis_decoder_select="mdct"
  vorbis_encoder_select="mdct"
@@ -1883,7 +1884,7 @@ wmav2_encoder_select="mdct sinewin"
  wmavoice_decoder_select="lsp rdft dct mdct sinewin"
  wmv1_decoder_select="h263_decoder"
  wmv1_encoder_select="h263_encoder"
-wmv2_decoder_select="h263_decoder intrax8 videodsp"
+wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp"
  wmv2_encoder_select="h263_encoder"
  wmv3_decoder_select="vc1_decoder"
  wmv3image_decoder_select="wmv3_decoder"
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c

index 3c89f1c..b958e84 100644 (file)
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -30,6 +30,7 @@
  #include "libavutil/imgutils.h"
  #include "libavutil/intreadwrite.h"
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "bytestream.h"
  #include "dsputil.h"
  #include "get_bits.h"
@@ -132,6 +133,7 @@ typedef struct CFrameBuffer {
  typedef struct FourXContext {
      AVCodecContext *avctx;
      DSPContext dsp;
+    BlockDSPContext bdsp;
      uint16_t *frame_buffer;
      uint16_t *last_frame_buffer;
      GetBitContext pre_gb;          ///< ac/dc prefix
@@ -564,7 +566,7 @@ static int decode_i_mb(FourXContext *f)
      int ret;
      int i;
  
-    f->dsp.clear_blocks(f->block[0]);
+    f->bdsp.clear_blocks(f->block[0]);
  
      for (i = 0; i < 6; i++)
          if ((ret = decode_i_block(f, f->block[i])) < 0)
@@ -953,6 +955,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
      }
  
      f->version = AV_RL32(avctx->extradata) >> 16;
+    ff_blockdsp_init(&f->bdsp, avctx);
      ff_dsputil_init(&f->dsp, avctx);
      f->avctx = avctx;
      init_vlcs(f);
diff --git a/libavcodec/Makefile b/libavcodec/Makefile

index a61e673..c591545 100644 (file)
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -28,6 +28,7 @@ OBJS = allcodecs.o                                                      \
  OBJS-$(CONFIG_AANDCTTABLES)            += aandcttab.o
  OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o
  OBJS-$(CONFIG_AUDIO_FRAME_QUEUE)       += audio_frame_queue.o
+OBJS-$(CONFIG_BLOCKDSP)                += blockdsp.o
  OBJS-$(CONFIG_CABAC)                   += cabac.o
  OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
  OBJS-$(CONFIG_DXVA2)                   += dxva2.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile

index 13025af..381e997 100644 (file)
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -4,6 +4,7 @@ OBJS                                   += arm/fmtconvert_init_arm.o
  
  OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
                                            arm/ac3dsp_arm.o
+OBJS-$(CONFIG_BLOCKDSP)                += arm/blockdsp_init_arm.o
  OBJS-$(CONFIG_DSPUTIL)                 += arm/dsputil_init_arm.o        \
                                            arm/dsputil_arm.o             \
                                            arm/jrevdct_arm.o             \
@@ -76,6 +77,8 @@ VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
  NEON-OBJS                              += arm/fmtconvert_neon.o
  
  NEON-OBJS-$(CONFIG_AC3DSP)             += arm/ac3dsp_neon.o
+NEON-OBJS-$(CONFIG_BLOCKDSP)           += arm/blockdsp_init_neon.o      \
+                                          arm/blockdsp_neon.o
  NEON-OBJS-$(CONFIG_DSPUTIL)            += arm/dsputil_init_neon.o       \
                                            arm/dsputil_neon.o            \
                                            arm/int_neon.o                \
diff --git a/libavcodec/arm/blockdsp_arm.h b/libavcodec/arm/blockdsp_arm.h

new file mode 100644 (file)

index 0000000..6d9c2c3
--- /dev/null
+++ b/libavcodec/arm/blockdsp_arm.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_BLOCKDSP_ARM_H
+#define AVCODEC_ARM_BLOCKDSP_ARM_H
+
+#include "libavcodec/blockdsp.h"
+
+void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth);
+
+#endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */
diff --git a/libavcodec/arm/blockdsp_init_arm.c b/libavcodec/arm/blockdsp_init_arm.c

new file mode 100644 (file)

index 0000000..a0c0367
--- /dev/null
+++ b/libavcodec/arm/blockdsp_init_arm.c
@@ -0,0 +1,33 @@
+/*
+ * ARM optimized block operations
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/blockdsp.h"
+#include "blockdsp_arm.h"
+
+av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        ff_blockdsp_init_neon(c, high_bit_depth);
+}
diff --git a/libavcodec/arm/blockdsp_init_neon.c b/libavcodec/arm/blockdsp_init_neon.c

new file mode 100644 (file)

index 0000000..5081cf0
--- /dev/null
+++ b/libavcodec/arm/blockdsp_init_neon.c
@@ -0,0 +1,37 @@
+/*
+ * ARM NEON optimised block operations
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/blockdsp.h"
+#include "blockdsp_arm.h"
+
+void ff_clear_block_neon(int16_t *block);
+void ff_clear_blocks_neon(int16_t *blocks);
+
+av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth)
+{
+    if (!high_bit_depth) {
+        c->clear_block  = ff_clear_block_neon;
+        c->clear_blocks = ff_clear_blocks_neon;
+    }
+}
diff --git a/libavcodec/arm/blockdsp_neon.S b/libavcodec/arm/blockdsp_neon.S

new file mode 100644 (file)

index 0000000..98df2c6
--- /dev/null
+++ b/libavcodec/arm/blockdsp_neon.S
@@ -0,0 +1,38 @@
+/*
+ * ARM NEON optimised block functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_clear_block_neon, export=1
+        vmov.i16        q0,  #0
+        .rept           8
+        vst1.16         {q0}, [r0,:128]!
+        .endr
+        bx              lr
+endfunc
+
+function ff_clear_blocks_neon, export=1
+        vmov.i16        q0,  #0
+        .rept           8*6
+        vst1.16         {q0}, [r0,:128]!
+        .endr
+        bx              lr
+endfunc
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c

index c9bdaa5..6863e05 100644 (file)
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -30,9 +30,6 @@ void ff_simple_idct_neon(int16_t *data);
  void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
  void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
  
-void ff_clear_block_neon(int16_t *block);
-void ff_clear_blocks_neon(int16_t *blocks);
-
  void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
  void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
  void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
@@ -61,11 +58,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
      c->put_pixels_clamped        = ff_put_pixels_clamped_neon;
      c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
  
-    if (!high_bit_depth) {
-        c->clear_block  = ff_clear_block_neon;
-        c->clear_blocks = ff_clear_blocks_neon;
-    }
-
      c->vector_clipf      = ff_vector_clipf_neon;
      c->vector_clip_int32 = ff_vector_clip_int32_neon;
  
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S

index e30bd10..d494ec7 100644 (file)
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -21,22 +21,6 @@
  
  #include "libavutil/arm/asm.S"
  
-function ff_clear_block_neon, export=1
-        vmov.i16        q0,  #0
-        .rept           8
-        vst1.16         {q0}, [r0,:128]!
-        .endr
-        bx              lr
-endfunc
-
-function ff_clear_blocks_neon, export=1
-        vmov.i16        q0,  #0
-        .rept           8*6
-        vst1.16         {q0}, [r0,:128]!
-        .endr
-        bx              lr
-endfunc
-
  function ff_put_pixels_clamped_neon, export=1
          vld1.16         {d16-d19}, [r0,:128]!
          vqmovun.s16     d0, q8
diff --git a/libavcodec/asv.h b/libavcodec/asv.h

index 3e56857..7a4e48b 100644 (file)
--- a/libavcodec/asv.h
+++ b/libavcodec/asv.h
@@ -31,12 +31,14 @@
  #include "libavutil/mem.h"
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "dsputil.h"
  #include "get_bits.h"
  #include "put_bits.h"
  
  typedef struct ASV1Context{
      AVCodecContext *avctx;
+    BlockDSPContext bdsp;
      DSPContext dsp;
      PutBitContext pb;
      GetBitContext gb;
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c

index f160434..5bbca46 100644 (file)
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -28,6 +28,7 @@
  
  #include "asv.h"
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "put_bits.h"
  #include "internal.h"
  #include "mathops.h"
@@ -164,7 +165,7 @@ static inline int decode_mb(ASV1Context *a, int16_t block[6][64])
  {
      int i;
  
-    a->dsp.clear_blocks(block[0]);
+    a->bdsp.clear_blocks(block[0]);
  
      if (a->avctx->codec_id == AV_CODEC_ID_ASV1) {
          for (i = 0; i < 6; i++) {
@@ -280,6 +281,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
      }
  
      ff_asv_common_init(avctx);
+    ff_blockdsp_init(&a->bdsp, avctx);
      init_vlcs(a);
      ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab);
      avctx->pix_fmt = AV_PIX_FMT_YUV420P;
diff --git a/libavcodec/bink.c b/libavcodec/bink.c

index d1e94d9..e34585b 100644 (file)
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -24,9 +24,9 @@
  #include "libavutil/imgutils.h"
  #include "libavutil/internal.h"
  #include "avcodec.h"
-#include "dsputil.h"
  #include "binkdata.h"
  #include "binkdsp.h"
+#include "blockdsp.h"
  #include "hpeldsp.h"
  #include "internal.h"
  #include "mathops.h"
@@ -113,7 +113,7 @@ typedef struct Bundle {
   */
  typedef struct BinkContext {
      AVCodecContext *avctx;
-    DSPContext     dsp;
+    BlockDSPContext bdsp;
      HpelDSPContext hdsp;
      BinkDSPContext binkdsp;
      AVFrame        *last;
@@ -880,7 +880,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                  } else {
                      put_pixels8x8_overlapped(dst, ref, stride);
                  }
-                c->dsp.clear_block(block);
+                c->bdsp.clear_block(block);
                  v = binkb_get_value(c, BINKB_SRC_INTER_COEFS);
                  read_residue(gb, block, v);
                  c->binkdsp.add_pixels8(dst, block, stride);
@@ -904,7 +904,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                  break;
              case 5:
                  v = binkb_get_value(c, BINKB_SRC_COLORS);
-                c->dsp.fill_block_tab[1](dst, v, stride, 8);
+                c->bdsp.fill_block_tab[1](dst, v, stride, 8);
                  break;
              case 6:
                  for (i = 0; i < 2; i++)
@@ -1047,7 +1047,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                      break;
                  case FILL_BLOCK:
                      v = get_value(c, BINK_SRC_COLORS);
-                    c->dsp.fill_block_tab[0](dst, v, stride, 16);
+                    c->bdsp.fill_block_tab[0](dst, v, stride, 16);
                      break;
                  case PATTERN_BLOCK:
                      for (i = 0; i < 2; i++)
@@ -1117,7 +1117,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                      return AVERROR_INVALIDDATA;
                  }
                  c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
-                c->dsp.clear_block(block);
+                c->bdsp.clear_block(block);
                  v = get_bits(gb, 7);
                  read_residue(gb, block, v);
                  c->binkdsp.add_pixels8(dst, block, stride);
@@ -1130,7 +1130,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                  break;
              case FILL_BLOCK:
                  v = get_value(c, BINK_SRC_COLORS);
-                c->dsp.fill_block_tab[1](dst, v, stride, 8);
+                c->bdsp.fill_block_tab[1](dst, v, stride, 8);
                  break;
              case INTER_BLOCK:
                  xoff = get_value(c, BINK_SRC_X_OFF);
@@ -1310,7 +1310,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
  
      avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
  
-    ff_dsputil_init(&c->dsp, avctx);
+    ff_blockdsp_init(&c->bdsp, avctx);
      ff_hpeldsp_init(&c->hdsp, avctx->flags);
      ff_binkdsp_init(&c->binkdsp);
  
diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c

new file mode 100644 (file)

index 0000000..e3d2ca1
--- /dev/null
+++ b/libavcodec/blockdsp.c
@@ -0,0 +1,78 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "avcodec.h"
+#include "blockdsp.h"
+#include "version.h"
+
+static void clear_block_8_c(int16_t *block)
+{
+    memset(block, 0, sizeof(int16_t) * 64);
+}
+
+static void clear_blocks_8_c(int16_t *blocks)
+{
+    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
+}
+
+static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    int i;
+
+    for (i = 0; i < h; i++) {
+        memset(block, value, 16);
+        block += line_size;
+    }
+}
+
+static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    int i;
+
+    for (i = 0; i < h; i++) {
+        memset(block, value, 8);
+        block += line_size;
+    }
+}
+
+av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
+{
+    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
+
+    c->clear_block  = clear_block_8_c;
+    c->clear_blocks = clear_blocks_8_c;
+
+    c->fill_block_tab[0] = fill_block16_c;
+    c->fill_block_tab[1] = fill_block8_c;
+
+    if (ARCH_ARM)
+        ff_blockdsp_init_arm(c, high_bit_depth);
+    if (ARCH_PPC)
+        ff_blockdsp_init_ppc(c, high_bit_depth);
+    if (ARCH_X86)
+#if FF_API_XVMC
+        ff_blockdsp_init_x86(c, high_bit_depth, avctx);
+#else
+        ff_blockdsp_init_x86(c, high_bit_depth);
+#endif /* FF_API_XVMC */
+}
diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h

new file mode 100644 (file)

index 0000000..32c671c
--- /dev/null
+++ b/libavcodec/blockdsp.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BLOCKDSP_H
+#define AVCODEC_BLOCKDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "version.h"
+
+/* Fill a rectangular block of bytes with a constant value.
+ * fill_block_tab[0] handles blocks 16 bytes wide, fill_block_tab[1]
+ * blocks 8 bytes wide; h is the number of rows to fill and line_size
+ * is the offset in bytes from the start of one row to the next. */
+typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
+                             uint8_t value, int line_size, int h);
+
+typedef struct BlockDSPContext {
+    void (*clear_block)(int16_t *block /* align 16 */);
+    void (*clear_blocks)(int16_t *blocks /* align 16 */);
+
+    op_fill_func fill_block_tab[2];
+} BlockDSPContext;
+
+void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
+
+void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth);
+void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth);
+#if FF_API_XVMC
+void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+                          AVCodecContext *avctx);
+#else
+void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth);
+#endif /* FF_API_XVMC */
+
+#endif /* AVCODEC_BLOCKDSP_H */
diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c

index 3f21dcc..21bc1ed 100644 (file)
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -759,6 +759,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx)
  {
      AVSContext *h = avctx->priv_data;
  
+    ff_blockdsp_init(&h->bdsp, avctx);
      ff_dsputil_init(&h->dsp, avctx);
      ff_h264chroma_init(&h->h264chroma, 8);
      ff_videodsp_init(&h->vdsp, 8);
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h

index 7d9b94e..c5a10b5 100644 (file)
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -23,6 +23,7 @@
  #define AVCODEC_CAVS_H
  
  #include "cavsdsp.h"
+#include "blockdsp.h"
  #include "dsputil.h"
  #include "h264chroma.h"
  #include "get_bits.h"
@@ -162,6 +163,7 @@ typedef struct AVSFrame {
  typedef struct AVSContext {
      AVCodecContext *avctx;
      DSPContext       dsp;
+    BlockDSPContext bdsp;
      H264ChromaContext h264chroma;
      VideoDSPContext vdsp;
      CAVSDSPContext  cdsp;
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c

index a8ed192..fbbd048 100644 (file)
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -581,7 +581,7 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
                        dequant_shift[qp], i)) < 0)
          return ret;
      h->cdsp.cavs_idct8_add(dst, block, stride);
-    h->dsp.clear_block(block);
+    h->bdsp.clear_block(block);
      return 0;
  }
  
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c

index 4daee04..3bd8ffe 100644 (file)
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -25,6 +25,7 @@
  #include "libavutil/imgutils.h"
  #include "libavutil/timer.h"
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "get_bits.h"
  #include "dnxhddata.h"
  #include "dsputil.h"
@@ -33,6 +34,7 @@
  typedef struct DNXHDContext {
      AVCodecContext *avctx;
      GetBitContext gb;
+    BlockDSPContext bdsp;
      int cid;                            ///< compression id
      unsigned int width, height;
      unsigned int mb_width, mb_height;
@@ -133,6 +135,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
          ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
          ctx->avctx->bits_per_raw_sample = 10;
          if (ctx->bit_depth != 10) {
+            ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
              ff_dsputil_init(&ctx->dsp, ctx->avctx);
              ctx->bit_depth = 10;
              ctx->decode_dct_block = dnxhd_decode_dct_block_10_444;
@@ -142,6 +145,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
          ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
          ctx->avctx->bits_per_raw_sample = 10;
          if (ctx->bit_depth != 10) {
+            ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
              ff_dsputil_init(&ctx->dsp, ctx->avctx);
              ctx->bit_depth = 10;
              ctx->decode_dct_block = dnxhd_decode_dct_block_10;
@@ -150,6 +154,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
          ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P;
          ctx->avctx->bits_per_raw_sample = 8;
          if (ctx->bit_depth != 8) {
+            ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
              ff_dsputil_init(&ctx->dsp, ctx->avctx);
              ctx->bit_depth = 8;
              ctx->decode_dct_block = dnxhd_decode_dct_block_8;
@@ -307,12 +312,12 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame,
      skip_bits1(&ctx->gb);
  
      for (i = 0; i < 8; i++) {
-        ctx->dsp.clear_block(ctx->blocks[i]);
+        ctx->bdsp.clear_block(ctx->blocks[i]);
          ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
      }
      if (ctx->is_444) {
          for (; i < 12; i++) {
-            ctx->dsp.clear_block(ctx->blocks[i]);
+            ctx->bdsp.clear_block(ctx->blocks[i]);
              ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
          }
      }
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c

index 4b06f6c..c637415 100644 (file)
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -29,6 +29,7 @@
  #include "libavutil/timer.h"
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "dsputil.h"
  #include "internal.h"
  #include "mpegvideo.h"
@@ -305,6 +306,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
  
      avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
  
+    ff_blockdsp_init(&ctx->bdsp, avctx);
      ff_dsputil_init(&ctx->m.dsp, avctx);
      ff_dct_common_init(&ctx->m);
      if (!ctx->m.dct_quantize)
@@ -556,10 +558,10 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
                                      ptr_v + ctx->dct_uv_offset,
                                      ctx->m.uvlinesize);
          } else {
-            dsp->clear_block(ctx->blocks[4]);
-            dsp->clear_block(ctx->blocks[5]);
-            dsp->clear_block(ctx->blocks[6]);
-            dsp->clear_block(ctx->blocks[7]);
+            ctx->bdsp.clear_block(ctx->blocks[4]);
+            ctx->bdsp.clear_block(ctx->blocks[5]);
+            ctx->bdsp.clear_block(ctx->blocks[6]);
+            ctx->bdsp.clear_block(ctx->blocks[7]);
          }
      } else {
          dsp->get_pixels(ctx->blocks[4],
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h

index 215482e..c3248a2 100644 (file)
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -41,6 +41,7 @@ typedef struct RCEntry {
  
  typedef struct DNXHDEncContext {
      AVClass *class;
+    BlockDSPContext bdsp;
      MpegEncContext m; ///< Used for quantization dsp functions
  
      int cid;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c

index 0ef9d8c..8f5ddd0 100644 (file)
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -373,26 +373,6 @@ static int sum_abs_dctelem_c(int16_t *block)
      return sum;
  }
  
-static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
-{
-    int i;
-
-    for (i = 0; i < h; i++) {
-        memset(block, value, 16);
-        block += line_size;
-    }
-}
-
-static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
-{
-    int i;
-
-    for (i = 0; i < h; i++) {
-        memset(block, value, 8);
-        block += line_size;
-    }
-}
-
  #define avg2(a, b) ((a + b + 1) >> 1)
  #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)
  
@@ -1408,16 +1388,6 @@ static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
              memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
  }
  
-static void clear_block_8_c(int16_t *block)
-{
-    memset(block, 0, sizeof(int16_t) * 64);
-}
-
-static void clear_blocks_8_c(int16_t *blocks)
-{
-    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
-}
-
  /* init static data */
  av_cold void ff_dsputil_static_init(void)
  {
@@ -1487,9 +1457,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
      c->pix_sum   = pix_sum_c;
      c->pix_norm1 = pix_norm1_c;
  
-    c->fill_block_tab[0] = fill_block16_c;
-    c->fill_block_tab[1] = fill_block8_c;
-
      /* TODO [0] 16  [1] 8 */
      c->pix_abs[0][0] = pix_abs16_c;
      c->pix_abs[0][1] = pix_abs16_x2_c;
@@ -1546,9 +1513,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
  
      c->draw_edges = draw_edges_8_c;
  
-    c->clear_block  = clear_block_8_c;
-    c->clear_blocks = clear_blocks_8_c;
-
      switch (avctx->bits_per_raw_sample) {
      case 9:
      case 10:
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h

index 070580f..1aad789 100644 (file)
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -38,26 +38,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                int dxx, int dxy, int dyx, int dyy, int shift, int r,
                int width, int height);
  
-/* minimum alignment rules ;)
- * If you notice errors in the align stuff, need more alignment for some ASM code
- * for some CPU or need to use a function with less aligned data then send a mail
- * to the libav-devel mailing list, ...
- *
- * !warning These alignments might not match reality, (missing attribute((align))
- * stuff somewhere possible).
- * I (Michael) did not check them, these are just the alignments which I think
- * could be reached easily ...
- *
- * !future video codecs might need functions with less strict alignment
- */
-
-/* add and put pixel (decoding)
- * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
- * h for op_pixels_func is limited to { width / 2, width },
- * but never larger than 16 and never smaller than 4. */
-typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
-                             uint8_t value, int line_size, int h);
-
  struct MpegEncContext;
  /* Motion estimation:
   * h is limited to { width / 2, width, 2 * width },
@@ -116,8 +96,7 @@ typedef struct DSPContext {
                  int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy,
                  int shift, int r, int width, int height);
-    void (*clear_block)(int16_t *block /* align 16 */);
-    void (*clear_blocks)(int16_t *blocks /* align 16 */);
+
      int (*pix_sum)(uint8_t *pix, int line_size);
      int (*pix_norm1)(uint8_t *pix, int line_size);
  
@@ -234,8 +213,6 @@ typedef struct DSPContext {
       */
      void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len);
-
-    op_fill_func fill_block_tab[2];
  } DSPContext;
  
  void ff_dsputil_static_init(void);
diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c

index 22070a4..4bc0739 100644 (file)
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -44,6 +44,7 @@
  
  typedef struct MadContext {
      AVCodecContext *avctx;
+    BlockDSPContext bdsp;
      DSPContext dsp;
      AVFrame *last_frame;
      GetBitContext gb;
@@ -61,6 +62,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
      MadContext *s = avctx->priv_data;
      s->avctx = avctx;
      avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    ff_blockdsp_init(&s->bdsp, avctx);
      ff_dsputil_init(&s->dsp, avctx);
      ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
      ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
@@ -207,7 +209,7 @@ static void decode_mb(MadContext *s, AVFrame *frame, int inter)
              int add = 2*decode_motion(&s->gb);
              comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add);
          } else {
-            s->dsp.clear_block(s->block);
+            s->bdsp.clear_block(s->block);
              decode_block_intra(s, s->block);
              idct_put(s, frame, s->block, s->mb_x, s->mb_y, j);
          }
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c

index 2345cc7..8c31f1f 100644 (file)
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -27,6 +27,7 @@
   */
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "get_bits.h"
  #include "aandcttab.h"
  #include "eaidct.h"
@@ -46,6 +47,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
      TqiContext *t = avctx->priv_data;
      MpegEncContext *s = &t->s;
      s->avctx = avctx;
+    ff_blockdsp_init(&s->bdsp, avctx);
      ff_dsputil_init(&s->dsp, avctx);
      ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
      ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
@@ -59,7 +61,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
  static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64])
  {
      int n;
-    s->dsp.clear_blocks(block[0]);
+    s->bdsp.clear_blocks(block[0]);
      for (n=0; n<6; n++)
          if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0)
              return -1;
diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c

index a741b6f..9660155 100644 (file)
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -29,6 +29,7 @@
  
  #include "libavutil/intreadwrite.h"
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "bytestream.h"
  #include "dsputil.h"
  #include "get_bits.h"
@@ -72,6 +73,7 @@ static const uint8_t chroma_quant[64] = {
  };
  
  typedef struct JPGContext {
+    BlockDSPContext bdsp;
      DSPContext dsp;
      ScanTable  scantable;
  
@@ -150,6 +152,7 @@ static av_cold int jpg_init(AVCodecContext *avctx, JPGContext *c)
      if (ret)
          return ret;
  
+    ff_blockdsp_init(&c->bdsp, avctx);
      ff_dsputil_init(&c->dsp, avctx);
      ff_init_scantable(c->dsp.idct_permutation, &c->scantable,
                        ff_zigzag_direct);
@@ -193,7 +196,7 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb,
      const int is_chroma = !!plane;
      const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant;
  
-    c->dsp.clear_block(block);
+    c->bdsp.clear_block(block);
      dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3);
      if (dc < 0)
          return AVERROR_INVALIDDATA;
@@ -259,7 +262,7 @@ static int jpg_decode_data(JPGContext *c, int width, int height,
      for (i = 0; i < 3; i++)
          c->prev_dc[i] = 1024;
      bx = by = 0;
-    c->dsp.clear_blocks(c->block[0]);
+    c->bdsp.clear_blocks(c->block[0]);
      for (mb_y = 0; mb_y < mb_h; mb_y++) {
          for (mb_x = 0; mb_x < mb_w; mb_x++) {
              if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] &&
diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c

index 0d996eb..73f6a59 100644 (file)
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -433,7 +433,7 @@ static int h261_decode_mb(H261Context *h)
  intra:
      /* decode each block */
      if (s->mb_intra || HAS_CBP(h->mtype)) {
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
          for (i = 0; i < 6; i++) {
              if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0)
                  return SLICE_ERROR;
diff --git a/libavcodec/h263.h b/libavcodec/h263.h

index c6ad618..dbbe7ce 100644 (file)
--- a/libavcodec/h263.h
+++ b/libavcodec/h263.h
@@ -197,7 +197,7 @@ static inline int get_p_cbp(MpegEncContext * s,
          for (i = 0; i < 6; i++) {
              if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
                  s->block_last_index[i]= -1;
-                s->dsp.clear_block(s->block[i]);
+                s->bdsp.clear_block(s->block[i]);
              }
          }
      }else{
diff --git a/libavcodec/intrax8.c b/libavcodec/intrax8.c

index 962c460..2bda723 100644 (file)
--- a/libavcodec/intrax8.c
+++ b/libavcodec/intrax8.c
@@ -538,7 +538,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
      int sign;
  
      assert(w->orient<12);
-    s->dsp.clear_block(s->block[0]);
+    s->bdsp.clear_block(s->block[0]);
  
      if(chroma){
          dc_mode=2;
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c

index 55a8c45..dc3de30 100644 (file)
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -538,7 +538,7 @@ retry:
                  rl = &ff_rl_intra_aic;
                  i = 0;
                  s->gb= gb;
-                s->dsp.clear_block(block);
+                s->bdsp.clear_block(block);
                  goto retry;
              }
              av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
@@ -628,7 +628,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
              }
          }while(cbpc == 20);
  
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
  
          dquant = cbpc & 8;
          s->mb_intra = ((cbpc & 4) != 0);
@@ -723,7 +723,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
  
          s->mb_intra = IS_INTRA(mb_type);
          if(HAS_CBP(mb_type)){
-            s->dsp.clear_blocks(s->block[0]);
+            s->bdsp.clear_blocks(s->block[0]);
              cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1);
              if(s->mb_intra){
                  dquant = IS_QUANT(mb_type);
@@ -797,7 +797,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
              }
          }while(cbpc == 8);
  
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
  
          dquant = cbpc & 4;
          s->mb_intra = 1;
diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c

index 662a944..bb347e0 100644 (file)
--- a/libavcodec/jvdec.c
+++ b/libavcodec/jvdec.c
@@ -28,12 +28,12 @@
  #include "libavutil/intreadwrite.h"
  
  #include "avcodec.h"
-#include "dsputil.h"
+#include "blockdsp.h"
  #include "get_bits.h"
  #include "internal.h"
  
  typedef struct JvContext {
-    DSPContext dsp;
+    BlockDSPContext bdsp;
      AVFrame   *frame;
      uint32_t   palette[AVPALETTE_COUNT];
      int        palette_has_changed;
@@ -48,7 +48,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
          return AVERROR(ENOMEM);
  
      avctx->pix_fmt = AV_PIX_FMT_PAL8;
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_blockdsp_init(&s->bdsp, avctx);
      return 0;
  }
  
@@ -113,14 +113,14 @@ static inline void decode4x4(GetBitContext *gb, uint8_t *dst, int linesize)
   * Decode 8x8 block
   */
  static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize,
-                             DSPContext *dsp)
+                             BlockDSPContext *bdsp)
  {
      int i, j, v[2];
  
      switch (get_bits(gb, 2)) {
      case 1:
          v[0] = get_bits(gb, 8);
-        dsp->fill_block_tab[1](dst, v[0], linesize, 8);
+        bdsp->fill_block_tab[1](dst, v[0], linesize, 8);
          break;
      case 2:
          v[0] = get_bits(gb, 8);
@@ -163,7 +163,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                  for (i = 0; i < avctx->width; i += 8)
                      decode8x8(&gb,
                                s->frame->data[0] + j * s->frame->linesize[0] + i,
-                              s->frame->linesize[0], &s->dsp);
+                              s->frame->linesize[0], &s->bdsp);
  
              buf += video_size;
          } else if (video_type == 2) {
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c

index d6c6060..b421397 100644 (file)
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -28,12 +28,14 @@
   */
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "mpegvideo.h"
  #include "mpeg12.h"
  #include "thread.h"
  
  typedef struct MDECContext {
      AVCodecContext *avctx;
+    BlockDSPContext bdsp;
      DSPContext dsp;
      ThreadFrame frame;
      GetBitContext gb;
@@ -123,7 +125,7 @@ static inline int decode_mb(MDECContext *a, int16_t block[6][64])
      int i, ret;
      const int block_index[6] = { 5, 4, 0, 1, 2, 3 };
  
-    a->dsp.clear_blocks(block[0]);
+    a->bdsp.clear_blocks(block[0]);
  
      for (i = 0; i < 6; i++) {
          if ((ret = mdec_decode_block_intra(a, block[block_index[i]],
@@ -212,6 +214,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
  
      a->avctx           = avctx;
  
+    ff_blockdsp_init(&a->bdsp, avctx);
      ff_dsputil_init(&a->dsp, avctx);
      ff_mpeg12_init_vlcs();
      ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c

index 264c74a..179ffea 100644 (file)
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -24,6 +24,7 @@
  #include <stdint.h>
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "internal.h"
  #include "get_bits.h"
  #include "bytestream.h"
@@ -52,6 +53,7 @@ typedef struct {
  
      GetBitContext   gb;
      ScanTable       scantable;
+    BlockDSPContext bdsp;
      DSPContext      dsp;
      HpelDSPContext  hdsp;
      VLC             vlc;
@@ -145,6 +147,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
          av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n");
          return ret;
      }
+    ff_blockdsp_init(&ctx->bdsp, avctx);
      ff_dsputil_init(&ctx->dsp, avctx);
      ff_hpeldsp_init(&ctx->hdsp, avctx->flags);
      ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag);
@@ -227,7 +230,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale)
      int16_t *block = ctx->dct_block;
      unsigned int pos;
  
-    ctx->dsp.clear_block(block);
+    ctx->bdsp.clear_block(block);
  
      block[0] = get_bits(&ctx->gb, 8) << 3;
  
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c

index b1192c5..cd1e292 100644 (file)
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -35,6 +35,7 @@
  #include "libavutil/imgutils.h"
  #include "libavutil/opt.h"
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "internal.h"
  #include "mjpeg.h"
  #include "mjpegdec.h"
@@ -92,6 +93,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
      }
  
      s->avctx = avctx;
+    ff_blockdsp_init(&s->bdsp, avctx);
      ff_hpeldsp_init(&s->hdsp, avctx->flags);
      ff_dsputil_init(&s->dsp, avctx);
      ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
@@ -486,7 +488,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block,
                                   int16_t *quant_matrix, int Al)
  {
      int val;
-    s->dsp.clear_block(block);
+    s->bdsp.clear_block(block);
      val = mjpeg_decode_dc(s, dc_index);
      if (val == 0xffff) {
          av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
@@ -878,7 +880,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                                  reference_data[c] + block_offset,
                                  linesize[c], 8);
                          else {
-                            s->dsp.clear_block(s->block);
+                            s->bdsp.clear_block(s->block);
                              if (decode_block(s, s->block, i,
                                               s->dc_index[i], s->ac_index[i],
                                               s->quant_matrixes[s->quant_index[c]]) < 0) {
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h

index 344d2cb..0d1dd9e 100644 (file)
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -33,6 +33,7 @@
  #include "libavutil/pixdesc.h"
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "get_bits.h"
  #include "dsputil.h"
  #include "hpeldsp.h"
@@ -95,6 +96,7 @@ typedef struct MJpegDecodeContext {
      uint8_t *last_nnz[MAX_COMPONENTS];
      uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
      ScanTable scantable;
+    BlockDSPContext bdsp;
      DSPContext dsp;
      HpelDSPContext hdsp;
  
diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c

index 1cd37fa..195f9f3 100644 (file)
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -776,10 +776,10 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
      av_dlog(s->avctx, "mb_type=%x\n", mb_type);
  //    motion_type = 0; /* avoid warning */
      if (IS_INTRA(mb_type)) {
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
  
          if (!s->chroma_y_shift)
-            s->dsp.clear_blocks(s->block[6]);
+            s->bdsp.clear_blocks(s->block[6]);
  
          /* compute DCT type */
          // FIXME: add an interlaced_dct coded var?
@@ -1014,13 +1014,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
  
          s->mb_intra = 0;
          if (HAS_CBP(mb_type)) {
-            s->dsp.clear_blocks(s->block[0]);
+            s->bdsp.clear_blocks(s->block[0]);
  
              cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1);
              if (mb_block_count > 6) {
                  cbp <<= mb_block_count - 6;
                  cbp  |= get_bits(&s->gb, mb_block_count - 6);
-                s->dsp.clear_blocks(s->block[6]);
+                s->bdsp.clear_blocks(s->block[6]);
              }
              if (cbp <= 0) {
                  av_log(s->avctx, AV_LOG_ERROR,
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c

index 9405a02..0e3e580 100644 (file)
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -1227,7 +1227,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, int16_t block[6][64])
  
      if (!IS_SKIP(mb_type)) {
          int i;
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
          /* decode each block */
          for (i = 0; i < 6; i++) {
              if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) {
@@ -1305,7 +1305,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
              }
          } while (cbpc == 20);
  
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
          dquant      = cbpc & 8;
          s->mb_intra = ((cbpc & 4) != 0);
          if (s->mb_intra)
@@ -1451,7 +1451,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
              if (modb2) {
                  cbp = 0;
              } else {
-                s->dsp.clear_blocks(s->block[0]);
+                s->bdsp.clear_blocks(s->block[0]);
                  cbp = get_bits(&s->gb, 6);
              }
  
@@ -1586,7 +1586,7 @@ intra:
          if (!s->progressive_sequence)
              s->interlaced_dct = get_bits1(&s->gb);
  
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
          /* decode each block */
          for (i = 0; i < 6; i++) {
              if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0)
diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c

index 189664d..b95752f 100644 (file)
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c
@@ -485,7 +485,7 @@ static inline int get_b_cbp(MpegEncContext *s, int16_t block[6][64],
          for (i = 0; i < 6; i++) {
              if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) {
                  s->block_last_index[i] = -1;
-                s->dsp.clear_block(s->block[i]);
+                s->bdsp.clear_block(s->block[i]);
              }
          }
      } else {
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c

index aea6321..f6fc8dc 100644 (file)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -33,6 +33,7 @@
  #include "libavutil/internal.h"
  #include "libavutil/timer.h"
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "dsputil.h"
  #include "internal.h"
  #include "mathops.h"
@@ -363,7 +364,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
      ff_init_block_index(s);
      ff_update_block_index(s);
  
-    s->dsp.clear_blocks(s->block[0]);
+    s->bdsp.clear_blocks(s->block[0]);
  
      s->dest[0] = s->current_picture.f->data[0] + (s->mb_y *  16                       * s->linesize)   + s->mb_x *  16;
      s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
@@ -376,6 +377,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
  /* init common dct for both encoder and decoder */
  av_cold int ff_dct_common_init(MpegEncContext *s)
  {
+    ff_blockdsp_init(&s->bdsp, s->avctx);
      ff_dsputil_init(&s->dsp, s->avctx);
      ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
      ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h

index a0114fd..7b0673c 100644 (file)
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -29,6 +29,7 @@
  #define AVCODEC_MPEGVIDEO_H
  
  #include "avcodec.h"
+#include "blockdsp.h"
  #include "dsputil.h"
  #include "error_resilience.h"
  #include "get_bits.h"
@@ -347,6 +348,7 @@ typedef struct MpegEncContext {
      int unrestricted_mv;        ///< mv can point outside of the coded picture
      int h263_long_vectors;      ///< use horrible h263v1 long vector mode
  
+    BlockDSPContext bdsp;
      DSPContext dsp;             ///< pointers for accelerated dsp functions
      HpelDSPContext hdsp;
      QpelDSPContext qdsp;
diff --git a/libavcodec/msmpeg4dec.c b/libavcodec/msmpeg4dec.c

index 40660ed..191f81a 100644 (file)
--- a/libavcodec/msmpeg4dec.c
+++ b/libavcodec/msmpeg4dec.c
@@ -174,7 +174,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
          }
      }
  
-    s->dsp.clear_blocks(s->block[0]);
+    s->bdsp.clear_blocks(s->block[0]);
      for (i = 0; i < 6; i++) {
          if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
          {
@@ -265,7 +265,7 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, int16_t block[6][64])
          }
      }
  
-    s->dsp.clear_blocks(s->block[0]);
+    s->bdsp.clear_blocks(s->block[0]);
      for (i = 0; i < 6; i++) {
          if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
          {
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile

index b78d4be..bd78f8e 100644 (file)
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -1,5 +1,6 @@
  OBJS                                   += ppc/fmtconvert_altivec.o      \
  
+OBJS-$(CONFIG_BLOCKDSP)                += ppc/blockdsp.o
  OBJS-$(CONFIG_DSPUTIL)                 += ppc/dsputil_ppc.o
  OBJS-$(CONFIG_FFT)                     += ppc/fft_altivec.o
  OBJS-$(CONFIG_H264CHROMA)              += ppc/h264chroma_init.o
diff --git a/libavcodec/ppc/blockdsp.c b/libavcodec/ppc/blockdsp.c

new file mode 100644 (file)

index 0000000..679bc04
--- /dev/null
+++ b/libavcodec/ppc/blockdsp.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <string.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavcodec/blockdsp.h"
+
+/* ***** WARNING ***** WARNING ***** WARNING ***** */
+/*
+ * clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with
+ * a cache line size not equal to 32 bytes. Fortunately all processors used
+ * by Apple up to at least the 7450 (AKA second generation G4) use 32-byte
+ * cache lines. This is due to the use of the 'dcbz' instruction. It simply
+ * clears a single cache line to zero, so you need to know the cache line
+ * size to use it! It's absurd, but it's fast...
+ *
+ * update 24/06/2003: Apple released the G5 yesterday, with a PPC970.
+ * cache line size: 128 bytes. Oops.
+ * The semantics of dcbz was changed, it always clears 32 bytes. So the function
+ * below will work, but will be slow. So I fixed check_dcbzl_effect to use dcbzl,
+ * which is defined to clear a cache line (as dcbz before). So we can still
+ * distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
+ *
+ * see <http://developer.apple.com/technotes/tn/tn2087.html>
+ * and <http://developer.apple.com/technotes/tn/tn2086.html>
+ *
+ * Zeroes sizeof(int16_t) * 6 * 64 = 768 bytes starting at blocks.
+ * NOTE(review): the code only distinguishes a 0 or 16 byte offset from a
+ * 32-byte boundary, so blocks is presumably required to be 16-byte aligned;
+ * confirm against the callers.
+ */
+static void clear_blocks_dcbz32_ppc(int16_t *blocks)
+{
+    register int misal = (unsigned long) blocks & 0x00000010, i = 0;
+
+    if (misal) {
+        /* The buffer starts 16 bytes past a 32-byte boundary: clear the
+         * leading 16 bytes by hand so that every dcbz below hits a line that
+         * lies entirely inside the buffer. memset is used instead of stores
+         * through unsigned long *, whose size (4 or 8 bytes) depends on the
+         * ABI. */
+        memset(blocks, 0, 16);
+        i += 16;
+    }
+    for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
+        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
+    if (misal) {
+        /* The loop stopped 16 bytes short of the end; clear the remaining
+         * bytes 752..767. Indexing by byte offset keeps this inside the
+         * buffer regardless of sizeof(unsigned long). */
+        memset((char *) blocks + sizeof(int16_t) * 6 * 64 - 16, 0, 16);
+    }
+}
+
+/* Variant of the function above for CPUs where dcbzl clears a whole 128-byte
+ * cache line, i.e. the PPC970 AKA G5. Zeroes sizeof(int16_t) * 6 * 64 bytes
+ * starting at blocks; falls back to memset() when the assembler does not know
+ * dcbzl or when blocks is not 128-byte aligned. */
+static void clear_blocks_dcbz128_ppc(int16_t *blocks)
+{
+#if HAVE_DCBZL
+    register int misal = (unsigned long) blocks & 0x0000007f, offset = 0;
+
+    if (!misal) {
+        /* Aligned buffer: one dcbzl per 128-byte line. */
+        while (offset < sizeof(int16_t) * 6 * 64) {
+            __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (offset) : "memory");
+            offset += 128;
+        }
+        return;
+    }
+    /* Misaligned buffer: we could probably also optimize this case,
+     * but there's not much point as the machines
+     * aren't available yet (2003-06-26). */
+#endif
+    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
+}
+
+/* Report how many bytes of a 1024-byte scratch buffer are zero after a single
+ * dcbzl issued on its middle. Returns 0 when dcbzl support is not compiled in
+ * (HAVE_DCBZL unset) or when the scratch buffer cannot be allocated. */
+/* update 24/06/2003: Replace dcbz by dcbzl to get the intended effect
+ * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
+ * assembler knows about dcbzl ... */
+static long check_dcbzl_effect(void)
+{
+    long count = 0;
+#if HAVE_DCBZL
+    register char *fakedata = av_malloc(1024);
+    register char *fakedata_middle;
+    register long zero = 0, i = 0;
+
+    if (!fakedata)
+        return 0L;
+
+    fakedata_middle = fakedata + 512;
+
+    /* Fill with a non-zero pattern so that only bytes touched by dcbzl
+     * read back as 0 below. */
+    memset(fakedata, 0xFF, 1024);
+
+    /* Below the constraint "b" seems to mean "address base register"
+     * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so....
+     * The "memory" clobber tells the compiler that the instruction writes
+     * memory, so the counting loop must re-read the buffer instead of
+     * assuming it still holds the 0xFF stored by memset. */
+    __asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero) : "memory");
+
+    for (i = 0; i < 1024; i++)
+        if (fakedata[i] == (char) 0)
+            count++;
+
+    av_free(fakedata);
+#endif
+
+    return count;
+}
+
+#if HAVE_ALTIVEC
+/* Zero one block by storing the zero vector at byte offsets 0, 16, ..., 112
+ * from block, i.e. eight vec_st stores.
+ * NOTE(review): LOAD_ZERO / zero_s16v come from the project's AltiVec helper
+ * headers and are presumably an all-zero vector of signed 16-bit elements. */
+static void clear_block_altivec(int16_t *block)
+{
+    int offset;
+    LOAD_ZERO;
+
+    for (offset = 0; offset < 128; offset += 16)
+        vec_st(zero_s16v, offset, block);
+}
+#endif /* HAVE_ALTIVEC */
+
+/* Install the PowerPC implementations into c. Nothing is overridden when
+ * high_bit_depth is non-zero.
+ *  - clear_blocks: picked from the number of bytes check_dcbzl_effect()
+ *    reports (32 or 128); any other value leaves the pointer untouched.
+ *  - clear_block: AltiVec version, only when built with AltiVec support and
+ *    the runtime CPU flags report it. */
+av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth)
+{
+    // common optimizations whether AltiVec is available or not
+    if (!high_bit_depth) {
+        const long zeroed = check_dcbzl_effect();
+
+        if (zeroed == 32)
+            c->clear_blocks = clear_blocks_dcbz32_ppc;
+        else if (zeroed == 128)
+            c->clear_blocks = clear_blocks_dcbz128_ppc;
+    }
+
+#if HAVE_ALTIVEC
+    /* The CPU flags are queried first, as before, even for high bit depth. */
+    if (PPC_ALTIVEC(av_get_cpu_flags()) && !high_bit_depth)
+        c->clear_block = clear_block_altivec;
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c

index 442be6c..c3f90e9 100644 (file)
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -558,19 +558,6 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
      }
  }
  
-static void clear_block_altivec(int16_t *block)
-{
-    LOAD_ZERO;
-    vec_st(zero_s16v,   0, block);
-    vec_st(zero_s16v,  16, block);
-    vec_st(zero_s16v,  32, block);
-    vec_st(zero_s16v,  48, block);
-    vec_st(zero_s16v,  64, block);
-    vec_st(zero_s16v,  80, block);
-    vec_st(zero_s16v,  96, block);
-    vec_st(zero_s16v, 112, block);
-}
-
  static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
                                       uint8_t *src, int stride, int h)
  {
@@ -931,7 +918,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
  
      if (!high_bit_depth) {
          c->get_pixels = get_pixels_altivec;
-        c->clear_block = clear_block_altivec;
      }
  
      c->hadamard8_diff[0] = hadamard8_diff16_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c

index 698f545..b92fbf0 100644 (file)
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -24,124 +24,14 @@
  
  #include "libavutil/attributes.h"
  #include "libavutil/cpu.h"
-#include "libavutil/mem.h"
  #include "libavutil/ppc/cpu.h"
  #include "libavcodec/avcodec.h"
  #include "libavcodec/dsputil.h"
  #include "dsputil_altivec.h"
  
-/* ***** WARNING ***** WARNING ***** WARNING ***** */
-/*
- * clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with
- * a cache line size not equal to 32 bytes. Fortunately all processors used
- * by Apple up to at least the 7450 (AKA second generation G4) use 32-byte
- * cache lines. This is due to the use of the 'dcbz' instruction. It simply
- * clears a single cache line to zero, so you need to know the cache line
- * size to use it! It's absurd, but it's fast...
- *
- * update 24/06/2003: Apple released the G5 yesterday, with a PPC970.
- * cache line size: 128 bytes. Oups.
- * The semantics of dcbz was changed, it always clears 32 bytes. So the function
- * below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
- * which is defined to clear a cache line (as dcbz before). So we can still
- * distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
- *
- * see <http://developer.apple.com/technotes/tn/tn2087.html>
- * and <http://developer.apple.com/technotes/tn/tn2086.html>
- */
-static void clear_blocks_dcbz32_ppc(int16_t *blocks)
-{
-    register int misal = (unsigned long) blocks & 0x00000010, i = 0;
-
-    if (misal) {
-        ((unsigned long *) blocks)[0] = 0L;
-        ((unsigned long *) blocks)[1] = 0L;
-        ((unsigned long *) blocks)[2] = 0L;
-        ((unsigned long *) blocks)[3] = 0L;
-        i += 16;
-    }
-    for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
-        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
-    if (misal) {
-        ((unsigned long *) blocks)[188] = 0L;
-        ((unsigned long *) blocks)[189] = 0L;
-        ((unsigned long *) blocks)[190] = 0L;
-        ((unsigned long *) blocks)[191] = 0L;
-        i += 16;
-    }
-}
-
-/* Same as above, when dcbzl clears a whole 128 bytes cache line
- * i.e. the PPC970 AKA G5. */
-static void clear_blocks_dcbz128_ppc(int16_t *blocks)
-{
-#if HAVE_DCBZL
-    register int misal = (unsigned long) blocks & 0x0000007f, i = 0;
-
-    if (misal) {
-        /* We could probably also optimize this case,
-         * but there's not much point as the machines
-         * aren't available yet (2003-06-26). */
-        memset(blocks, 0, sizeof(int16_t) * 6 * 64);
-    } else {
-        for (; i < sizeof(int16_t) * 6 * 64; i += 128)
-            __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory");
-    }
-#else
-    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
-#endif
-}
-
-/* Check dcbz report how many bytes are set to 0 by dcbz. */
-/* update 24/06/2003: Replace dcbz by dcbzl to get the intended effect
- * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
- * assembler knows about dcbzl ... */
-static long check_dcbzl_effect(void)
-{
-    long count = 0;
-#if HAVE_DCBZL
-    register char *fakedata = av_malloc(1024);
-    register char *fakedata_middle;
-    register long zero = 0, i = 0;
-
-    if (!fakedata)
-        return 0L;
-
-    fakedata_middle = fakedata + 512;
-
-    memset(fakedata, 0xFF, 1024);
-
-    /* Below the constraint "b" seems to mean "address base register"
-     * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */
-    __asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero));
-
-    for (i = 0; i < 1024; i++)
-        if (fakedata[i] == (char) 0)
-            count++;
-
-    av_free(fakedata);
-#endif
-
-    return count;
-}
-
  av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
  {
-    // common optimizations whether AltiVec is available or not
-    if (!high_bit_depth) {
-        switch (check_dcbzl_effect()) {
-        case 32:
-            c->clear_blocks = clear_blocks_dcbz32_ppc;
-            break;
-        case 128:
-            c->clear_blocks = clear_blocks_dcbz128_ppc;
-            break;
-        default:
-            break;
-        }
-    }
-
      if (PPC_ALTIVEC(av_get_cpu_flags())) {
          ff_dsputil_init_altivec(c, avctx, high_bit_depth);
          ff_int_init_altivec(c, avctx);
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c

index c8a195c..c36b249 100644 (file)
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -3019,7 +3019,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
      int scale;
      int q1, q2 = 0;
  
-    s->dsp.clear_block(block);
+    s->bdsp.clear_block(block);
  
      /* XXX: Guard against dumb values of mquant */
      mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant);
@@ -3226,7 +3226,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
      int ttblk = ttmb & 7;
      int pat = 0;
  
-    s->dsp.clear_block(block);
+    s->bdsp.clear_block(block);
  
      if (ttmb == -1) {
          ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)];
@@ -4797,7 +4797,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
              dst[3] = dst[2] + 8;
              dst[4] = s->dest[1];
              dst[5] = s->dest[2];
-            s->dsp.clear_blocks(s->block[0]);
+            s->bdsp.clear_blocks(s->block[0]);
              mb_pos = s->mb_x + s->mb_y * s->mb_width;
              s->current_picture.mb_type[mb_pos]                     = MB_TYPE_INTRA;
              s->current_picture.qscale_table[mb_pos]                = v->pq;
@@ -4937,7 +4937,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
          for (;s->mb_x < s->mb_width; s->mb_x++) {
              int16_t (*block)[64] = v->block[v->cur_blk_idx];
              ff_update_block_index(s);
-            s->dsp.clear_blocks(block[0]);
+            s->bdsp.clear_blocks(block[0]);
              mb_pos = s->mb_x + s->mb_y * s->mb_stride;
              s->current_picture.mb_type[mb_pos + v->mb_off]                         = MB_TYPE_INTRA;
              s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
@@ -5603,6 +5603,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
  
      if (ff_vc1_init_common(v) < 0)
          return -1;
+    ff_blockdsp_init(&s->bdsp, avctx);
      ff_h264chroma_init(&v->h264chroma, 8);
      ff_qpeldsp_init(&s->qdsp);
      ff_vc1dsp_init(&v->vc1dsp);
diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c

index 003f022..bd799d0 100644 (file)
--- a/libavcodec/wmv2.c
+++ b/libavcodec/wmv2.c
@@ -28,6 +28,7 @@
  av_cold void ff_wmv2_common_init(Wmv2Context * w){
      MpegEncContext * const s= &w->s;
  
+    ff_blockdsp_init(&s->bdsp, s->avctx);
      ff_wmv2dsp_init(&w->wdsp);
      s->dsp.idct_permutation_type = w->wdsp.idct_perm;
      ff_init_scantable_permutation(s->dsp.idct_permutation,
@@ -60,12 +61,12 @@ static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int st
      case 1:
          ff_simple_idct84_add(dst           , stride, block1);
          ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
-        s->dsp.clear_block(w->abt_block2[n]);
+        s->bdsp.clear_block(w->abt_block2[n]);
          break;
      case 2:
          ff_simple_idct48_add(dst           , stride, block1);
          ff_simple_idct48_add(dst + 4       , stride, w->abt_block2[n]);
-        s->dsp.clear_block(w->abt_block2[n]);
+        s->bdsp.clear_block(w->abt_block2[n]);
          break;
      default:
          av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");
diff --git a/libavcodec/wmv2dec.c b/libavcodec/wmv2dec.c

index 366aa1f..4ebc801 100644 (file)
--- a/libavcodec/wmv2dec.c
+++ b/libavcodec/wmv2dec.c
@@ -385,7 +385,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64])
          wmv2_pred_motion(w, &mx, &my);
  
          if(cbp){
-            s->dsp.clear_blocks(s->block[0]);
+            s->bdsp.clear_blocks(s->block[0]);
              if(s->per_mb_rl_table){
                  s->rl_table_index = decode012(&s->gb);
                  s->rl_chroma_table_index = s->rl_table_index;
@@ -431,7 +431,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64])
              s->rl_chroma_table_index = s->rl_table_index;
          }
  
-        s->dsp.clear_blocks(s->block[0]);
+        s->bdsp.clear_blocks(s->block[0]);
          for (i = 0; i < 6; i++) {
              if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
              {
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile

index 5fddf3f..222a0ff 100644 (file)
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -44,6 +44,7 @@ OBJS-$(CONFIG_VP7_DECODER)             += x86/vp8dsp_init.o
  OBJS-$(CONFIG_VP8_DECODER)             += x86/vp8dsp_init.o
  OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
  
+MMX-OBJS-$(CONFIG_BLOCKDSP)            += x86/blockdsp_mmx.o
  MMX-OBJS-$(CONFIG_DSPUTIL)             += x86/dsputil_mmx.o             \
                                            x86/idct_mmx_xvid.o           \
                                            x86/idct_sse2_xvid.o          \
diff --git a/libavcodec/x86/blockdsp_mmx.c b/libavcodec/x86/blockdsp_mmx.c

new file mode 100644 (file)

index 0000000..b529424
--- /dev/null
+++ b/libavcodec/x86/blockdsp_mmx.c
@@ -0,0 +1,120 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/blockdsp.h"
+#include "libavcodec/version.h"
+
+#if HAVE_INLINE_ASM
+
+/*
+ * Define a static function `name` that zeroes n consecutive blocks of
+ * 128 bytes each, starting at `blocks`, with 8-byte MMX stores.
+ *
+ * Operand 0 points one past the end of the region and REG_a starts at
+ * -128 * n; each iteration clears 32 bytes and adds 32 to REG_a, and
+ * "js" loops for as long as the sum is still negative.
+ *
+ * The "memory" clobber is required because the stores go through a plain
+ * input pointer: without it the compiler is not told that the buffer is
+ * modified by the asm statement.
+ */
+#define CLEAR_BLOCKS(name, n)                           \
+static void name(int16_t *blocks)                       \
+{                                                       \
+    __asm__ volatile (                                  \
+        "pxor %%mm7, %%mm7              \n\t"           \
+        "mov     %1,        %%"REG_a"   \n\t"           \
+        "1:                             \n\t"           \
+        "movq %%mm7,   (%0, %%"REG_a")  \n\t"           \
+        "movq %%mm7,  8(%0, %%"REG_a")  \n\t"           \
+        "movq %%mm7, 16(%0, %%"REG_a")  \n\t"           \
+        "movq %%mm7, 24(%0, %%"REG_a")  \n\t"           \
+        "add    $32, %%"REG_a"          \n\t"           \
+        "js      1b                     \n\t"           \
+        :: "r"(((uint8_t *) blocks) + 128 * n),         \
+           "i"(-128 * n)                                \
+        : "%"REG_a, "memory");                          \
+}
+CLEAR_BLOCKS(clear_blocks_mmx, 6)
+CLEAR_BLOCKS(clear_block_mmx, 1)
+
+/**
+ * Zero one 128-byte block using SSE stores.
+ *
+ * Eight 16-byte movaps stores cover byte offsets 0 through 127 from block.
+ * movaps requires its memory operand to be 16-byte aligned, so block must
+ * be 16-byte aligned. The "memory" clobber tells the compiler that the
+ * buffer is written by the asm statement.
+ *
+ * @param block start of the 128-byte region to clear
+ */
+static void clear_block_sse(int16_t *block)
+{
+    __asm__ volatile (
+        "xorps  %%xmm0, %%xmm0          \n"
+        "movaps %%xmm0,    (%0)         \n"
+        "movaps %%xmm0,  16(%0)         \n"
+        "movaps %%xmm0,  32(%0)         \n"
+        "movaps %%xmm0,  48(%0)         \n"
+        "movaps %%xmm0,  64(%0)         \n"
+        "movaps %%xmm0,  80(%0)         \n"
+        "movaps %%xmm0,  96(%0)         \n"
+        "movaps %%xmm0, 112(%0)         \n"
+        :: "r" (block)
+        : "memory");
+}
+
+/**
+ * Zero six consecutive 128-byte blocks (768 bytes) using SSE stores.
+ *
+ * Operand 0 points one past the end of the region and REG_a runs from
+ * -768 towards 0 in steps of 128; "js" loops for as long as the sum is
+ * still negative. movaps requires 16-byte aligned addresses, so blocks
+ * must be 16-byte aligned.
+ *
+ * The "memory" clobber is required because the stores go through a plain
+ * input pointer: without it the compiler is not told that the buffer is
+ * modified by the asm statement.
+ *
+ * @param blocks start of the 768-byte region to clear
+ */
+static void clear_blocks_sse(int16_t *blocks)
+{
+    __asm__ volatile (
+        "xorps  %%xmm0, %%xmm0              \n"
+        "mov        %1,         %%"REG_a"   \n"
+        "1:                                 \n"
+        "movaps %%xmm0,    (%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  16(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  32(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  48(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  64(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  80(%0, %%"REG_a")  \n"
+        "movaps %%xmm0,  96(%0, %%"REG_a")  \n"
+        "movaps %%xmm0, 112(%0, %%"REG_a")  \n"
+        "add      $128,         %%"REG_a"   \n"
+        "js         1b                      \n"
+        :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
+        : "%"REG_a, "memory");
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+/**
+ * Install the x86 inline-asm implementations of the block-clearing
+ * routines.
+ *
+ * Nothing is installed when high_bit_depth is non-zero or when inline
+ * assembly is unavailable. The MMX versions are installed first and are
+ * then replaced by the SSE versions if the CPU flags report SSE.
+ *
+ * @param c              context whose clear_block/clear_blocks pointers
+ *                       may be overridden
+ * @param high_bit_depth non-zero to leave the context untouched
+ * @param avctx          only present while FF_API_XVMC is enabled; read
+ *                       to detect XvMC acceleration
+ */
+#if FF_API_XVMC
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+                                  AVCodecContext *avctx)
+#else
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
+#endif /* FF_API_XVMC */
+{
+#if HAVE_INLINE_ASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (!high_bit_depth) {
+        if (INLINE_MMX(cpu_flags)) {
+            c->clear_block  = clear_block_mmx;
+            c->clear_blocks = clear_blocks_mmx;
+        }
+
+#if FF_API_XVMC
+FF_DISABLE_DEPRECATION_WARNINGS
+        /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks, and
+         * the SSE routines below store with movaps, which needs that
+         * alignment. Stop here so the MMX versions stay installed. */
+        if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
+            return;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_XVMC */
+
+        if (INLINE_SSE(cpu_flags)) {
+            c->clear_block  = clear_block_sse;
+            c->clear_blocks = clear_blocks_sse;
+        }
+    }
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c

index 389e763..a19b83d 100644 (file)
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -19,12 +19,10 @@
  #include "config.h"
  #include "libavutil/attributes.h"
  #include "libavutil/cpu.h"
-#include "libavutil/internal.h"
  #include "libavutil/x86/cpu.h"
  #include "libavcodec/avcodec.h"
  #include "libavcodec/dsputil.h"
  #include "libavcodec/simple_idct.h"
-#include "libavcodec/version.h"
  #include "dsputil_x86.h"
  #include "idct_xvid.h"
  
@@ -54,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
      c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;
  
      if (!high_bit_depth) {
-        c->clear_block  = ff_clear_block_mmx;
-        c->clear_blocks = ff_clear_blocks_mmx;
          c->draw_edges   = ff_draw_edges_mmx;
  
          switch (avctx->idct_algo) {
@@ -103,19 +99,6 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
  {
  #if HAVE_SSE_INLINE
      c->vector_clipf = ff_vector_clipf_sse;
-
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
-    if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
-        return;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
-    if (!high_bit_depth) {
-        c->clear_block  = ff_clear_block_sse;
-        c->clear_blocks = ff_clear_blocks_sse;
-    }
  #endif /* HAVE_SSE_INLINE */
  }
  
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c

index c17f8d0..fd74efe 100644 (file)
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -166,62 +166,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
      } while (--i);
  }
  
-#define CLEAR_BLOCKS(name, n)                           \
-void name(int16_t *blocks)                              \
-{                                                       \
-    __asm__ volatile (                                  \
-        "pxor %%mm7, %%mm7              \n\t"           \
-        "mov     %1,        %%"REG_a"   \n\t"           \
-        "1:                             \n\t"           \
-        "movq %%mm7,   (%0, %%"REG_a")  \n\t"           \
-        "movq %%mm7,  8(%0, %%"REG_a")  \n\t"           \
-        "movq %%mm7, 16(%0, %%"REG_a")  \n\t"           \
-        "movq %%mm7, 24(%0, %%"REG_a")  \n\t"           \
-        "add    $32, %%"REG_a"          \n\t"           \
-        "js      1b                     \n\t"           \
-        :: "r"(((uint8_t *) blocks) + 128 * n),         \
-           "i"(-128 * n)                                \
-        : "%"REG_a);                                    \
-}
-CLEAR_BLOCKS(ff_clear_blocks_mmx, 6)
-CLEAR_BLOCKS(ff_clear_block_mmx, 1)
-
-void ff_clear_block_sse(int16_t *block)
-{
-    __asm__ volatile (
-        "xorps  %%xmm0, %%xmm0          \n"
-        "movaps %%xmm0,    (%0)         \n"
-        "movaps %%xmm0,  16(%0)         \n"
-        "movaps %%xmm0,  32(%0)         \n"
-        "movaps %%xmm0,  48(%0)         \n"
-        "movaps %%xmm0,  64(%0)         \n"
-        "movaps %%xmm0,  80(%0)         \n"
-        "movaps %%xmm0,  96(%0)         \n"
-        "movaps %%xmm0, 112(%0)         \n"
-        :: "r" (block)
-        : "memory");
-}
-
-void ff_clear_blocks_sse(int16_t *blocks)
-{
-    __asm__ volatile (
-        "xorps  %%xmm0, %%xmm0              \n"
-        "mov        %1,         %%"REG_a"   \n"
-        "1:                                 \n"
-        "movaps %%xmm0,    (%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  16(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  32(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  48(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  64(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  80(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  96(%0, %%"REG_a")  \n"
-        "movaps %%xmm0, 112(%0, %%"REG_a")  \n"
-        "add      $128,         %%"REG_a"   \n"
-        "js         1b                      \n"
-        :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
-        : "%"REG_a);
-}
-
  /* Draw the edges of width 'w' of an image of size width, height
   * this MMX version can only handle w == 8 || w == 16. */
  void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h

index a4bc8c2..e99b6b7 100644 (file)
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -38,11 +38,6 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
  void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
                                        int line_size);
  
-void ff_clear_block_mmx(int16_t *block);
-void ff_clear_block_sse(int16_t *block);
-void ff_clear_blocks_mmx(int16_t *blocks);
-void ff_clear_blocks_sse(int16_t *blocks);
-
  void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
                         int w, int h, int sides);
author	Diego Biurrun <diego@biurrun.de>
	Tue, 14 Jan 2014 09:33:47 +0000 (10:33 +0100)
committer	Diego Biurrun <diego@biurrun.de>
	Wed, 18 Jun 2014 21:07:23 +0000 (14:07 -0700)
configure		patch \| blob \| history
libavcodec/4xm.c		patch \| blob \| history
libavcodec/Makefile		patch \| blob \| history
libavcodec/arm/Makefile		patch \| blob \| history
libavcodec/arm/blockdsp_arm.h	[new file with mode: 0644]	patch \| blob
libavcodec/arm/blockdsp_init_arm.c	[new file with mode: 0644]	patch \| blob
libavcodec/arm/blockdsp_init_neon.c	[new file with mode: 0644]	patch \| blob
libavcodec/arm/blockdsp_neon.S	[new file with mode: 0644]	patch \| blob
libavcodec/arm/dsputil_init_neon.c		patch \| blob \| history
libavcodec/arm/dsputil_neon.S		patch \| blob \| history
libavcodec/asv.h		patch \| blob \| history
libavcodec/asvdec.c		patch \| blob \| history
libavcodec/bink.c		patch \| blob \| history
libavcodec/blockdsp.c	[new file with mode: 0644]	patch \| blob
libavcodec/blockdsp.h	[new file with mode: 0644]	patch \| blob
libavcodec/cavs.c		patch \| blob \| history
libavcodec/cavs.h		patch \| blob \| history
libavcodec/cavsdec.c		patch \| blob \| history
libavcodec/dnxhddec.c		patch \| blob \| history
libavcodec/dnxhdenc.c		patch \| blob \| history
libavcodec/dnxhdenc.h		patch \| blob \| history
libavcodec/dsputil.c		patch \| blob \| history
libavcodec/dsputil.h		patch \| blob \| history
libavcodec/eamad.c		patch \| blob \| history
libavcodec/eatqi.c		patch \| blob \| history
libavcodec/g2meet.c		patch \| blob \| history
libavcodec/h261dec.c		patch \| blob \| history
libavcodec/h263.h		patch \| blob \| history
libavcodec/intrax8.c		patch \| blob \| history
libavcodec/ituh263dec.c		patch \| blob \| history
libavcodec/jvdec.c		patch \| blob \| history
libavcodec/mdec.c		patch \| blob \| history
libavcodec/mimic.c		patch \| blob \| history
libavcodec/mjpegdec.c		patch \| blob \| history
libavcodec/mjpegdec.h		patch \| blob \| history
libavcodec/mpeg12dec.c		patch \| blob \| history
libavcodec/mpeg4videodec.c		patch \| blob \| history
libavcodec/mpeg4videoenc.c		patch \| blob \| history
libavcodec/mpegvideo.c		patch \| blob \| history
libavcodec/mpegvideo.h		patch \| blob \| history
libavcodec/msmpeg4dec.c		patch \| blob \| history
libavcodec/ppc/Makefile		patch \| blob \| history
libavcodec/ppc/blockdsp.c	[new file with mode: 0644]	patch \| blob
libavcodec/ppc/dsputil_altivec.c		patch \| blob \| history
libavcodec/ppc/dsputil_ppc.c		patch \| blob \| history
libavcodec/vc1dec.c		patch \| blob \| history
libavcodec/wmv2.c		patch \| blob \| history
libavcodec/wmv2dec.c		patch \| blob \| history
libavcodec/x86/Makefile		patch \| blob \| history
libavcodec/x86/blockdsp_mmx.c	[new file with mode: 0644]	patch \| blob
libavcodec/x86/dsputil_init.c		patch \| blob \| history
libavcodec/x86/dsputil_mmx.c		patch \| blob \| history
libavcodec/x86/dsputil_x86.h		patch \| blob \| history