Store intra4x4_pred_mode per row only.
author Michael Niedermayer <michaelni@gmx.at>
Thu, 25 Feb 2010 14:02:39 +0000 (14:02 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Thu, 25 Feb 2010 14:02:39 +0000 (14:02 +0000)
About 5 CPU cycles slower in the local code, but should be faster overall
due to reduced cache use. (My sample, though, has too few intra4x4 blocks
for this to be easily measurable either way.)

Originally committed as revision 22052 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/h264.c
libavcodec/h264.h

index fc39bdb49db146e12b6806d7678d77ff226c461d..7f9b411639ec59fea3246228f12dfa269ed1ac96 100644 (file)
@@ -52,15 +52,15 @@ static const uint8_t div6[52]={
 };
 
 void ff_h264_write_back_intra_pred_mode(H264Context *h){
-    const int mb_xy= h->mb_xy;
-
-    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
-    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
-    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
-    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
-    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
-    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
-    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
+    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
+
+    mode[0]= h->intra4x4_pred_mode_cache[7+8*1];
+    mode[1]= h->intra4x4_pred_mode_cache[7+8*2];
+    mode[2]= h->intra4x4_pred_mode_cache[7+8*3];
+    mode[3]= h->intra4x4_pred_mode_cache[7+8*4];
+    mode[4]= h->intra4x4_pred_mode_cache[4+8*4];
+    mode[5]= h->intra4x4_pred_mode_cache[5+8*4];
+    mode[6]= h->intra4x4_pred_mode_cache[6+8*4];
 }
 
 /**
index f443554f3ef33fabe259f54f2e55030fd97d9bfd..cded523ed695a182e3a0ba894f91f80c45cc2ac1 100644 (file)
@@ -298,7 +298,7 @@ typedef struct H264Context{
     int topleft_partition;
 
     int8_t intra4x4_pred_mode_cache[5*8];
-    int8_t (*intra4x4_pred_mode)[8];
+    int8_t (*intra4x4_pred_mode);
     H264PredContext hpc;
     unsigned int topleft_samples_available;
     unsigned int top_samples_available;
@@ -886,10 +886,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 
             if(IS_INTRA4x4(mb_type)){
                 if(IS_INTRA4x4(top_type)){
-                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
-                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
-                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
-                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
+                    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[top_xy];
+                    h->intra4x4_pred_mode_cache[4+8*0]= mode[4];
+                    h->intra4x4_pred_mode_cache[5+8*0]= mode[5];
+                    h->intra4x4_pred_mode_cache[6+8*0]= mode[6];
+                    h->intra4x4_pred_mode_cache[7+8*0]= mode[3];
                 }else{
                     int pred;
                     if(!(top_type & type_mask))
@@ -904,8 +905,9 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                 }
                 for(i=0; i<2; i++){
                     if(IS_INTRA4x4(left_type[i])){
-                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
-                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
+                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
+                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[left_block[0+2*i]];
+                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[left_block[1+2*i]];
                     }else{
                         int pred;
                         if(!(left_type[i] & type_mask))