framework for assembly version of the detokenizer
author Johann <johannkoenig@google.com>
Thu, 12 Aug 2010 13:05:37 +0000 (09:05 -0400)
committer Johann <johannkoenig@google.com>
Thu, 12 Aug 2010 20:39:56 +0000 (16:39 -0400)
adds a compile time option: --enable-arm-asm-detok which pulls in
vp8/decoder/arm/detokenize.asm

currently about break even speed wise, but changes are pending to
the fill code (branch and load 3 bytes versus conditionally always
load one) and the error handling. Currently it doesn't handle zero
runs or overrunning the buffer.

this is really just so i don't have to rebase my changes all the
time to run benchmarks - now just need to replace one file!

Change-Id: I56d0e2354dc0ca3811bffd0e88fe1f952fa6c797

configure
vp8/decoder/arm/detokenize.asm [new file with mode: 0644]
vp8/decoder/arm/detokenize_arm.h [new file with mode: 0644]
vp8/decoder/detokenize.c
vp8/decoder/detokenize.h
vp8/decoder/onyxd_if.c
vp8/vp8dx_arm.mk

index 5c908d4..ac3d162 100755 (executable)
--- a/configure
+++ b/configure
@@ -38,6 +38,7 @@ Advanced options:
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_runtime_cpu_detect}    runtime cpu detection
   ${toggle_shared}                shared library support
+  ${toggle_arm_asm_detok}         assembly version of the detokenizer (ARM platforms only)
 
 Codecs:
   Codecs can be selectively enabled or disabled individually, or by family:
@@ -242,6 +243,7 @@ CONFIG_LIST="
     spatial_resampling
     realtime_only
     shared
+    arm_asm_detok
 "
 CMDLINE_SELECT="
     extra_warnings
@@ -278,6 +280,7 @@ CMDLINE_SELECT="
     spatial_resampling
     realtime_only
     shared
+    arm_asm_detok
 "
 
 process_cmdline() {
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
new file mode 100644 (file)
index 0000000..bafacb9
--- /dev/null
@@ -0,0 +1,333 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_decode_mb_tokens_v6|
+
+    AREA    |.text|, CODE, READONLY  ; name this block of code
+
+    INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff    EQU     0      ; stack slot: qcoeff pointer for the current block
+l_i         EQU     4      ; stack slot: block index i
+l_type      EQU     8      ; stack slot: current block type
+l_stop      EQU     12     ; stack slot: block index at which the current pass stops
+l_c         EQU     16     ; stack slot: coefficient position c within the block
+l_l_ptr     EQU     20     ; stack slot: address of the left context entry (&l)
+l_a_ptr     EQU     24     ; stack slot: address of the above context entry (&a)
+l_bc        EQU     28     ; stack slot: bool decoder pointer (detok_current_bc)
+l_coef_ptr  EQU     32     ; stack slot: coef_probs[type]
+l_stacksize EQU     64     ; bytes reserved for locals; slots above use offsets 0..35
+
+
+;; constant offsets -- these should be created at build time
+c_onyxblock2left_offset      EQU 25   ; byte offset from context entry i to its left-offset entry
+c_onyxblock2above_offset     EQU 50   ; byte offset from context entry i to its above-offset entry
+c_entropy_nodes              EQU 11   ; probabilities per context step (ENTROPY_NODES)
+c_dct_eob_token              EQU 11   ; tree leaf -11 marks end of block
+
+|vp8_decode_mb_tokens_v6| PROC              ; in: r0 = DETOK *detoken, r1 = int type; writes eob[], qcoeff, A/L contexts
+    stmdb       sp!, {r4 - r11, lr}
+    sub         sp, sp, #l_stacksize
+    mov         r7, r1                      ; r7 = type
+    mov         r9, r0                      ; r9 = detoken (base pointer for every detok_* load)
+
+    ldr         r1, [r9, #detok_current_bc]
+    ldr         r0, [r9, #detok_qcoeff_start_ptr]
+    mov         r11, #0                     ; i = 0
+    mov         r3, #0x10                   ; stop = 16
+
+    cmp         r7, #1                      ; type ?= 1
+    addeq       r11, r11, #24               ; i = 24
+    addeq       r3, r3, #8                  ; stop = 24
+    addeq       r0, r0, #3, 24              ; qcoeff += 0x300 bytes (#3 ROR 24) = 24 blocks * 0x20 bytes
+
+    str         r0, [sp, #l_qcoeff]
+    str         r11, [sp, #l_i]
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+    str         r1, [sp, #l_bc]
+
+    add         lr, r9, r7, lsl #2          ; detoken + type*4
+
+    ldr         r8, [r1, #bool_decoder_user_buffer] ; r8 = bufptr
+
+    ldr         r10, [lr, #detok_coef_probs] ; coef_probs[type]
+    ldr         r5, [r1, #bool_decoder_count] ; r5 = count
+    ldr         r6, [r1, #bool_decoder_range] ; r6 = range
+    ldr         r4, [r1, #bool_decoder_value] ; r4 = value
+
+    str         r10, [sp, #l_coef_ptr]
+
+    ;align 4
+BLOCK_LOOP                                  ; expects r11 = i, r7 = type, r10 = coef_probs[type]
+    ldr         r3, [r9, #detok_ptr_onyxblock2context_leftabove]
+    ldr         r2, [r9, #detok_A]
+    ldr         r1, [r9, #detok_L]
+    ldrb        r12, [r3, r11]!             ; onyxblock2context[i]; writeback leaves r3 = table + i
+
+    cmp         r7, #0                      ; c = !type
+    moveq       r7, #1
+    movne       r7, #0
+
+    ldr         r0, [r2, r12, lsl #2]       ; A[onyxblock2context[i]]
+    add         r1, r1, r12, lsl #4         ; L + onyxblock2context[i] << 4
+      ; A is ptr to ptr (**)
+      ; L is ptr to data (*[4])
+
+    ldrb        r2, [r3, #c_onyxblock2above_offset] ; + above offset
+    ldrb        r3, [r3, #c_onyxblock2left_offset] ; + left offset
+    mov         lr, #c_entropy_nodes        ; ENTROPY_NODES = 11
+;;  ;++
+
+    ldr         r2, [r0, r2, lsl #2]!       ; r2 = *a; writeback leaves r0 = &a
+    ldr         r3, [r1, r3, lsl #2]!       ; r3 = *l; writeback leaves r1 = &l
+; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
+    cmp         r2, #0                      ; *a ?= 0
+    movne       r2, #1                      ; r2 = (*a != 0); when *a == 0 r2 already holds the 0 we need
+    cmp         r3, #0                      ; *l ?= 0
+    addne       r2, r2, #1                  ; t
+
+    str         r1, [sp, #l_l_ptr]          ; save &l
+    str         r0, [sp, #l_a_ptr]          ; save &a
+    smlabb      r0, r2, lr, r10             ; Prob = coef_probs + (t * ENTROPY_NODES)
+    mov         r1, #0                      ; t = 0
+    str         r7, [sp, #l_c]
+
+    ;align 4
+COEFF_LOOP
+    ldr         r3, [r9, #detok_ptr_onyx_coef_bands_x]
+    ldr         lr, [r9, #detok_onyx_coef_tree_ptr]
+
+      ; onyx_coef_bands_x is UINT16
+    add         r3, r3, r7, lsl #1            ; coef_bands_x[c]
+    ldrh        r3, [r3]                      ; UINT16
+
+    ;++
+    add         r0, r0, r3                  ; Prob += coef_bands_x[c]
+
+    ;align 4
+get_token_loop
+    ldrb        r2, [r0, +r1, asr #1]       ; Prob[t >> 1]
+    mov         r3, r6, lsl #8              ; range << 8
+    sub         r3, r3, #256                ; (range << 8) - (1 << 8)
+    mov         r10, #1                     ; 1
+
+    smlawb      r2, r3, r2, r10             ; split = 1 + (((range-1) * probability) >> 8)
+
+    ldrb        r12, [r8]                   ; load cx data byte in stall slot : r8 = bufptr
+    ;++
+
+    subs        r3, r4, r2, lsl #24         ; value-(split<<24): hs (no borrow) selects the "1" branch below
+    addhs       r1, r1, #1                  ; t += 1
+    movhs       r4, r3                      ; value -= bigsplit (split << 24)
+    subhs       r2, r6, r2                  ; range -= split
+ ;   movlo       r6, r2                      ; range = split
+
+    ldrsb     r1, [lr, r1]                  ; t = onyx_coef_tree_ptr[t]
+
+; NORMALIZE
+    clz         r3, r2                      ; vp8dx_bitreader_norm[range] + 24
+    sub         r3, r3, #24                 ; vp8dx_bitreader_norm[range]
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range <<= shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+
+; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
+    addle         r5, r5, #8                ; count += 8
+    rsble         r3, r5, #24               ; 24 - count
+    addle         r8, r8, #1                ; bufptr++
+    orrle         r4, r4, r12, lsl r3       ; value |= preloaded byte << (24 - count)
+
+    cmp         r1, #0                      ; t ?= 0
+    bgt         get_token_loop              ; while (t > 0)
+
+    cmn         r1, #c_dct_eob_token        ; if(t == -DCT_EOB_TOKEN)
+    beq         END_OF_BLOCK                ; break
+
+    rsb         lr, r1, #0                  ; v = -t;
+
+    cmp         lr, #4                      ; if(v > FOUR_TOKEN)
+    ble         SKIP_EXTRABITS
+
+    ldr         r3, [r9, #detok_teb_base_ptr]
+    mov         r11, #1                     ; constant 1: the "1 +" of split and the 1 in v += 1 << bits_count
+    add         r7, r3, lr, lsl #4          ; detok_teb_base_ptr + (v << 4)
+
+    ldrsh       lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
+    ldrsh       r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
+
+extrabits_loop
+    add         r3, r0, r7                  ; &teb_ptr->Probs[bits_count]
+
+    ldrb        r2, [r3, #4]                ; probability; +4 is presumably the offset of Probs[] in the entry - TODO confirm
+    mov         r3, r6, lsl #8              ; range << 8
+    sub         r3, r3, #256                ; (range << 8) - (1 << 8)
+
+    smlawb      r2, r3, r2, r11             ; split = 1 +  (((range-1) * probability) >> 8)
+
+    ldrb        r12, [r8]                   ; *bufptr
+    ;++
+
+    subs        r10, r4, r2, lsl #24        ; value - (split<<24)
+    movhs       r4, r10                     ; value = value - (split << 24)
+    subhs       r2, r6, r2                  ; range = range - split
+    addhs       lr, lr, r11, lsl r0         ; v += ((UINT16)1<<bits_count)
+
+; NORMALIZE
+    clz         r3, r2                      ; leading zeros of the new range
+    sub         r3, r3, #24                 ; don't count first 3 bytes
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range = range << shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+
+    addle       r5, r5, #8                  ; count += BR_COUNT
+    addle       r8, r8, #1                  ; bufptr++
+    rsble       r3, r5, #24                 ; 24 - count
+    orrle       r4, r4, r12, lsl r3         ; value |= preloaded byte << (24 - count)
+
+    subs        r0, r0, #1                  ; bits_count --
+    bpl         extrabits_loop
+
+
+SKIP_EXTRABITS
+    ldr         r11, [sp, #l_qcoeff]
+    ldr         r0, [sp, #l_coef_ptr]       ; Prob = coef_probs
+
+    cmp         r1, #0                      ; check for nonzero token - if (t)
+    beq         SKIP_EOB_CHECK              ; if t is zero, we will skip the eob table check
+
+    add         r3, r6, #1                  ; range + 1
+    mov         r2, r3, lsr #1              ; split = (range + 1) >> 1
+
+    subs        r3, r4, r2, lsl #24         ; value - (split<<24)
+    movhs       r4, r3                      ; value -= (split << 24)
+    subhs       r2, r6, r2                  ; range -= split
+    mvnhs       r3, lr                      ; ~v
+    addhs       lr, r3, #1                  ; v = (v ^ -1) + 1, i.e. v = -v
+
+; NORMALIZE
+    clz         r3, r2                      ; leading zeros of the new range
+    sub         r3, r3, #24                 ; shift
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range <<= shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+    ldrleb      r2, [r8], #1                ; *(bufptr++)
+    addle       r5, r5, #8                  ; count += 8
+    rsble       r3, r5, #24                  ; 24 - count
+    orrle       r4, r4, r2, lsl r3          ; value |= loaded byte << (24 - count)
+
+    add         r0, r0, #0xB                ; Prob += ENTROPY_NODES (11)
+
+    cmn         r1, #1                      ; t < -ONE_TOKEN
+
+    addlt       r0, r0, #0xB                ; Prob += ENTROPY_NODES (11)
+
+    mvn         r1, #1                      ; t = ~1 = -2; the "t += 2" below brings it to 0
+
+SKIP_EOB_CHECK
+    ldr         r7, [sp, #l_c]              ; c
+    ldr         r3, [r9, #detok_scan]
+    add         r1, r1, #2                  ; t+= 2
+    cmp         r7, #(0x10 - 1)             ; pre-increment c vs 15; flags survive until the blt below
+
+    ldr         r3, [r3, +r7, lsl #2]       ; scan[c] this needs pre-inc c value
+    add         r7, r7, #1                  ; c++
+    add         r3, r11, r3, lsl #1         ; qcoeff + scan[c]
+
+    str         r7, [sp, #l_c]              ; store c
+    strh        lr, [r3]                    ; qcoef_ptr[scan[c]] = v
+
+    blt         COEFF_LOOP
+
+    sub         r7, r7, #1                  ; C: if(t != -DCT_EOB_TOKEN) --c; EOB path branches past this; r7 only, l_c not updated
+
+END_OF_BLOCK
+    ldr         r3, [sp, #l_type]           ; type
+    ldr         r10, [sp, #l_coef_ptr]      ; coef_ptr
+    ldr         r0, [sp, #l_qcoeff]         ; qcoeff
+    ldr         r11, [sp, #l_i]             ; i
+    ldr         r12, [sp, #l_stop]          ; stop
+
+    cmp         r3, #0                      ; type ?= 0
+    moveq       r1, #1
+    movne       r1, #0
+    add         r3, r11, r9                 ; detok + i
+
+    cmp         r7, r1                      ; c ?= !type
+    strb        r7, [r3, #detok_eob]        ; eob[i] = c
+
+    ldr         r7, [sp, #l_l_ptr]          ; l
+    ldr         r2, [sp, #l_a_ptr]          ; a
+    movne       r3, #1                      ; t
+    moveq       r3, #0
+
+    add         r0, r0, #0x20               ; qcoeff += 32 bytes (16 coefficients * 2 bytes)
+    add         r11, r11, #1                ; i++
+    str         r3, [r7]                    ; *l = t
+    str         r3, [r2]                    ; *a = t
+    str         r0, [sp, #l_qcoeff]         ; qcoeff
+    str         r11, [sp, #l_i]             ; i
+
+    cmp         r11, r12                    ; i ?< stop (the blt below loops while i < stop)
+    ldr         r7, [sp, #l_type]           ; type
+    mov         lr, #0xB                    ; 11 (ENTROPY_NODES); BLOCK_LOOP sets lr again itself
+
+    blt         BLOCK_LOOP
+
+    cmp         r11, #0x19                  ; i ?= 25
+    bne         ln2_decode_mb_to
+
+    ldr         r12, [r9, #detok_qcoeff_start_ptr]
+    ldr         r10, [r9, #detok_coef_probs]
+    mov         r7, #0                      ; type = 0 (also the value stored to l_i)
+    mov         r11, #0                     ; i = 0: BLOCK_LOOP indexes the context table with r11, not l_i
+    mov         r3, #0x10                   ; stop = 16
+    str         r12, [sp, #l_qcoeff]        ; qcoeff_ptr = qcoeff_start_ptr
+    str         r7, [sp, #l_i]
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type] (0)
+
+    b           BLOCK_LOOP
+
+ln2_decode_mb_to
+    cmp         r11, #0x10                  ; i ?= 16
+    bne         ln1_decode_mb_to
+
+    mov         r10, #detok_coef_probs
+    add         r10, r10, #2*4              ; offset of coef_probs[2]
+    ldr         r10, [r9, r10]              ; r10 = detoken->coef_probs[2]
+
+    mov         r7, #2                      ; type = 2
+    mov         r3, #0x18                   ; stop = 24
+
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type] - didn't want to add 2 to coef_probs
+    b           BLOCK_LOOP
+
+ln1_decode_mb_to
+    ldr         r2, [sp, #l_bc]
+    mov         r0, #0
+    nop
+
+    str         r8, [r2, #bool_decoder_user_buffer] ; write the bool decoder state back
+    str         r5, [r2, #bool_decoder_count]
+    str         r4, [r2, #bool_decoder_value]
+    str         r6, [r2, #bool_decoder_range]
+
+    add         sp, sp, #l_stacksize
+    ldmia       sp!, {r4 - r11, pc}
+
+    ENDP  ; |vp8_decode_mb_tokens_v6|
+
+    END
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
new file mode 100644 (file)
index 0000000..1c53f7b
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef DETOKENIZE_ARM_H
+#define DETOKENIZE_ARM_H
+
+#if HAVE_ARMV6
+#if CONFIG_ARM_ASM_DETOK
+void vp8_init_detokenizer(VP8D_COMP *dx);
+void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
+#endif
+#endif
+
+#endif
index 7407417..34faae3 100644 (file)
@@ -14,6 +14,7 @@
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
+#include "detokenize.h"
 
 #define BOOL_DATA UINT8
 
@@ -103,6 +104,34 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
         *l = 0;
     }
 }
+
+#if CONFIG_ARM_ASM_DETOK
+DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context (bytes 0-24, indexed by block i)
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left (bytes 25-49, asm c_onyxblock2left_offset)
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0  //end of vp8_block2above (bytes 50-74, asm c_onyxblock2above_offset)
+};
+
+void vp8_init_detokenizer(VP8D_COMP *dx)
+{
+    /* Fill dx->detoken with the table and buffer pointers the asm detokenizer loads. */
+    const VP8_COMMON *const cm = &dx->common;
+    int t;
+
+    dx->detoken.qcoeff_start_ptr = &dx->mb.qcoeff[0];
+    dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
+    dx->detoken.scan = vp8_default_zig_zag1d;
+    dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
+    dx->detoken.ptr_onyxblock2context_leftabove = vp8_block2context_leftabove;
+    dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
+
+    /* One entry per block type t, each pointing at coef_probs[t][0][0]. */
+    for (t = 0; t < 4; t++)
+        dx->detoken.coef_probs[t] = (cm->fc.coef_probs [t] [ 0 ] [0]);
+}
+#endif
+
 DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
 #define FILL \
     if(count < 0) \
@@ -200,6 +229,35 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
     }\
     NORMALIZE
 
+#if CONFIG_ARM_ASM_DETOK
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+    int eobtotal = 0;
+    int i, type = 3, nblocks = 24;  /* defaults: no Y2 block, the asm writes eob[0..23] only */
+    /* Runs the ARMv6 detokenizer on one macroblock and returns the summed eobs. */
+    dx->detoken.current_bc = x->current_bc;
+    dx->detoken.A = x->above_context;
+    dx->detoken.L = x->left_context;
+
+    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    {
+        type = 1;                   /* the asm starts at block 24, then does 0..23 */
+        nblocks = 25;
+        eobtotal -= 16;
+    }
+
+    vp8_decode_mb_tokens_v6(&dx->detoken, type);
+
+    /* Skip eob[24] when it was not decoded: it still holds an earlier macroblock's value. */
+    for (i = 0; i < nblocks; i++)
+    {
+        x->block[i].eob = dx->detoken.eob[i];
+        eobtotal += dx->detoken.eob[i];
+    }
+
+    return eobtotal;
+}
+#else
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 {
     ENTROPY_CONTEXT **const A = x->above_context;
@@ -395,3 +453,4 @@ BLOCK_FINISHED:
     return eobtotal;
 
 }
+#endif //!CONFIG_ARM_ASM_DETOK
index 2f6b4a9..aa98dea 100644 (file)
@@ -9,12 +9,16 @@
  */
 
 
-#ifndef detokenize_h
-#define detokenize_h 1
+#ifndef DETOKENIZE_H
+#define DETOKENIZE_H
 
 #include "onyxd_int.h"
 
+#if ARCH_ARM
+#include "arm/detokenize_arm.h"
+#endif
+
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
 
-#endif /* detokenize_h */
+#endif /* DETOKENIZE_H */
index 728d5ca..5a88ba0 100644 (file)
 #include "vpx_scale/vpxscale.h"
 #include "systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
-
+#include "detokenize.h"
 
 extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
 extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
 
-// DEBUG code
 #if CONFIG_DEBUG
 void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
 {
@@ -129,6 +127,9 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
         cm->last_sharpness_level = cm->sharpness_level;
     }
 
+#if CONFIG_ARM_ASM_DETOK
+    vp8_init_detokenizer(pbi);
+#endif
     pbi->common.error.setjmp = 0;
     return (VP8D_PTR) pbi;
 }
index e9674ca..d40f76e 100644 (file)
 
 #VP8_DX_SRCS list is modified according to different platforms.
 
-#File list for arm
-# decoder
-#VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/decodframe_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-
-#VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/decodframe.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
 VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
+VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
-# decoder
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
 
 #File list for neon
-# decoder
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)