1 ; vim:filetype=nasm ts=8
3 ; libFLAC - Free Lossless Audio Codec library
4 ; Copyright (C) 2001-2009 Josh Coalson
5 ; Copyright (C) 2011-2013 Xiph.Org Foundation
7 ; Redistribution and use in source and binary forms, with or without
8 ; modification, are permitted provided that the following conditions
11 ; - Redistributions of source code must retain the above copyright
12 ; notice, this list of conditions and the following disclaimer.
14 ; - Redistributions in binary form must reproduce the above copyright
15 ; notice, this list of conditions and the following disclaimer in the
16 ; documentation and/or other materials provided with the distribution.
18 ; - Neither the name of the Xiph.org Foundation nor the names of its
19 ; contributors may be used to endorse or promote products derived from
20 ; this software without specific prior written permission.
22 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 ; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 ; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 ; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 ; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 ; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 cglobal precompute_partition_info_sums_32bit_asm_ia32_
43 ; **********************************************************************
45 ; C signature corresponding to the routine below:
46 ; void precompute_partition_info_sums_32bit_(
47 ; const FLAC__int32 residual[],
48 ; FLAC__uint64 abs_residual_partition_sums[],
50 ; unsigned predictor_order,
51 ; unsigned min_partition_order,
52 ; unsigned max_partition_order
56 cident precompute_partition_info_sums_32bit_asm_ia32_
58 ;; Keys like the one below appear at major checkpoints throughout the code; each shows where things are on the stack at that point in time
59 ;; [esp + 4] const FLAC__int32 residual[]
60 ;; [esp + 8] FLAC__uint64 abs_residual_partition_sums[]
61 ;; [esp + 12] unsigned blocksize
62 ;; [esp + 16] unsigned predictor_order
63 ;; [esp + 20] unsigned min_partition_order
64 ;; [esp + 24] unsigned max_partition_order
70 ;; [esp + 28] const FLAC__int32 residual[]
71 ;; [esp + 32] FLAC__uint64 abs_residual_partition_sums[]
72 ;; [esp + 36] unsigned blocksize
73 ;; [esp + 40] unsigned predictor_order
74 ;; [esp + 44] unsigned min_partition_order
75 ;; [esp + 48] unsigned max_partition_order
77 ;; [esp + 4] default_partition_samples
82 mov [esp], eax ; [esp] <- partitions = 1u << max_partition_order;
85 mov [esp + 4], eax ; [esp + 4] <- default_partition_samples = blocksize >> max_partition_order;
88 ; first do max_partition_order
91 sub edi, [esp + 40] ; edi <- end = (unsigned)(-(int)predictor_order) + default_partition_samples
92 xor esi, esi ; esi <- residual_sample = 0
93 xor ecx, ecx ; ecx <- partition = 0
94 mov ebp, [esp + 28] ; ebp <- residual[]
95 xor ebx, ebx ; ebx <- abs_residual_partition_sum = 0;
96 ; note we put the updates to 'end' and 'abs_residual_partition_sum' at the end of loop0 and in the initialization above so we could align loop0 and loop1
98 .loop0: ; for(partition = residual_sample = 0; partition < partitions; partition++) {
99 .loop1: ; for( ; residual_sample < end; residual_sample++)
100 mov eax, [ebp + esi * 4]
104 add ebx, eax ; abs_residual_partition_sum += abs(residual[residual_sample]);
105 ;@@@@@@ check overflow flag and abort here?
107 cmp esi, edi ; /* since the loop will always run at least once, we can put the loop check down here */
110 add edi, [esp + 4] ; end += default_partition_samples;
112 mov [eax + ecx * 8], ebx ; abs_residual_partition_sums[partition] = abs_residual_partition_sum;
113 mov [eax + ecx * 8 + 4], dword 0
114 xor ebx, ebx ; abs_residual_partition_sum = 0;
116 cmp ecx, [esp] ; /* since the loop will always run at least once, we can put the loop check down here */
120 ; now merge partitions for lower orders
122 mov esi, [esp + 32] ; esi <- abs_residual_partition_sums[from_partition==0];
124 lea edi, [esi + eax * 8] ; edi <- abs_residual_partition_sums[to_partition==partitions];
126 sub ecx, byte 1 ; ecx <- partition_order = (int)max_partition_order - 1;
128 .loop2: ; for(; partition_order >= (int)min_partition_order; partition_order--) {
132 shl edx, cl ; const unsigned partitions = 1u << partition_order;
134 .loop3: ; for(i = 0; i < partitions; i++) {
140 mov [edi + 4], ebx ; a_r_p_s[to_partition] = a_r_p_s[from_partition] + a_r_p_s[from_partition+1];