1 ; vim:filetype=nasm ts=8
3 ; libFLAC - Free Lossless Audio Codec library
4 ; Copyright (C) 2001,2002,2003,2004,2005,2006,2007,2008,2009 Josh Coalson
6 ; Redistribution and use in source and binary forms, with or without
7 ; modification, are permitted provided that the following conditions
10 ; - Redistributions of source code must retain the above copyright
11 ; notice, this list of conditions and the following disclaimer.
13 ; - Redistributions in binary form must reproduce the above copyright
14 ; notice, this list of conditions and the following disclaimer in the
15 ; documentation and/or other materials provided with the distribution.
17 ; - Neither the name of the Xiph.org Foundation nor the names of its
18 ; contributors may be used to endorse or promote products derived from
19 ; this software without specific prior written permission.
21 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 ; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 ; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 ; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 ; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 ; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 cextern FLAC__crc16_table ; unsigned FLAC__crc16_table[256];
38 cextern bitreader_read_from_client_ ; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);
40 cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
45 ; **********************************************************************
47 ; FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
49 ; Some details like assertions and other checks are performed by the caller.
51 cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
54 ;ASSERT(0 != br->buffer);
55 ; WATCHOUT: code only works if a brword is 32 bits wide (FLAC__BITS_PER_WORD == 32); relying on that lets us make things much faster
56 ;ASSERT(FLAC__BITS_PER_WORD == 32);
57 ;ASSERT(parameter < 32);
58 ; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it
60 ;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
61 ;; [esp + 16] unsigned parameter
62 ;; [esp + 12] unsigned nvals
63 ;; [esp + 8] int vals[]
64 ;; [esp + 4] FLAC__BitReader *br
65 mov eax, [esp + 12] ; if(nvals == 0)
68 mov eax, 1 ; return true;
77 ;; [esp + 36] unsigned parameter
78 ;; [esp + 32] unsigned nvals
79 ;; [esp + 28] int vals[]
80 ;; [esp + 24] FLAC__BitReader *br
82 mov ebp, [esp + 24] ; ebp <- br == br->buffer
83 mov esi, [ebp + 16] ; esi <- br->consumed_words (aka 'cwords' in the C version)
84 mov ecx, [ebp + 20] ; ecx <- br->consumed_bits (aka 'cbits' in the C version)
85 xor edi, edi ; edi <- 0 'uval'
91 ;; [ebp + 8] br->words
92 ;; [ebp + 12] br->bytes
93 ;; [ebp + 16] br->consumed_words
94 ;; [ebp + 20] br->consumed_bits
95 ;; [ebp + 24] br->read_crc
96 ;; [ebp + 28] br->crc16_align
98 ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
99 mov eax, [ebp + 8] ; eax <- br->words
100 sub eax, esi ; eax <- br->words-cwords
101 shl eax, 2 ; eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD
102 add eax, [ebp + 12] ; eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
103 shl eax, 3 ; eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
104 sub eax, ecx ; eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
105 mov [esp], eax ; ucbits <- eax
108 .val_loop: ; while(1) {
113 .unary_loop: ; while(1) {
118 cmp esi, [ebp + 8] ; while(cwords < br->words) /* if we've not consumed up to a partial tail word... */
122 mov eax, [ebx + 4*esi] ; b = br->buffer[cwords]
123 mov edx, eax ; edx = br->buffer[cwords] (saved for later use)
124 shl eax, cl ; b = br->buffer[cwords] << cbits
125 test eax, eax ; (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
126 jz near .c1_next2 ; if(b) {
129 and ebx, 31 ; ebx = 'i' = # of leading 0 bits in 'b' (eax)
130 add ecx, ebx ; cbits += i;
131 add edi, ebx ; uval += i;
132 add ecx, byte 1 ; cbits++; /* skip over stop bit */
134 jz near .break1 ; if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
135 ; crc16_update_word_(br, br->buffer[cwords]);
136 push edi ; [need more registers]
137 bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
138 mov ecx, [ebp + 28] ; ecx <- br->crc16_align
139 mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
140 %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
141 mov edi, _FLAC__crc16_table
143 %ifdef OBJ_FORMAT_elf
144 mov edi, [esp + 16] ; saved ebx (GOT base)
145 lea edi, [edi + FLAC__crc16_table wrt ..gotoff]
147 mov edi, FLAC__crc16_table
150 ;; eax (ax) crc a.k.a. br->read_crc
151 ;; ebx (bl) intermediate result index into FLAC__crc16_table[]
152 ;; ecx br->crc16_align
153 ;; edx byteswapped brword to CRC
155 ;; edi unsigned FLAC__crc16_table[]
157 test ecx, ecx ; switch(br->crc16_align) ...
158 jnz .c0b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
159 .c0b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
161 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
162 shl eax, 8 ; ax <- (crc<<8)
163 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
164 .c0b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
166 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
167 shl eax, 8 ; ax <- (crc<<8)
168 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
170 .c0b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
172 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
173 shl eax, 8 ; ax <- (crc<<8)
174 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
175 .c0b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
177 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
178 shl eax, 8 ; ax <- (crc<<8)
179 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
181 mov [ebp + 24], eax ; br->read_crc <- crc
184 add esi, byte 1 ; cwords++;
185 xor ecx, ecx ; cbits = 0;
187 jmp near .break1 ; goto break1;
188 ;; this section relocated out of the way for performance
190 mov [ebp + 28], dword 0 ; br->crc16_align <- 0
198 ;; this section relocated out of the way for performance
200 mov [ebp + 28], dword 0 ; br->crc16_align <- 0
208 .c1_next2: ; } else {
210 ;; edx current brword 'b'
215 sub edi, ecx ; uval += FLAC__BITS_PER_WORD - cbits;
216 ; crc16_update_word_(br, br->buffer[cwords]);
217 push edi ; [need more registers]
218 bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
219 mov ecx, [ebp + 28] ; ecx <- br->crc16_align
220 mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
221 %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
222 mov edi, _FLAC__crc16_table
224 %ifdef OBJ_FORMAT_elf
225 mov edi, [esp + 16] ; saved ebx (GOT base)
226 lea edi, [edi + FLAC__crc16_table wrt ..gotoff]
228 mov edi, FLAC__crc16_table
231 ;; eax (ax) crc a.k.a. br->read_crc
232 ;; ebx (bl) intermediate result index into FLAC__crc16_table[]
233 ;; ecx br->crc16_align
234 ;; edx byteswapped brword to CRC
236 ;; edi unsigned FLAC__crc16_table[]
238 test ecx, ecx ; switch(br->crc16_align) ...
239 jnz .c1b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
240 .c1b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
242 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
243 shl eax, 8 ; ax <- (crc<<8)
244 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
245 .c1b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
247 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
248 shl eax, 8 ; ax <- (crc<<8)
249 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
251 .c1b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
253 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
254 shl eax, 8 ; ax <- (crc<<8)
255 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
256 .c1b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
258 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
259 shl eax, 8 ; ax <- (crc<<8)
260 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
262 mov [ebp + 24], eax ; br->read_crc <- crc
265 add esi, byte 1 ; cwords++;
266 xor ecx, ecx ; cbits = 0;
267 ; /* didn't find stop bit yet, have to keep going... */
270 cmp esi, [ebp + 8] ; } while(cwords < br->words) /* if we've not consumed up to a partial tail word... */
274 ; at this point we've eaten up all the whole words; have to try
275 ; reading through any tail bytes before calling the read callback.
276 ; this is a repeat of the above logic adjusted for the fact we
277 ; don't have a whole word. note though if the client is feeding
278 ; us data a byte at a time (unlikely), br->consumed_bits may not be zero.
284 mov edx, [ebp + 12] ; edx <- br->bytes
285 shl edx, 3 ; edx <- br->bytes*8
287 jbe .read1 ; if(br->bytes*8 > cbits) { [NOTE: this case is rare so it doesn't have to be all that fast ]
289 ; edx <- const unsigned end = br->bytes * 8;
290 mov eax, [ebx + 4*esi] ; b = br->buffer[cwords]
291 xchg edx, ecx ; [edx <- cbits , ecx <- end]
292 mov ebx, 0xffffffff ; ebx <- FLAC__WORD_ALL_ONES
293 shr ebx, cl ; ebx <- FLAC__WORD_ALL_ONES >> end
294 not ebx ; ebx <- ~(FLAC__WORD_ALL_ONES >> end)
295 xchg edx, ecx ; [edx <- end , ecx <- cbits]
296 and eax, ebx ; b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end));
297 shl eax, cl ; b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
298 test eax, eax ; (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
299 jz .c1_next3 ; if(b) {
302 and ebx, 31 ; ebx = 'i' = # of leading 0 bits in 'b' (eax)
303 add ecx, ebx ; cbits += i;
304 add edi, ebx ; uval += i;
305 add ecx, byte 1 ; cbits++; /* skip over stop bit */
306 jmp short .break1 ; goto break1;
307 .c1_next3: ; } else {
309 add edi, edx ; uval += end - cbits;
310 mov ecx, edx ; cbits = end
311 ; /* didn't find stop bit yet, have to keep going... */
315 ; flush registers and read; bitreader_read_from_client_() does
316 ; not touch br->consumed_bits at all but we still need to set
317 ; it in case it fails and we have to return false.
322 mov [ebp + 16], esi ; br->consumed_words = cwords;
323 mov [ebp + 20], ecx ; br->consumed_bits = cbits;
324 push ecx ; /* save */
325 push ebp ; /* push br argument */
326 %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
327 call _bitreader_read_from_client_
329 %ifdef OBJ_FORMAT_elf
330 mov ebx, [esp + 20] ; saved ebx (GOT base)
331 call bitreader_read_from_client_ wrt ..plt
333 call bitreader_read_from_client_
336 pop edx ; /* discard, unused */
337 pop ecx ; /* restore */
338 mov esi, [ebp + 16] ; cwords = br->consumed_words;
339 ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
340 mov ebx, [ebp + 8] ; ebx <- br->words
341 sub ebx, esi ; ebx <- br->words-cwords
342 shl ebx, 2 ; ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
343 add ebx, [ebp + 12] ; ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
344 shl ebx, 3 ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
345 sub ebx, ecx ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
346 add ebx, edi ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval
347 ; + uval pre-compensates our count for the # of unary bits already
348 ; consumed before the read, because uval is subtracted from ucbits
349 ; all at once at break1
350 mov [esp], ebx ; ucbits <- ebx
351 test eax, eax ; if(!bitreader_read_from_client_(br))
353 jmp .end ; return false; /* eax (the return value) is already 0 */
354 ; } /* end while(1) unary part */
363 sub [esp], edi ; ucbits -= uval;
364 sub dword [esp], byte 1 ; ucbits--; /* account for stop bit */
369 mov ebx, [esp + 36] ; ebx <- parameter
370 test ebx, ebx ; if(parameter) {
373 cmp [esp], ebx ; while(ucbits < parameter) {
375 ; flush registers and read; bitreader_read_from_client_() does
376 ; not touch br->consumed_bits at all but we still need to set
377 ; it in case it fails and we have to return false.
378 mov [ebp + 16], esi ; br->consumed_words = cwords;
379 mov [ebp + 20], ecx ; br->consumed_bits = cbits;
380 push ecx ; /* save */
381 push ebx ; /* save */
382 push ebp ; /* push br argument */
383 %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
384 call _bitreader_read_from_client_
386 %ifdef OBJ_FORMAT_elf
387 mov ebx, [esp + 24] ; saved ebx (GOT base)
388 call bitreader_read_from_client_ wrt ..plt
390 call bitreader_read_from_client_
393 pop edx ; /* discard, unused */
394 pop ebx ; /* restore */
395 pop ecx ; /* restore */
396 mov esi, [ebp + 16] ; cwords = br->consumed_words;
397 ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
398 mov edx, [ebp + 8] ; edx <- br->words
399 sub edx, esi ; edx <- br->words-cwords
400 shl edx, 2 ; edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
401 add edx, [ebp + 12] ; edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
402 shl edx, 3 ; edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
403 sub edx, ecx ; edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
404 mov [esp], edx ; ucbits <- edx
405 test eax, eax ; if(!bitreader_read_from_client_(br))
407 jmp .end ; return false; /* eax (the return value) is already 0 */
416 cmp esi, [ebp + 8] ; if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
418 test ecx, ecx ; if(cbits) {
419 jz near .c2_next3 ; /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
422 sub eax, ecx ; const unsigned n = FLAC__BITS_PER_WORD - cbits;
423 mov edx, [edx + 4*esi] ; const brword word = br->buffer[cwords];
424 cmp ebx, eax ; if(parameter < n) {
426 ; uval <<= parameter;
427 ; uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
431 add ebx, ecx ; cbits += parameter;
432 xchg ebx, ecx ; ebx <- parameter, ecx <- cbits
433 jmp .break2 ; goto break2;
437 ; uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
439 rol edx, cl ; @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing
440 ; @@@@@@OPT: or put parameter in ch instead and free up ebx completely again
448 ror edx, cl ; restored.
451 mov edx, [edx + 4*esi]
453 ; crc16_update_word_(br, br->buffer[cwords]);
454 push edi ; [need more registers]
455 push ebx ; [need more registers]
456 push eax ; [need more registers]
457 bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
458 mov ecx, [ebp + 28] ; ecx <- br->crc16_align
459 mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc)
460 %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
461 mov edi, _FLAC__crc16_table
463 %ifdef OBJ_FORMAT_elf
464 mov edi, [esp + 24] ; saved ebx (GOT base)
465 lea edi, [edi + FLAC__crc16_table wrt ..gotoff]
467 mov edi, FLAC__crc16_table
470 ;; eax (ax) crc a.k.a. br->read_crc
471 ;; ebx (bl) intermediate result index into FLAC__crc16_table[]
472 ;; ecx br->crc16_align
473 ;; edx byteswapped brword to CRC
475 ;; edi unsigned FLAC__crc16_table[]
477 test ecx, ecx ; switch(br->crc16_align) ...
478 jnz .c2b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
479 .c2b0: xor dl, ah ; dl <- (crc>>8)^(word>>24)
481 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
482 shl eax, 8 ; ax <- (crc<<8)
483 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
484 .c2b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff))
486 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
487 shl eax, 8 ; ax <- (crc<<8)
488 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
490 .c2b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff))
492 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
493 shl eax, 8 ; ax <- (crc<<8)
494 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
495 .c2b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff)
497 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
498 shl eax, 8 ; ax <- (crc<<8)
499 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
501 mov [ebp + 24], eax ; br->read_crc <- crc
505 add esi, byte 1 ; cwords++;
507 sub ecx, eax ; cbits = parameter - n;
508 jz .break2 ; if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
510 ; uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
512 mov eax, [eax + 4*esi]
515 jmp .break2 ; goto break2;
517 ;; this section relocated out of the way for performance
519 mov [ebp + 28], dword 0 ; br->crc16_align <- 0
527 .c2_next3: ; } else {
528 mov ecx, ebx ; cbits = parameter;
530 ; uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
532 mov eax, [eax + 4*esi]
534 jmp .break2 ; goto break2;
536 .c2_next2: ; } else {
537 ; in this case we're starting our read at a partial tail word;
538 ; the reader has guaranteed that we have at least 'parameter'
539 ; bits available to read, which makes this case simpler.
540 ; uval <<= parameter;
542 ; /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
543 ; uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
544 ; cbits += parameter;
548 ; uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
551 ; the above is much shorter in assembly:
553 mov eax, [eax + 4*esi] ; eax <- br->buffer[cwords]
554 shl eax, cl ; eax <- br->buffer[cwords] << cbits
555 add ecx, ebx ; cbits += parameter
556 xchg ebx, ecx ; ebx <- cbits, ecx <- parameter
557 shld edi, eax, cl ; uval <<= parameter <<< 'parameter' bits of tail word
558 xchg ebx, ecx ; ebx <- parameter, ecx <- cbits
562 sub [esp], ebx ; ucbits -= parameter;
567 mov ebx, [esp + 28] ; ebx <- vals
568 mov edx, edi ; edx <- uval
569 and edi, 1 ; edi <- uval & 1
570 shr edx, 1 ; edx <- uval >> 1
571 neg edi ; edi <- -(int)(uval & 1)
572 xor edx, edi ; edx <- (uval >> 1 ^ -(int)(uval & 1))
573 mov [ebx], edx ; *vals <- edx
574 sub dword [esp + 32], byte 1 ; --nvals;
575 jz .finished ; if(nvals == 0) /* jump to finish */
576 xor edi, edi ; uval = 0;
577 add dword [esp + 28], 4 ; ++vals
581 mov [ebp + 16], esi ; br->consumed_words = cwords;
582 mov [ebp + 20], ecx ; br->consumed_bits = cbits;