From e74dc5c074f683d6b8ab71f9434533a6c74bdf43 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 9 Jun 2008 12:21:05 -0700 Subject: [PATCH] FAT: make all codepage data derived from the same place Make the codepage data all derive from the Unicode tables, and create files that could be dynamically loaded in the future. --- codepage/Makefile | 8 +++---- codepage/cptable.pl | 52 ++++++++++++++++++++++++++++++---------- core/Makefile | 6 ++--- core/ldlinux.asm | 68 +++++++++++++++++++++++------------------------------ 4 files changed, 77 insertions(+), 57 deletions(-) diff --git a/codepage/Makefile b/codepage/Makefile index d426eaa..5495a6a 100644 --- a/codepage/Makefile +++ b/codepage/Makefile @@ -1,16 +1,16 @@ PERL = perl CPSRC = $(wildcard *.txt) -GENFILES = $(patsubst %.txt,%.bin,$(CPSRC)) +GENFILES = $(patsubst %.txt,%.cp,$(CPSRC)) -.SUFFIXES: .txt .bin +.SUFFIXES: .txt .cp all: $(GENFILES) -%.bin: %.txt cptable.pl UnicodeData +%.cp: %.txt cptable.pl UnicodeData $(PERL) cptable.pl UnicodeData $< $@ tidy: - rm -f $(GENFILES) + rm -f *.cp *.bin clean: tidy diff --git a/codepage/cptable.pl b/codepage/cptable.pl index c183d08..44c710c 100755 --- a/codepage/cptable.pl +++ b/codepage/cptable.pl @@ -10,25 +10,23 @@ ($ucd, $cpin, $cpout) = @ARGV; -%altcase = (); +%ucase = (); +%lcase = (); +%tcase = (); open(UCD, '<', $ucd) or die; while (defined($line = <UCD>)) { chomp $line; @f = split(/;/, $line); - if ($f[12] ne '') { - $altcase{hex $f[0]} = hex $f[12]; # Upper case equivalent - } elsif ($f[13] ne '') { - $altcase{hex $f[0]} = hex $f[13]; # Lower case equivalent - } elsif ($f[14] ne '') { - $altcase{hex $f[0]} = hex $f[14]; # Title case, would be unusual - } else { - $altcase{hex $f[0]} = hex $f[0]; - } + $n = hex $f[0]; + $ucase{$n} = hex $f[12] if ($f[12] ne ''); + $lcase{$n} = hex $f[13] if ($f[13] ne ''); + $tcase{$n} = hex $f[14] if ($f[14] ne ''); } close(UCD); @xtab = (undef) x 256; +%tabx = (); open(CPIN, '<', $cpin) or die; while (defined($line = <CPIN>)) { @@ 
-36,17 +34,47 @@ while (defined($line = <CPIN>)) { @f = split(/\s+/, $line); next if (scalar @f != 2); next if (hex $f[0] > 255); - $xtab[hex $f[0]] = hex $f[1]; + $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode + $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage } close(CPIN); open(CPOUT, '>', $cpout) or die; +# +# Magic number, in anticipation of being able to load these +# files dynamically... +# +print CPOUT pack("VV", 0x8fad232b, 0x9c295319); + +# Header fields available for future use... +print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0); + +# +# Self (shortname) uppercase table +# +for ($i = 0; $i < 256; $i++) { + $u = $tabx{$ucase{$xtab[$i]}}; + $u = $i unless (defined($u)); + print CPOUT pack("C", $u); +} + +# +# Unicode (longname) matching table +# for ($i = 0; $i < 256; $i++) { if (!defined($xtab[$i])) { $p0 = $p1 = 0xffff; } else { $p0 = $xtab[$i]; - $p1 = defined($altcase{$p0}) ? $altcase{$p0} : $p0; + if (defined($ucase{$p0})) { + $p1 = $ucase{$p0}; + } elsif (defined($lcase{$p0})) { + $p1 = $lcase{$p0}; + } elsif (defined($tcase{$p0})) { + $p1 = $tcase{$p0}; + } else { + $p1 = $p0; + } } # Only the BMP is supported... $p0 = 0xffff if ($p0 > 0xffff); diff --git a/core/Makefile b/core/Makefile index 7289294..3e58696 100644 --- a/core/Makefile +++ b/core/Makefile @@ -146,9 +146,9 @@ extlinux_sys_bin.c: extlinux.sys ../bin2c.pl $(PERL) ../bin2c.pl extlinux_image 512 < $< > $@ # NASM prior to 2.03 wouldn't auto-generate this dependency... 
-ldlinux.o: codepage.bin +ldlinux.o: codepage.cp -codepage.bin: ../codepage/$(CODEPAGE).bin +codepage.cp: ../codepage/$(CODEPAGE).cp cp -f $< $@ install: installer @@ -160,7 +160,7 @@ install-all: install install-lib netinstall: installer tidy dist: - rm -f codepage.bin *.o *.elf stupid.* patch.offset + rm -f codepage.cp *.o *.elf stupid.* patch.offset rm -f *.lsr *.lst *.map *.sec rm -f $(OBSOLETE) diff --git a/core/ldlinux.asm b/core/ldlinux.asm index 8243188..cb045c8 100644 --- a/core/ldlinux.asm +++ b/core/ldlinux.asm @@ -93,6 +93,16 @@ file_left resd 1 ; Number of sectors left resd 1 ; Unused endstruc +; +; Structure for codepage files +; + struc cp +.magic resd 2 ; 8-byte magic number +.reserved resd 6 ; Reserved for future use +.uppercase resb 256 ; Internal upper-case table +.unicode resw 2*256 ; Unicode matching table + endstruc + %ifndef DEPEND %if (open_file_t_size & (open_file_t_size-1)) %error "open_file_t is not a power of 2" @@ -1026,9 +1036,9 @@ search_dos_dir: jae .vfat_tail movzx bx,byte [bx+di] shl bx,2 - cmp ax,[ucs_codepage+bx] ; Primary case + cmp ax,[cp_unicode+bx] ; Primary case je .ucs_ok - cmp ax,[ucs_codepage+bx+2] ; Alternate case + cmp ax,[cp_unicode+bx+2] ; Alternate case je .ucs_ok ; Mismatch... jmp .not_us_pop @@ -1150,8 +1160,14 @@ search_dos_dir: section .data alignb 4 -ucs_codepage: - incbin "codepage.bin" + ; Note: we have no use of the first 32 bytes (header), + ; nor of the following 32 bytes (case mapping of control + ; characters), as long as we adjust the offsets appropriately. +codepage equ $-(32+32) +codepage_data: incbin "codepage.cp",32+32 +cp_uppercase equ codepage+cp.uppercase +cp_unicode equ codepage+cp.unicode +codepage_end equ $ section .bss VFATInit resb 1 @@ -1367,6 +1383,7 @@ mangle_dos_name: mov [NameStart],si mov cx,11 ; # of bytes to write + mov bx,cp_uppercase ; Case-conversion table .loop: lodsb cmp al,' ' ; If control or space, end @@ -1375,24 +1392,8 @@ mangle_dos_name: je .end cmp al,'.' 
; Period -> space-fill je .is_period - cmp al,'a' - jb .not_lower - cmp al,'z' - ja .not_uslower - sub al,020h - jmp short .not_lower -.is_period: mov al,' ' ; We need to space-fill -.period_loop: cmp cx,3 ; If <= 3 characters left - jbe .loop ; Just ignore it - stosb ; Otherwise, write a period - loop .period_loop ; Dec CX and (always) jump -.not_uslower: cmp al,ucase_low - jb .not_lower - cmp al,ucase_high - ja .not_lower - mov bx,ucase_tab-ucase_low - xlatb -.not_lower: stosb + xlatb ; Convert to upper case + stosb loop .loop ; Don't continue if too long ; Find the end for the benefit of longname search .find_end: @@ -1410,6 +1411,13 @@ mangle_dos_name: popa ret ; Done +.is_period: + mov al,' ' ; We need to space-fill +.period_loop: cmp cx,3 ; If <= 3 characters left + jbe .loop ; Just ignore it + stosb ; Otherwise, write a space + loop .period_loop ; Dec CX and *always* jump + section .bss alignb 2 NameStart resw 1 @@ -1418,22 +1426,6 @@ MangledBuf resb 11 section .text ; -; Case tables for extended characters; this is technically code page 865, -; but code page 437 users will probably not miss not being able to use the -; cent sign in kernel images too much :-) -; -; The table only covers the range 129 to 164; the rest we can deal with. -; - section .data - -ucase_low equ 129 -ucase_high equ 164 -ucase_tab db 154, 144, 'A', 142, 'A', 143, 128, 'EEEIII' - db 142, 143, 144, 146, 146, 'O', 153, 'OUUY', 153, 154 - db 157, 156, 157, 158, 159, 'AIOU', 165 - - section .text -; ; getfssec_edx: Get multiple sectors from a file ; ; This routine makes sure the subtransfers do not cross a 64K boundary, -- 2.7.4