($ucd, $cpin, $cpout) = @ARGV;
-%altcase = ();
+%ucase = ();
+%lcase = ();
+%tcase = ();
open(UCD, '<', $ucd) or die;
while (defined($line = <UCD>)) {
chomp $line;
@f = split(/;/, $line);
- if ($f[12] ne '') {
- $altcase{hex $f[0]} = hex $f[12]; # Upper case equivalent
- } elsif ($f[13] ne '') {
- $altcase{hex $f[0]} = hex $f[13]; # Lower case equivalent
- } elsif ($f[14] ne '') {
- $altcase{hex $f[0]} = hex $f[14]; # Title case, would be unusual
- } else {
- $altcase{hex $f[0]} = hex $f[0];
- }
+ $n = hex $f[0];
+ $ucase{$n} = hex $f[12] if ($f[12] ne '');
+ $lcase{$n} = hex $f[13] if ($f[13] ne '');
+ $tcase{$n} = hex $f[14] if ($f[14] ne '');
}
close(UCD);
@xtab = (undef) x 256;
+%tabx = ();
open(CPIN, '<', $cpin) or die;
while (defined($line = <CPIN>)) {
@f = split(/\s+/, $line);
next if (scalar @f != 2);
next if (hex $f[0] > 255);
- $xtab[hex $f[0]] = hex $f[1];
+ $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode
+ $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage
}
close(CPIN);
open(CPOUT, '>', $cpout) or die;
+#
+# Magic number, in anticipation of being able to load these
+# files dynamically...
+#
+print CPOUT pack("VV", 0x8fad232b, 0x9c295319);
+
+# Header fields available for future use...
+print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
+
+#
+# Self (shortname) uppercase table
+#
+for ($i = 0; $i < 256; $i++) {
+ $u = $tabx{$ucase{$xtab[$i]}};
+ $u = $i unless (defined($u));
+ print CPOUT pack("C", $u);
+}
+
+#
+# Unicode (longname) matching table
+#
for ($i = 0; $i < 256; $i++) {
if (!defined($xtab[$i])) {
$p0 = $p1 = 0xffff;
} else {
$p0 = $xtab[$i];
- $p1 = defined($altcase{$p0}) ? $altcase{$p0} : $p0;
+ if (defined($ucase{$p0})) {
+ $p1 = $ucase{$p0};
+ } elsif (defined($lcase{$p0})) {
+ $p1 = $lcase{$p0};
+ } elsif (defined($tcase{$p0})) {
+ $p1 = $tcase{$p0};
+ } else {
+ $p1 = $p0;
+ }
}
# Only the BMP is supported...
$p0 = 0xffff if ($p0 > 0xffff);
resd 1 ; Unused
endstruc
+;
+; Structure for codepage files
+;
+ struc cp
+.magic resd 2 ; 8-byte magic number (two 32-bit words)
+.reserved resd 6 ; Reserved for future use (six 32-bit words, written as zero by the generator)
+.uppercase resb 256 ; Internal upper-case table: one byte per codepage character
+.unicode resw 2*256 ; Unicode matching table: two 16-bit words per codepage character (primary case, then alternate case)
+ endstruc
+
%ifndef DEPEND
%if (open_file_t_size & (open_file_t_size-1))
%error "open_file_t is not a power of 2"
jae .vfat_tail
movzx bx,byte [bx+di]
shl bx,2
- cmp ax,[ucs_codepage+bx] ; Primary case
+ cmp ax,[cp_unicode+bx] ; Primary case
je .ucs_ok
- cmp ax,[ucs_codepage+bx+2] ; Alternate case
+ cmp ax,[cp_unicode+bx+2] ; Alternate case
je .ucs_ok
; Mismatch...
jmp .not_us_pop
section .data
alignb 4
-ucs_codepage:
- incbin "codepage.bin"
+ ; Note: we have no use for the first 32 bytes (header),
+ ; nor for the following 32 bytes (case mapping of control
+ ; characters), as long as we adjust the offsets appropriately.
+codepage equ $-(32+32)
+codepage_data: incbin "codepage.cp",32+32
+cp_uppercase equ codepage+cp.uppercase
+cp_unicode equ codepage+cp.unicode
+codepage_end equ $
section .bss
VFATInit resb 1
mov [NameStart],si
mov cx,11 ; # of bytes to write
+ mov bx,cp_uppercase ; Case-conversion table
.loop:
lodsb
cmp al,' ' ; If control or space, end
je .end
cmp al,'.' ; Period -> space-fill
je .is_period
- cmp al,'a'
- jb .not_lower
- cmp al,'z'
- ja .not_uslower
- sub al,020h
- jmp short .not_lower
-.is_period: mov al,' ' ; We need to space-fill
-.period_loop: cmp cx,3 ; If <= 3 characters left
- jbe .loop ; Just ignore it
- stosb ; Otherwise, write a period
- loop .period_loop ; Dec CX and (always) jump
-.not_uslower: cmp al,ucase_low
- jb .not_lower
- cmp al,ucase_high
- ja .not_lower
- mov bx,ucase_tab-ucase_low
- xlatb
-.not_lower: stosb
+ xlatb ; Convert to upper case
+ stosb
loop .loop ; Don't continue if too long
; Find the end for the benefit of longname search
.find_end:
popa
ret ; Done
+.is_period:
+ mov al,' ' ; We need to space-fill
+.period_loop: cmp cx,3 ; If <= 3 characters left
+ jbe .loop ; Just ignore it
+ stosb ; Otherwise, write a space
+ loop .period_loop ; Dec CX and *always* jump
+
section .bss
alignb 2
NameStart resw 1
section .text
;
-; Case tables for extended characters; this is technically code page 865,
-; but code page 437 users will probably not miss not being able to use the
-; cent sign in kernel images too much :-)
-;
-; The table only covers the range 129 to 164; the rest we can deal with.
-;
- section .data
-
-ucase_low equ 129
-ucase_high equ 164
-ucase_tab db 154, 144, 'A', 142, 'A', 143, 128, 'EEEIII'
- db 142, 143, 144, 146, 146, 'O', 153, 'OUUY', 153, 154
- db 157, 156, 157, 158, 159, 'AIOU', 165
-
- section .text
-;
; getfssec_edx: Get multiple sectors from a file
;
; This routine makes sure the subtransfers do not cross a 64K boundary,