aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@zytor.com>, 2008-06-09 12:21:05 -0700
committer: H. Peter Anvin <hpa@zytor.com>, 2008-06-09 16:14:44 -0700
commit: e74dc5c074f683d6b8ab71f9434533a6c74bdf43 (patch)
tree: d8794c12d0a25df935f49c8da81bc9c26ca40f86
parent: 78de189bb80b89f63d9bcac8c26c69ab4f913c89 (diff)
download: syslinux.git-e74dc5c074f683d6b8ab71f9434533a6c74bdf43.tar.gz
syslinux.git-e74dc5c074f683d6b8ab71f9434533a6c74bdf43.tar.xz
syslinux.git-e74dc5c074f683d6b8ab71f9434533a6c74bdf43.zip
FAT: make all codepage data derived from the same place
Make the codepage data all derive from the Unicode tables, and create files that could be dynamically loaded in the future.
-rw-r--r-- codepage/Makefile 8
-rwxr-xr-x codepage/cptable.pl 52
-rw-r--r-- core/Makefile 6
-rw-r--r-- core/ldlinux.asm 68
4 files changed, 77 insertions, 57 deletions
diff --git a/codepage/Makefile b/codepage/Makefile
index d426eaad..5495a6a7 100644
--- a/codepage/Makefile
+++ b/codepage/Makefile
@@ -1,16 +1,16 @@
PERL = perl
CPSRC = $(wildcard *.txt)
-GENFILES = $(patsubst %.txt,%.bin,$(CPSRC))
+GENFILES = $(patsubst %.txt,%.cp,$(CPSRC))
-.SUFFIXES: .txt .bin
+.SUFFIXES: .txt .cp
all: $(GENFILES)
-%.bin: %.txt cptable.pl UnicodeData
+%.cp: %.txt cptable.pl UnicodeData
$(PERL) cptable.pl UnicodeData $< $@
tidy:
- rm -f $(GENFILES)
+ rm -f *.cp *.bin
clean: tidy
diff --git a/codepage/cptable.pl b/codepage/cptable.pl
index c183d08c..44c710ce 100755
--- a/codepage/cptable.pl
+++ b/codepage/cptable.pl
@@ -10,25 +10,23 @@
($ucd, $cpin, $cpout) = @ARGV;
-%altcase = ();
+%ucase = ();
+%lcase = ();
+%tcase = ();
open(UCD, '<', $ucd) or die;
while (defined($line = <UCD>)) {
chomp $line;
@f = split(/;/, $line);
- if ($f[12] ne '') {
- $altcase{hex $f[0]} = hex $f[12]; # Upper case equivalent
- } elsif ($f[13] ne '') {
- $altcase{hex $f[0]} = hex $f[13]; # Lower case equivalent
- } elsif ($f[14] ne '') {
- $altcase{hex $f[0]} = hex $f[14]; # Title case, would be unusual
- } else {
- $altcase{hex $f[0]} = hex $f[0];
- }
+ $n = hex $f[0];
+ $ucase{$n} = hex $f[12] if ($f[12] ne '');
+ $lcase{$n} = hex $f[13] if ($f[13] ne '');
+ $tcase{$n} = hex $f[14] if ($f[14] ne '');
}
close(UCD);
@xtab = (undef) x 256;
+%tabx = ();
open(CPIN, '<', $cpin) or die;
while (defined($line = <CPIN>)) {
@@ -36,17 +34,47 @@ while (defined($line = <CPIN>)) {
@f = split(/\s+/, $line);
next if (scalar @f != 2);
next if (hex $f[0] > 255);
- $xtab[hex $f[0]] = hex $f[1];
+ $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode
+ $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage
}
close(CPIN);
open(CPOUT, '>', $cpout) or die;
+#
+# Magic number, in anticipation of being able to load these
+# files dynamically...
+#
+print CPOUT pack("VV", 0x8fad232b, 0x9c295319);
+
+# Header fields available for future use...
+print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
+
+#
+# Self (shortname) uppercase table
+#
+for ($i = 0; $i < 256; $i++) {
+ $u = $tabx{$ucase{$xtab[$i]}};
+ $u = $i unless (defined($u));
+ print CPOUT pack("C", $u);
+}
+
+#
+# Unicode (longname) matching table
+#
for ($i = 0; $i < 256; $i++) {
if (!defined($xtab[$i])) {
$p0 = $p1 = 0xffff;
} else {
$p0 = $xtab[$i];
- $p1 = defined($altcase{$p0}) ? $altcase{$p0} : $p0;
+ if (defined($ucase{$p0})) {
+ $p1 = $ucase{$p0};
+ } elsif (defined($lcase{$p0})) {
+ $p1 = $lcase{$p0};
+ } elsif (defined($tcase{$p0})) {
+ $p1 = $tcase{$p0};
+ } else {
+ $p1 = $p0;
+ }
}
# Only the BMP is supported...
$p0 = 0xffff if ($p0 > 0xffff);
diff --git a/core/Makefile b/core/Makefile
index 7289294a..3e58696b 100644
--- a/core/Makefile
+++ b/core/Makefile
@@ -146,9 +146,9 @@ extlinux_sys_bin.c: extlinux.sys ../bin2c.pl
$(PERL) ../bin2c.pl extlinux_image 512 < $< > $@
# NASM prior to 2.03 wouldn't auto-generate this dependency...
-ldlinux.o: codepage.bin
+ldlinux.o: codepage.cp
-codepage.bin: ../codepage/$(CODEPAGE).bin
+codepage.cp: ../codepage/$(CODEPAGE).cp
cp -f $< $@
install: installer
@@ -160,7 +160,7 @@ install-all: install install-lib
netinstall: installer
tidy dist:
- rm -f codepage.bin *.o *.elf stupid.* patch.offset
+ rm -f codepage.cp *.o *.elf stupid.* patch.offset
rm -f *.lsr *.lst *.map *.sec
rm -f $(OBSOLETE)
diff --git a/core/ldlinux.asm b/core/ldlinux.asm
index 8243188a..cb045c83 100644
--- a/core/ldlinux.asm
+++ b/core/ldlinux.asm
@@ -93,6 +93,16 @@ file_left resd 1 ; Number of sectors left
resd 1 ; Unused
endstruc
+;
+; Structure for codepage files
+;
+ struc cp
+.magic resd 2 ; 8-byte magic number
+.reserved resd 6 ; Reserved for future use
+.uppercase resb 256 ; Internal upper-case table
+.unicode resw 2*256 ; Unicode matching table
+ endstruc
+
%ifndef DEPEND
%if (open_file_t_size & (open_file_t_size-1))
%error "open_file_t is not a power of 2"
@@ -1026,9 +1036,9 @@ search_dos_dir:
jae .vfat_tail
movzx bx,byte [bx+di]
shl bx,2
- cmp ax,[ucs_codepage+bx] ; Primary case
+ cmp ax,[cp_unicode+bx] ; Primary case
je .ucs_ok
- cmp ax,[ucs_codepage+bx+2] ; Alternate case
+ cmp ax,[cp_unicode+bx+2] ; Alternate case
je .ucs_ok
; Mismatch...
jmp .not_us_pop
@@ -1150,8 +1160,14 @@ search_dos_dir:
section .data
alignb 4
-ucs_codepage:
- incbin "codepage.bin"
+ ; Note: we have no use for the first 32 bytes (header),
+ ; nor for the following 32 bytes (case mapping of control
+ ; characters), as long as we adjust the offsets appropriately.
+codepage equ $-(32+32)
+codepage_data: incbin "codepage.cp",32+32
+cp_uppercase equ codepage+cp.uppercase
+cp_unicode equ codepage+cp.unicode
+codepage_end equ $
section .bss
VFATInit resb 1
@@ -1367,6 +1383,7 @@ mangle_dos_name:
mov [NameStart],si
mov cx,11 ; # of bytes to write
+ mov bx,cp_uppercase ; Case-conversion table
.loop:
lodsb
cmp al,' ' ; If control or space, end
@@ -1375,24 +1392,8 @@ mangle_dos_name:
je .end
cmp al,'.' ; Period -> space-fill
je .is_period
- cmp al,'a'
- jb .not_lower
- cmp al,'z'
- ja .not_uslower
- sub al,020h
- jmp short .not_lower
-.is_period: mov al,' ' ; We need to space-fill
-.period_loop: cmp cx,3 ; If <= 3 characters left
- jbe .loop ; Just ignore it
- stosb ; Otherwise, write a period
- loop .period_loop ; Dec CX and (always) jump
-.not_uslower: cmp al,ucase_low
- jb .not_lower
- cmp al,ucase_high
- ja .not_lower
- mov bx,ucase_tab-ucase_low
- xlatb
-.not_lower: stosb
+ xlatb ; Convert to upper case
+ stosb
loop .loop ; Don't continue if too long
; Find the end for the benefit of longname search
.find_end:
@@ -1410,6 +1411,13 @@ mangle_dos_name:
popa
ret ; Done
+.is_period:
+ mov al,' ' ; We need to space-fill
+.period_loop: cmp cx,3 ; If <= 3 characters left
+ jbe .loop ; Just ignore it
+ stosb ; Otherwise, write a space
+ loop .period_loop ; Dec CX and *always* jump
+
section .bss
alignb 2
NameStart resw 1
@@ -1418,22 +1426,6 @@ MangledBuf resb 11
section .text
;
-; Case tables for extended characters; this is technically code page 865,
-; but code page 437 users will probably not miss not being able to use the
-; cent sign in kernel images too much :-)
-;
-; The table only covers the range 129 to 164; the rest we can deal with.
-;
- section .data
-
-ucase_low equ 129
-ucase_high equ 164
-ucase_tab db 154, 144, 'A', 142, 'A', 143, 128, 'EEEIII'
- db 142, 143, 144, 146, 146, 'O', 153, 'OUUY', 153, 154
- db 157, 156, 157, 158, 159, 'AIOU', 165
-
- section .text
-;
; getfssec_edx: Get multiple sectors from a file
;
; This routine makes sure the subtransfers do not cross a 64K boundary,