diff options
author | H. Peter Anvin <hpa@zytor.com> | 2008-06-16 00:07:23 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2008-06-16 00:07:23 -0700 |
commit | 52ff420fba565c4803cd43260c2d36189ff3d71a (patch) | |
tree | c7ad33a0acb5bd9e1c2c5c2453fbb0ec28dd7ea4 /codepage | |
parent | 08844f9e811b9ad77b281d8608e90c140c76e44f (diff) | |
download | syslinux.git-52ff420fba565c4803cd43260c2d36189ff3d71a.tar.gz syslinux.git-52ff420fba565c4803cd43260c2d36189ff3d71a.tar.xz syslinux.git-52ff420fba565c4803cd43260c2d36189ff3d71a.zip |
codepage: include case variant characters in UnicodeData
Adjust the gensubset.pl script so that all case variants are
explicitly included in UnicodeData.
Diffstat (limited to 'codepage')
-rw-r--r-- | codepage/UnicodeData | 12 | ||||
-rwxr-xr-x | codepage/gensubset.pl | 19 |
2 files changed, 31 insertions, 0 deletions
diff --git a/codepage/UnicodeData b/codepage/UnicodeData index 3eb8eb7a..9b2209da 100644 --- a/codepage/UnicodeData +++ b/codepage/UnicodeData @@ -223,19 +223,31 @@ 00FE;LATIN SMALL LETTER THORN;Ll;0;L;;;;;N;;Icelandic;00DE;;00DE 00FF;LATIN SMALL LETTER Y WITH DIAERESIS;Ll;0;L;0079 0308;;;;N;LATIN SMALL LETTER Y DIAERESIS;;0178;;0178 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049 +0178;LATIN CAPITAL LETTER Y WITH DIAERESIS;Lu;0;L;0059 0308;;;;N;LATIN CAPITAL LETTER Y DIAERESIS;;;00FF; +0191;LATIN CAPITAL LETTER F WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER F HOOK;;;0192; 0192;LATIN SMALL LETTER F WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER SCRIPT F;;0191;;0191 +0391;GREEK CAPITAL LETTER ALPHA;Lu;0;L;;;;;N;;;;03B1; 0393;GREEK CAPITAL LETTER GAMMA;Lu;0;L;;;;;N;;;;03B3; +0394;GREEK CAPITAL LETTER DELTA;Lu;0;L;;;;;N;;;;03B4; +0395;GREEK CAPITAL LETTER EPSILON;Lu;0;L;;;;;N;;;;03B5; 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8; +039C;GREEK CAPITAL LETTER MU;Lu;0;L;;;;;N;;;;03BC; +03A0;GREEK CAPITAL LETTER PI;Lu;0;L;;;;;N;;;;03C0; 03A3;GREEK CAPITAL LETTER SIGMA;Lu;0;L;;;;;N;;;;03C3; +03A4;GREEK CAPITAL LETTER TAU;Lu;0;L;;;;;N;;;;03C4; 03A6;GREEK CAPITAL LETTER PHI;Lu;0;L;;;;;N;;;;03C6; 03A9;GREEK CAPITAL LETTER OMEGA;Lu;0;L;;;;;N;;;;03C9; 03B1;GREEK SMALL LETTER ALPHA;Ll;0;L;;;;;N;;;0391;;0391 +03B3;GREEK SMALL LETTER GAMMA;Ll;0;L;;;;;N;;;0393;;0393 03B4;GREEK SMALL LETTER DELTA;Ll;0;L;;;;;N;;;0394;;0394 03B5;GREEK SMALL LETTER EPSILON;Ll;0;L;;;;;N;;;0395;;0395 +03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398 +03BC;GREEK SMALL LETTER MU;Ll;0;L;;;;;N;;;039C;;039C 03C0;GREEK SMALL LETTER PI;Ll;0;L;;;;;N;;;03A0;;03A0 03C3;GREEK SMALL LETTER SIGMA;Ll;0;L;;;;;N;;;03A3;;03A3 03C4;GREEK SMALL LETTER TAU;Ll;0;L;;;;;N;;;03A4;;03A4 03C6;GREEK SMALL LETTER PHI;Ll;0;L;;;;;N;;;03A6;;03A6 +03C9;GREEK SMALL LETTER OMEGA;Ll;0;L;;;;;N;;;03A9;;03A9 2017;DOUBLE LOW LINE;Po;0;ON;<compat> 0020 0333;;;;N;SPACING DOUBLE UNDERSCORE;;;; 207F;SUPERSCRIPT LATIN SMALL 
LETTER N;Ll;0;L;<super> 006E;;;;N;;;;; 20A7;PESETA SIGN;Sc;0;ET;;;;;N;;;;; diff --git a/codepage/gensubset.pl b/codepage/gensubset.pl index 5fde460f..4dd7f2c1 100755 --- a/codepage/gensubset.pl +++ b/codepage/gensubset.pl @@ -9,6 +9,7 @@ %need_these = (); +# Mark as needed all the characters mentioned in the relevant files foreach $file (@ARGV) { open(F, '<', $file) or die; while (defined($line = <F>)) { @@ -20,9 +21,27 @@ foreach $file (@ARGV) { close(F); } +# Also mark as needed any case variants of those +# (Note: this doesn't necessarily provide the full transitive closure, +# but we shouldn't need it.) +while (defined($line = <STDIN>)) { + @f = split(/;/, $line); + if ($f[0] =~ /^([0-9a-f]+)$/i) { + $r = hex $f[0]; + if ($need_these{$r}) { + $need_these{hex $f[12]}++ if ($f[12] ne ''); + $need_these{hex $f[13]}++ if ($f[13] ne ''); + $need_these{hex $f[14]}++ if ($f[14] ne ''); + } + } +} + +# Finally, write out the subset +seek(STDIN, 0, 0); while (defined($line = <STDIN>)) { ($v, $l) = split(/;/, $line, 2); if ($v =~ /^([0-9a-f]+)\-([0-9a-f]+)$/i) { + # This isn't actually the format... fix that if it ever matters $r1 = hex $1; $r2 = hex $2; } elsif ($v =~ /^([0-9a-f]+)$/i) { |