aboutsummaryrefslogtreecommitdiffstats
path: root/codepage/cptable.pl
blob: c183d08ce1a5a69b34c88a45a845fc38780d9459 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/perl
#
# Produce a codepage matching table.  For each 8-bit character, list
# a primary and an alternate match (the latter is used for
# case-insensitive matching).
#
# Usage:
#	cptable.pl UnicodeData cpXXX.txt cpXXX.bin
#

# Command line: the Unicode character database file, the codepage mapping
# table to read, and the binary table to write.  Extra arguments are
# ignored, as before; missing ones now produce a usage message.
my ($ucd, $cpin, $cpout) = @ARGV;
die "Usage: $0 UnicodeData cpXXX.txt cpXXX.bin\n" unless (defined($cpout));

# %altcase maps a Unicode code point (as a number) to the code point of its
# other-case equivalent, or to itself when no case mapping is listed.
my %altcase = ();

open(my $ucd_fh, '<', $ucd) or die "$0: cannot open $ucd: $!\n";
while (defined(my $line = <$ucd_fh>)) {
    # Strip the line terminator together with any other trailing
    # whitespace.  A plain chomp would leave the "\r" of a CRLF file glued
    # to the last field, where it would be mistaken for a title case
    # mapping and decoded by hex() as code point 0.
    $line =~ s/\s+\z//;
    next if ($line eq '');      # A blank line carries no code point

    my @f = split(/;/, $line);
    my $code = hex $f[0];

    # Take the first mapping that is present, in order of preference:
    # field 12 (upper case), field 13 (lower case), field 14 (title case,
    # would be unusual).  split() drops trailing empty fields, so any of
    # these may be undefined rather than empty.
    my $alt = $code;
    foreach my $mapping (@f[12, 13, 14]) {
        if (defined($mapping) && $mapping ne '') {
            $alt = hex $mapping;
            last;
        }
    }
    $altcase{$code} = $alt;
}
close($ucd_fh);

# @xtab maps each 8-bit codepage value (0..255) to a Unicode code point.
# Entries stay undefined for values the codepage file does not map.
my @xtab = (undef) x 256;

open(my $cp_fh, '<', $cpin) or die "$0: cannot open $cpin: $!\n";
while (defined(my $line = <$cp_fh>)) {
    # Remove a trailing "# comment" (or just trailing whitespace)
    $line =~ s/\s*(\#.*|)$//;

    # split ' ' skips leading whitespace, so an indented mapping line is
    # still seen as two fields; split(/\s+/) would produce an empty first
    # field and the line would be silently discarded.
    my @f = split(' ', $line);

    # Only "<codepage value> <unicode value>" pairs are mappings; anything
    # else (blank lines, entries with no Unicode value) is ignored.
    next if (scalar @f != 2);

    my $byte = hex $f[0];
    next if ($byte > 255);      # Only 8-bit codepages are handled
    $xtab[$byte] = hex $f[1];
}
close($cp_fh);

# Write the binary table: 256 entries, one per 8-bit character, each made
# of two 16-bit little-endian values (pack "v"): the primary match followed
# by the alternate (other-case) match.  0xffff marks a character that is
# unmapped or whose code point does not fit in 16 bits.
open(my $out_fh, '>', $cpout) or die "$0: cannot create $cpout: $!\n";

# The output is raw binary.  Without binmode, platforms that translate
# line endings would expand the 0x0a byte of the U+000A entry, and any
# default encoding layer would re-encode bytes above 0x7f.
binmode($out_fh);

for my $i (0 .. 255) {
    my ($p0, $p1);
    if (!defined($xtab[$i])) {
        $p0 = $p1 = 0xffff;
    } else {
        $p0 = $xtab[$i];
        # Characters absent from the case table match only themselves
        $p1 = defined($altcase{$p0}) ? $altcase{$p0} : $p0;
    }
    # Only the BMP is supported...
    $p0 = 0xffff if ($p0 > 0xffff);
    $p1 = 0xffff if ($p1 > 0xffff);
    print {$out_fh} pack("vv", $p0, $p1)
        or die "$0: error writing $cpout: $!\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time
close($out_fh) or die "$0: error writing $cpout: $!\n";