summary refs log tree commit diff stats
path: root/dirtree.c
diff options
context:
space:
mode:
Diffstat (limited to 'dirtree.c')
-rw-r--r-- dirtree.c 220
1 files changed, 207 insertions, 13 deletions
diff --git a/dirtree.c b/dirtree.c
index b8c28f0..5582b43 100644
--- a/dirtree.c
+++ b/dirtree.c
@@ -1,7 +1,7 @@
#ident "$Id$"
/* ----------------------------------------------------------------------- *
*
- * Copyright 2001 H. Peter Anvin - All Rights Reserved
+ * Copyright 2001-2003 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,27 +24,134 @@
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
+#include <iconv.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <string.h>
#include "dirtree.h"
#include "xmalloc.h"
+#define VFAT_MAX_LEN 64 /* Capacity, in 16-bit units, of the long-name conversion buffer */
+#define VFAT_MAX_SLOTS ((VFAT_MAX_LEN+12)/13) /* ceil(VFAT_MAX_LEN/13): one long-name slot carries up to 5+6+2 = 13 units */
+
extern const char *program;
+/* One row of a Unicode -> codepage translation table. */
+struct uni2cp {
+ uint16_t uni; /* 16-bit Unicode value used as the search key */
+ uint8_t cp; /* Corresponding codepage byte */
+ uint8_t upper; /* Not read in the code visible here; presumably the upper-case form - TODO confirm */
+};
+/* Table defined elsewhere; the lookup code indexes it as cp437[0..255], so it must be declared as an array. */
+extern struct uni2cp cp437[];
-static int
-dos_mangle_name(const char *name, char *buf, uint32_t numtail)
+/* Conversion descriptor producing UTF-16LE; opened by fat_mangle_setup() */
+static iconv_t iconv_utf16;
+
+/*
+ * Open the iconv descriptor that converts to UTF-16LE from the encoding
+ * named "" (on glibc that is the current locale's charset).
+ * Prints a diagnostic to stderr and exits with status 1 on failure.
+ */
+static void
+fat_mangle_setup(void)
+{
+ iconv_utf16 = iconv_open("UTF-16LE", "");
+ if ( iconv_utf16 == (iconv_t)-1 ) {
+ fprintf(stderr, "%s: failed to iconv_open UTF-16LE: %s\n",
+ program, strerror(errno));
+ exit(1);
+ }
+}
+
+#define VFAT_LOSSY_CONVERSION 0x01 /* Character not found in the cp437 table; replaced by '_' */
+#define VFAT_UPPER_CASE 0x02 /* Converted byte is an ASCII upper-case letter A..Z */
+#define VFAT_BAD_CHAR 0x04 /* Converted byte is in the DOS-forbidden set; replaced by '_' */
+#define VFAT_EXTENDED_CHAR 0x08 /* Converted byte is >= 0x80 */
+
+/*
+ * Convert uni_len 16-bit units at uni into upper-cased codepage bytes at cp.
+ *
+ * cp must have room for uni_len bytes plus a terminating NUL. Characters
+ * with no table entry, and characters in the DOS-forbidden set, are written
+ * as '_'.
+ *
+ * Returns a mask of VFAT_* flags. If at least one '.' was seen, the flags
+ * gathered before the last '.' are in the low bits and the flags gathered
+ * after it are shifted left by 4; otherwise the flags are returned unshifted.
+ */
+static unsigned int
+fat_uni_to_cp(char *cp, uint16_t *uni, int uni_len)
{
- const char *dosforbidden =
+ /* NUL is implied; space is technically legal but rarely used in practice */
+ static const char dosforbidden[] =
"\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
"\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
- " \"\'\\/[]|<>,*$+";
+ " \"\'\\/[]|<>,*$+\177\377";
+ static const char cp437toupper[256] =
+ "\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
+ "\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+ " !\"#$%&\'()*+,-./0123456789:;<=>?"
+ "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
+ "`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\177"
+ "\200\232\220A\216A\217\200EEEIII\216\217"
+ "\220\222\222O\231OUUY\231\232\233\234\235\236\237"
+ "\240\241\242\243\245\245\246\247\250\251\252\253\254\255\256\257"
+ "\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277"
+ "\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"
+ "\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"
+ "\340\341\342\343\344\344\346\347\350\351\352\353\354\355\356\357"
+ "\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377";
+ uint16_t uc, uref;
+ int ui, ul, uh;
+ uint8_t cpc;
+ int name_flags = 0;
+ int ext_flags = 0;
+ int found_dot = 0;
+
+ while ( uni_len-- ) {
+ uc = read16((le16_t *)(uni++));
+
+ /* Binary search of cp437[0..255] for uc.
+ NOTE(review): the lower bound is raised when uc < uref, which only
+ converges if the table is ordered by descending uni - confirm
+ against the table definition. */
+ ui = 128; ul = 0; uh = 255;
+ while ( uc != (uref = cp437[ui].uni) && ul < uh ) {
+ if ( uc < uref ) {
+ ul = ui+1;
+ } else if ( uc > uref ) {
+ uh = ui-1;
+ }
+ ui = (ul+uh)/2;
+ }
+ if ( uc != uref ) {
+ cpc = '_';
+ ext_flags |= VFAT_LOSSY_CONVERSION;
+ } else {
+ cpc = cp437[ui].cp; /* index by the search result, not the input pointer */
+ if ( strchr(dosforbidden, cpc) ) {
+ cpc = '_';
+ ext_flags |= VFAT_BAD_CHAR;
+ } else if ( cpc >= 'A' && cpc <= 'Z' ) {
+ ext_flags |= VFAT_UPPER_CASE;
+ } else if ( cpc >= 0x80 ) {
+ ext_flags |= VFAT_EXTENDED_CHAR;
+ }
+ }
+ if ( cpc == '.' ) {
+ /* Everything seen so far belongs to the name part; start over for the extension */
+ name_flags |= ext_flags;
+ ext_flags = 0;
+ found_dot = 1;
+ }
+
+ *cp++ = cp437toupper[cpc];
+ }
+ *cp = '\0';
+
+ return found_dot ? (name_flags | (ext_flags << 4)) : ext_flags;
+}
+
+static int
+fat_mangle_name(const char *name, char *buf)
+{
const char *dot;
char *cp, *endp;
char xc;
const char numtail_str[9];
+ char *cpname;
+ char *inb, *outb;
+ int inbl, outbl;
+
+ if ( !iconv_cp437 )
+ fat_mangle_setup();
- dot = strrchr(name, '.'); /* Find last dot */
+ cpname = xmalloc((inbl = outbl = strlen(name))+1);
+
+ inb = name;
+ outb = cpname;
+ iconv(iconv_cp437, NULL, NULL, NULL, NULL); /* Reset input state */
+ iconv(iconv_cp437, &inb, &inbl, &outb, &outbl);
+ *outb = '\0';
+
+ dot = strrchr(cpname, '.'); /* Find last dot */
if ( dot == name )
dot = NULL; /* Leading dot? */
@@ -52,10 +159,10 @@ dos_mangle_name(const char *name, char *buf, uint32_t numtail)
memset(buf, ' ', 11);
*endp = '\0';
- for ( ; *name ; name++ ) {
- xc = *name;
+ for ( ; *cpname ; cpname++ ) {
+ xc = *cpname;
if ( xc == '.' ) {
- if ( name == dot ) {
+ if ( cpname == dot ) {
/* Last dot */
cp = buf+8;
cp[0] = ' ';
@@ -82,15 +189,101 @@ dos_mangle_name(const char *name, char *buf, uint32_t numtail)
if ( cp < endp )
*cp++ = xc;
}
+ return 0;
+}
+
+/*
+ * Add a numeric tail to an already mangled iname.
+ *
+ * Copies the 11-byte name from iname to oname. If numtail is nonzero,
+ * "~<numtail>" is then written into the 8-byte base-name field of oname,
+ * directly after its last non-space character when there is room, otherwise
+ * overwriting the end of the field. Tails longer than 8 characters are
+ * truncated.
+ */
+static void
+fat_set_numtail(char *oname, char *iname, int numtail)
+{
+ int nc;
+ char *cp;
+ char numtail_str[9];
+
+ memcpy(oname, iname, 11);
if ( numtail ) {
- int nc = sprintf(numtail_str, "~%X", numtail);
- cp = buf+8-nc;
+ nc = snprintf(numtail_str, sizeof(numtail_str), "~%u", (unsigned int)numtail);
+ /* snprintf reports the untruncated length; clamp to what the buffer holds */
+ if ( nc < 0 )
+ nc = 0;
+ else if ( nc > (int)sizeof(numtail_str) - 1 )
+ nc = (int)sizeof(numtail_str) - 1;
+ cp = oname+8-nc;
- while ( cp > buf && cp[-1] == ' ' ) cp--;
+ while ( cp > oname && cp[-1] == ' ' ) cp--;
memcpy(cp, numtail_str, nc);
}
+}
- return 0;
+/*
+ * Give dirent a single FAT directory entry holding its mangled short name.
+ * prev and prev_cnt are accepted but not used.
+ * NOTE(review): the make_dir_tree call visible in this diff passes only
+ * dirent - confirm the call site is updated to match this signature.
+ */
+static void
+dos_mangle_name(struct direntry *dirent,
+ struct direntry *prev,
+ int prev_cnt)
+{
+ (void)prev; (void)prev_cnt;
+
+ dirent->nfatents = 1; /* Only 1 entry/file required */
+ dirent->fatents = xmalloc(sizeof(struct fat_dirent));
+ /* fat_mangle_name() takes (name, buf); the numeric tail is applied separately */
+ fat_mangle_name(dirent->name, dirent->fatents->name);
+}
+
+/*
+ * Convert a name to UTF-16LE, replace characters that are not allowed in
+ * long names with '_', and distribute the result over long-name slots
+ * (up to 5 + 6 + 2 units per slot). Returns the number of slots filled.
+ *
+ * NOTE(review): str, slots, csum and min are not declared anywhere in the
+ * code visible here, and dirent/prev/prev_cnt are never read - this
+ * function looks unfinished; confirm the intended sources before use.
+ */
+static int
+vfat_mangle_name(struct direntry *dirent,
+ struct direntry *prev,
+ int prev_cnt)
+{
+ uint16_t buffer[VFAT_MAX_LEN];
+ uint16_t *xb;
+ char *inb, *outb;
+ size_t inbl, outbl; /* iconv() takes size_t * byte counts */
+ int xl, nx;
+ int seq = 1;
+ int i;
+ uint16_t cv;
+ static const char *vfat_bad_long =
+ "\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17"
+ "\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37"
+ "\"*/:<>?\\|\177";
+
+ inb = str;
+ inbl = strlen(inb);
+ outb = (char *)buffer;
+ outbl = VFAT_MAX_LEN << 1;
+
+ iconv(iconv_utf16, NULL, NULL, NULL, NULL); /* Reset input state */
+ iconv(iconv_utf16, &inb, &inbl, &outb, &outbl);
+
+ /* Units actually produced = capacity minus the unused output bytes / 2 */
+ xl = VFAT_MAX_LEN - (int)(outbl >> 1);
+ xb = buffer;
+
+ /* The VFAT spec says trailing periods and leading and trailing spaces
+ are ignored. However, it doesn't specify if those should actually
+ be stripped. Ignore that for now and let the filesystem worry about it. */
+
+ for ( i = 0 ; i < xl ; i++ ) {
+ cv = read16((le16_t *)&buffer[i]);
+ if ( cv <= 127 && strchr(vfat_bad_long, cv) )
+ write16((le16_t *)&buffer[i], '_'); /* Bad character, replace with _ */
+ }
+
+ while ( xl ) {
+ memset(slots, 0, sizeof(*slots));
+ slots->id = seq++;
+ slots->attribute = 0x0f; /* Part of a long name */
+ slots->alias_csum = csum;
+
+ /* Advance xb after every field so each one receives the next characters */
+ nx = min(xl,5);
+ memcpy(slots->name0, xb, nx<<1);
+ xb += nx;
+ xl -= nx;
+
+ nx = min(xl,6);
+ memcpy(slots->name5, xb, nx<<1);
+ xb += nx;
+ xl -= nx;
+
+ nx = min(xl,2);
+ memcpy(slots->name11, xb, nx<<1);
+ xb += nx;
+ xl -= nx;
+
+ if ( !xl )
+ slots->id |= 0x40; /* Last slot */
+
+ slots++;
+ }
+
+ return seq-1;
}
@@ -163,7 +356,8 @@ make_dir_tree(const char *root, int *dirsizeptr, struct direntry *parent)
thisent->next = NULL;
/* Generate mangled DOS filename (eventually) */
- dos_mangle_name(thisent->name, thisent->mangled_name, 0);
+ /* FIX: Handle VFAT */
+ dos_mangle_name(thisent);
/* Add to linked list */
*link = thisent;