diff options
author | H. Peter Anvin (Intel) <hpa@zytor.com> | 2019-08-09 16:11:28 -0700 |
---|---|---|
committer | H. Peter Anvin (Intel) <hpa@zytor.com> | 2019-08-09 16:11:28 -0700 |
commit | 98031bfff4d50aa6c2578513a10c63c75c9c6a6c (patch) | |
tree | 9e63f332e68182eaf84bb0b945c724ee1301ed4c | |
parent | 5067fde4831683f36956c2029b590e5a37adf7c2 (diff) | |
download | nasm-98031bfff4d50aa6c2578513a10c63c75c9c6a6c.tar.gz nasm-98031bfff4d50aa6c2578513a10c63c75c9c6a6c.tar.xz nasm-98031bfff4d50aa6c2578513a10c63c75c9c6a6c.zip |
preproc.c: make sure we have the correct token lengths
It turns out that in tokenize() we would sometimes truncate a token
string by inserting a NUL into the input string, expecting new_Token()
to pick it up using strlen(). With explicit lengths, that no longer
works, but there is a better solution anyway: instead of inserting
NUL characters, keep track of where the token actually ends and feed
the correct length to new_Token().
This triggered a buffer overflow in detoken(), add a debug level 2
assert for this condition. Use a relatively high debug level, because
strlen() is fairly expensive, and this is an extremely
performance-critical path.
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
-rw-r--r-- | asm/preproc.c | 114 |
1 files changed, 66 insertions, 48 deletions
diff --git a/asm/preproc.c b/asm/preproc.c index b16ed0c9..10cc2197 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -937,13 +937,15 @@ static char *read_line(void) */ static Token *tokenize(char *line) { - char c, *p = line; + char c; enum pp_token_type type; Token *list = NULL; Token *t, **tail = &list; while (*line) { - p = line; + char *p = line; + char *ep = NULL; /* End of token, for trimming the end */ + if (*p == '%') { p++; if (*p == '+' && !nasm_isdigit(p[1])) { @@ -966,7 +968,7 @@ static Token *tokenize(char *line) } if (*p != '}') nasm_warn(WARN_OTHER, "unterminated %%{ construct"); - p[-1] = '\0'; + ep = &p[-1]; if (*p) p++; type = TOK_PREPROC_ID; @@ -995,8 +997,9 @@ static Token *tokenize(char *line) } } p--; + ep = p; if (*p) - *p++ = '\0'; + p++; if (lvl) nasm_nonfatalf(ERR_PASS1, "unterminated %%[ construct"); type = TOK_INDIRECT; @@ -1183,7 +1186,9 @@ static Token *tokenize(char *line) } else */ if (type != TOK_COMMENT) { - *tail = t = new_Token(NULL, type, line, p - line); + if (!ep) + ep = p; + *tail = t = new_Token(NULL, type, line, ep - line); tail = &t->next; } line = p; @@ -1295,32 +1300,32 @@ static char *detoken(Token * tlist, bool expand_locals) list_for_each(t, tlist) { if (t->type == TOK_PREPROC_ID && t->text && - t->text[0] && t->text[1] == '!') { + t->text[0] == '%' && t->text[1] == '!') { char *v; char *q = t->text; v = t->text + 2; - if (nasm_isquote(*v)) { - size_t len = nasm_unquote(v, NULL); - size_t clen = strlen(v); - - if (len != clen) { - nasm_nonfatalf(ERR_PASS1, "NUL character in %%! string"); - v = NULL; - } - } + if (nasm_isquote(*v)) + nasm_unquote_cstr(v, NULL); if (v) { char *p = getenv(v); if (!p) { - nasm_nonfatalf(ERR_PASS1, "nonexistent environment variable `%s'", v); - /* - * FIXME We better should investigate if accessing - * ->text[1] without ->text[0] is safe enough. + /*! + *!environment [on] nonexistent environment variable + *! warns if a nonexistent environment variable + *! is accessed using the \c{%!} preprocessor + *! construct (see \k{getenv}.) Such environment + *! variables are treated as empty (with this + *! warning issued) starting in NASM 2.15; + *! earlier versions of NASM would treat this as + *! an error. */ - t->text = nasm_zalloc(2); - } else - t->text = nasm_strdup(p); + nasm_warn(WARN_ENVIRONMENT, "nonexistent environment variable `%s'", v); + p = ""; + } + t->text = nasm_strdup(p); + t->len = nasm_last_string_len(); nasm_free(q); } } @@ -1339,20 +1344,30 @@ static char *detoken(Token * tlist, bool expand_locals) t->text = p; } } - if (t->type == TOK_WHITESPACE) + if (t->text) { + if (debug_level(2)) { + unsigned long t_len = t->len; + unsigned long s_len = strlen(t->text); + if (t_len != s_len) { + nasm_panic("assertion failed: token \"%s\" type %u len %lu has t->len %lu\n", + t->text, t->type, s_len, t_len); + t->len = s_len; + } + } + len += t->len; + } else if (t->type == TOK_WHITESPACE) { len++; - else if (t->text) - len += strlen(t->text); + } } p = line = nasm_malloc(len + 1); list_for_each(t, tlist) { - if (t->type == TOK_WHITESPACE) { - *p++ = ' '; - } else if (t->text) { + if (t->text) { memcpy(p, t->text, t->len); p += t->len; + } else if (t->type == TOK_WHITESPACE) { + *p++ = ' '; } } *p = '\0'; @@ -5157,10 +5172,6 @@ static int expand_mmacro(Token * tline) */ static void pp_verror(errflags severity, const char *fmt, va_list arg) { - char buff[BUFSIZ]; - MMacro *mmac = NULL; - int delta = 0; - /* * If we're in a dead branch of IF or something like it, ignore the error. * However, because %else etc are evaluated in the state context @@ -5175,24 +5186,31 @@ static void pp_verror(errflags severity, const char *fmt, va_list arg) !emitting(istk->conds->state))) return; - /* get %macro name */ - if (!(severity & ERR_NOFILE) && istk && istk->mstk) { - mmac = istk->mstk; - /* but %rep blocks should be skipped */ - while (mmac && !mmac->name) - mmac = mmac->next_active, delta++; - } - - if (mmac) { - vsnprintf(buff, sizeof(buff), fmt, arg); + /* This doesn't make sense with the macro stack unwinding */ + if (0) { + MMacro *mmac = NULL; + int32_t delta = 0; + + /* get %macro name */ + if (!(severity & ERR_NOFILE) && istk && istk->mstk) { + mmac = istk->mstk; + /* but %rep blocks should be skipped */ + while (mmac && !mmac->name) + mmac = mmac->next_active, delta++; + } - nasm_set_verror(real_verror); - nasm_error(severity, "(%s:%d) %s", - mmac->name, mmac->lineno - delta, buff); - nasm_set_verror(pp_verror); - } else { - real_verror(severity, fmt, arg); + if (mmac) { + char *buf; + nasm_set_verror(real_verror); + buf = nasm_vasprintf(fmt, arg); + nasm_error(severity, "(%s:%"PRId32") %s", + mmac->name, mmac->lineno - delta, buf); + nasm_set_verror(pp_verror); + nasm_free(buf); + return; + } } + real_verror(severity, fmt, arg); } static Token * |