aboutsummaryrefslogtreecommitdiffstats
path: root/asm
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@zytor.com> 2016-05-25 12:06:29 -0700
committer H. Peter Anvin <hpa@zytor.com> 2016-05-25 12:06:29 -0700
commit e1f985c167495185c55c46e640f2607604597383 (patch)
tree 3ddd11d12e6a3bf6e21c39d1342175d5488d8d39 /asm
parent 22538e2b6713d6e4e05fb82c6969320a519b4c93 (diff)
download nasm-e1f985c167495185c55c46e640f2607604597383.tar.gz
nasm-e1f985c167495185c55c46e640f2607604597383.tar.xz
nasm-e1f985c167495185c55c46e640f2607604597383.zip
Reorganize the source code into subdirectories
Make the source code easier to understand and keep track of by organizing it into subdirectories depending on the function. Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'asm')
-rw-r--r-- asm/assemble.c 2976
-rw-r--r-- asm/assemble.h 47
-rw-r--r-- asm/directiv.dat 63
-rwxr-xr-x asm/directiv.pl 183
-rw-r--r-- asm/eval.c 1015
-rw-r--r-- asm/eval.h 49
-rw-r--r-- asm/exprlib.c 186
-rw-r--r-- asm/float.c 951
-rw-r--r-- asm/float.h 54
-rw-r--r-- asm/labels.c 530
-rw-r--r-- asm/listing.c 338
-rw-r--r-- asm/listing.h 108
-rw-r--r-- asm/nasm.c 2142
-rw-r--r-- asm/parser.c 1168
-rw-r--r-- asm/parser.h 45
-rwxr-xr-x asm/phash.pl 109
-rw-r--r-- asm/pptok.dat 95
-rwxr-xr-x asm/pptok.pl 271
-rw-r--r-- asm/preproc-nop.c 186
-rw-r--r-- asm/preproc.c 5295
-rw-r--r-- asm/preproc.h 55
-rw-r--r-- asm/quote.c 479
-rw-r--r-- asm/quote.h 44
-rw-r--r-- asm/rdstrnum.c 68
-rw-r--r-- asm/segalloc.c 51
-rw-r--r-- asm/stdscan.c 344
-rw-r--r-- asm/stdscan.h 49
-rw-r--r-- asm/strfunc.c 359
-rw-r--r-- asm/tokens.dat 135
-rwxr-xr-x asm/tokhash.pl 284
30 files changed, 17679 insertions, 0 deletions
diff --git a/asm/assemble.c b/asm/assemble.c
new file mode 100644
index 00000000..ad6bfda2
--- /dev/null
+++ b/asm/assemble.c
@@ -0,0 +1,2976 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * assemble.c code generation for the Netwide Assembler
+ *
+ * Bytecode specification
+ * ----------------------
+ *
+ *
+ * Codes Mnemonic Explanation
+ *
+ * \0 terminates the code. (Unless it's a literal of course.)
+ * \1..\4 that many literal bytes follow in the code stream
+ * \5 add 4 to the primary operand number (b, low octdigit)
+ * \6 add 4 to the secondary operand number (a, middle octdigit)
+ * \7 add 4 to both the primary and the secondary operand number
+ * \10..\13 a literal byte follows in the code stream, to be added
+ * to the register value of operand 0..3
+ * \14..\17 the position of index register operand in MIB (BND insns)
+ * \20..\23 ib a byte immediate operand, from operand 0..3
+ * \24..\27 ib,u a zero-extended byte immediate operand, from operand 0..3
+ * \30..\33 iw a word immediate operand, from operand 0..3
+ * \34..\37 iwd select between \3[0-3] and \4[0-3] depending on 16/32 bit
+ * assembly mode or the operand-size override on the operand
+ * \40..\43 id a long immediate operand, from operand 0..3
+ * \44..\47 iwdq select between \3[0-3], \4[0-3] and \5[4-7]
+ * depending on the address size of the instruction.
+ * \50..\53 rel8 a byte relative operand, from operand 0..3
+ * \54..\57 iq a qword immediate operand, from operand 0..3
+ * \60..\63 rel16 a word relative operand, from operand 0..3
+ * \64..\67 rel select between \6[0-3] and \7[0-3] depending on 16/32 bit
+ * assembly mode or the operand-size override on the operand
+ * \70..\73 rel32 a long relative operand, from operand 0..3
+ * \74..\77 seg a word constant, from the _segment_ part of operand 0..3
+ * \1ab a ModRM, calculated on EA in operand a, with the spare
+ * field the register value of operand b.
+ * \172\ab the register number from operand a in bits 7..4, with
+ * the 4-bit immediate from operand b in bits 3..0.
+ * \173\xab the register number from operand a in bits 7..4, with
+ * the value b in bits 3..0.
+ * \174..\177 the register number from operand 0..3 in bits 7..4, and
+ * an arbitrary value in bits 3..0 (assembled as zero.)
+ * \2ab a ModRM, calculated on EA in operand a, with the spare
+ * field equal to digit b.
+ *
+ * \240..\243 this instruction uses EVEX rather than REX or VEX/XOP, with the
+ * V field taken from operand 0..3.
+ * \250 this instruction uses EVEX rather than REX or VEX/XOP, with the
+ * V field set to 1111b.
+ *
+ * EVEX prefixes are followed by the sequence:
+ * \cm\wlp\tup where cm is:
+ * cc 00m mmm
+ * c = 2 for EVEX and mmmm is the M field (EVEX.P0[3:0])
+ * and wlp is:
+ * 00 wwl lpp
+ * [l0] ll = 0 (.128, .lz)
+ * [l1] ll = 1 (.256)
+ * [l2] ll = 2 (.512)
+ * [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0)
+ *
+ * [w0] ww = 0 for W = 0
+ * [w1] ww = 1 for W = 1
+ * [wig] ww = 2 for W don't care (always assembled as 0)
+ * [ww] ww = 3 for W used as REX.W
+ *
+ * [p0] pp = 0 for no prefix
+ * [66] pp = 1 for legacy prefix 66
+ * [f3] pp = 2
+ * [f2] pp = 3
+ *
+ * tup is tuple type for Disp8*N from %tuple_codes in insns.pl
+ * (compressed displacement encoding)
+ *
+ * \254..\257 id,s a signed 32-bit operand to be extended to 64 bits.
+ * \260..\263 this instruction uses VEX/XOP rather than REX, with the
+ * V field taken from operand 0..3.
+ * \270 this instruction uses VEX/XOP rather than REX, with the
+ * V field set to 1111b.
+ *
+ * VEX/XOP prefixes are followed by the sequence:
+ * \tmm\wlp where mm is the M field; and wlp is:
+ * 00 wwl lpp
+ * [l0] ll = 0 for L = 0 (.128, .lz)
+ * [l1] ll = 1 for L = 1 (.256)
+ * [lig] ll = 2 for L don't care (always assembled as 0)
+ *
+ * [w0] ww = 0 for W = 0
+ * [w1] ww = 1 for W = 1
+ * [wig] ww = 2 for W don't care (always assembled as 0)
+ * [ww] ww = 3 for W used as REX.W
+ *
+ * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
+ *
+ * \271 hlexr instruction takes XRELEASE (F3) with or without lock
+ * \272 hlenl instruction takes XACQUIRE/XRELEASE with or without lock
+ * \273 hle instruction takes XACQUIRE/XRELEASE with lock only
+ * \274..\277 ib,s a byte immediate operand, from operand 0..3, sign-extended
+ * to the operand size (if o16/o32/o64 present) or the bit size
+ * \310 a16 indicates fixed 16-bit address size, i.e. optional 0x67.
+ * \311 a32 indicates fixed 32-bit address size, i.e. optional 0x67.
+ * \312 adf (disassembler only) invalid with non-default address size.
+ * \313 a64 indicates fixed 64-bit address size, 0x67 invalid.
+ * \314 norexb (disassembler only) invalid with REX.B
+ * \315 norexx (disassembler only) invalid with REX.X
+ * \316 norexr (disassembler only) invalid with REX.R
+ * \317 norexw (disassembler only) invalid with REX.W
+ * \320 o16 indicates fixed 16-bit operand size, i.e. optional 0x66.
+ * \321 o32 indicates fixed 32-bit operand size, i.e. optional 0x66.
+ * \322 odf indicates that this instruction is only valid when the
+ * operand size is the default (instruction to disassembler,
+ * generates no code in the assembler)
+ * \323 o64nw indicates fixed 64-bit operand size, REX on extensions only.
+ * \324 o64 indicates 64-bit operand size requiring REX prefix.
+ * \325 nohi instruction which always uses spl/bpl/sil/dil
+ * \326 nof3 instruction not valid with 0xF3 REP prefix. Hint for
+ * disassembler only; for SSE instructions.
+ * \330 a literal byte follows in the code stream, to be added
+ * to the condition code value of the instruction.
+ * \331 norep instruction not valid with REP prefix. Hint for
+ * disassembler only; for SSE instructions.
+ * \332 f2i REP prefix (0xF2 byte) used as opcode extension.
+ * \333 f3i REP prefix (0xF3 byte) used as opcode extension.
+ * \334 rex.l LOCK prefix used as REX.R (used in non-64-bit mode)
+ * \335 repe disassemble a rep (0xF3 byte) prefix as repe not rep.
+ * \336 mustrep force a REP(E) prefix (0xF3) even if not specified.
+ * \337 mustrepne force a REPNE prefix (0xF2) even if not specified.
+ * \336-\337 are still listed as prefixes in the disassembler.
+ * \340 resb reserve <operand 0> bytes of uninitialized storage.
+ * Operand 0 had better be a segmentless constant.
+ * \341 wait this instruction needs a WAIT "prefix"
+ * \360 np no SSE prefix (== \364\331)
+ * \361 66 SSE prefix (== \366\331)
+ * \364 !osp operand-size prefix (0x66) not permitted
+ * \365 !asp address-size prefix (0x67) not permitted
+ * \366 operand-size prefix (0x66) used as opcode extension
+ * \367 address-size prefix (0x67) used as opcode extension
+ * \370,\371 jcc8 match only if operand 0 meets byte jump criteria.
+ * jmp8 370 is used for Jcc, 371 is used for JMP.
+ * \373 jlen assemble 0x03 if bits==16, 0x05 if bits==32;
+ * used for conditional jump over longer jump
+ * \374 vsibx|vm32x|vm64x this instruction takes an XMM VSIB memory EA
+ * \375 vsiby|vm32y|vm64y this instruction takes an YMM VSIB memory EA
+ * \376 vsibz|vm32z|vm64z this instruction takes an ZMM VSIB memory EA
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "assemble.h"
+#include "insns.h"
+#include "tables.h"
+#include "disp8.h"
+#include "listing.h"
+
+/*
+ * Result of matching an instruction against a template, as returned by
+ * find_match() and matches().  assemble() and insn_size() accept only
+ * MOK_GOOD as a usable match; every other value is reported as an error
+ * or as "no match".
+ */
+enum match_result {
+ /*
+ * Matching errors. These should be sorted so that more specific
+ * errors come later in the sequence.
+ */
+ MERR_INVALOP, /* reported by assemble() via its default message */
+ MERR_OPSIZEMISSING, /* "operation size not specified" */
+ MERR_OPSIZEMISMATCH, /* "mismatch in operand sizes" */
+ MERR_BRNUMMISMATCH, /* "mismatch in the number of broadcasting elements" */
+ MERR_BADCPU, /* "no instruction for this cpu level" */
+ MERR_BADMODE, /* "instruction not supported in %d-bit mode" */
+ MERR_BADHLE, /* no dedicated message in assemble(); uses the default */
+ MERR_ENCMISMATCH, /* "specific encoding scheme not available" */
+ MERR_BADBND, /* "bnd prefix is not allowed" */
+ MERR_BADREPNE, /* "%s prefix is not allowed" (repne/repnz) */
+ /*
+ * Matching success; the conditional ones first
+ */
+ MOK_JUMP, /* Matching OK but needs jmp_match() */
+ MOK_GOOD /* Matching unconditionally OK */
+};
+
+/*
+ * Description of an encoded effective address.  process_ea() takes a
+ * pointer to one of these (presumably as its output - confirm against
+ * its definition).
+ */
+typedef struct {
+ enum ea_type type; /* what kind of EA is this? */
+ int sib_present; /* is a SIB byte necessary? */
+ int bytes; /* # of bytes of offset needed */
+ int size; /* lazy - this is sib+bytes+1 */
+ uint8_t modrm, sib, rex, rip; /* the bytes themselves */
+ int8_t disp8; /* compressed displacement for EVEX */
+} ea;
+
+/*
+ * Build a SIB byte: scale in bits 7..6, index in bits 5..3, base in the
+ * low bits.  Unlike GEN_MODRM, index and base are not masked here, so
+ * values wider than three bits would spill into the neighbouring field.
+ */
+#define GEN_SIB(scale, index, base) \
+ (((scale) << 6) | ((index) << 3) | ((base)))
+
+/*
+ * Build a ModRM byte: mod in bits 7..6, reg in bits 5..3, rm in bits
+ * 2..0.  reg and rm are masked to their low three bits.
+ */
+#define GEN_MODRM(mod, reg, rm) \
+ (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
+
+/* Set by assemble() and insn_size() from their `cp' argument on every call. */
+static iflag_t cpu; /* cpu level received from nasm.c */
+
+/* Forward declarations of file-local helpers. */
+static int64_t calcsize(int32_t, int64_t, int, insn *,
+ const struct itemplate *);
+static void gencode(int32_t segment, int64_t offset, int bits,
+ insn * ins, const struct itemplate *temp,
+ int64_t insn_end);
+static enum match_result find_match(const struct itemplate **tempp,
+ insn *instruction,
+ int32_t segment, int64_t offset, int bits);
+static enum match_result matches(const struct itemplate *, insn *, int bits);
+static opflags_t regflag(const operand *);
+static int32_t regval(const operand *);
+static int rexflags(int, opflags_t, int);
+static int op_rexflags(const operand *, int);
+static int op_evexflags(const operand *, int, uint8_t);
+static void add_asp(insn *, int);
+
+static enum ea_type process_ea(operand *, ea *, int, int, opflags_t, insn *);
+
+/* Return 1 if prefix slot `pos' of `ins' holds exactly `prefix', else 0. */
+static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
+{
+    return (ins->prefixes[pos] == prefix) ? 1 : 0;
+}
+
+/*
+ * Report a non-fatal error, naming the offending prefix, if prefix slot
+ * `pos' of `ins' is occupied.  An empty slot is silently accepted.
+ */
+static void assert_no_prefix(insn * ins, enum prefix_pos pos)
+{
+    if (!ins->prefixes[pos])
+        return;
+
+    nasm_error(ERR_NONFATAL, "invalid %s prefix",
+               prefix_name(ins->prefixes[pos]));
+}
+
+/*
+ * Map an operand size in bytes to the keyword naming that size.
+ * Sizes without a keyword yield "???".
+ */
+static const char *size_name(int size)
+{
+    static const struct {
+        int bytes;
+        const char *keyword;
+    } known_sizes[] = {
+        {  1, "byte"  },
+        {  2, "word"  },
+        {  4, "dword" },
+        {  8, "qword" },
+        { 10, "tword" },
+        { 16, "oword" },
+        { 32, "yword" },
+        { 64, "zword" },
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof(known_sizes) / sizeof(known_sizes[0]); i++) {
+        if (known_sizes[i].bytes == size)
+            return known_sizes[i].keyword;
+    }
+
+    return "???";
+}
+
+/*
+ * Emit the "data exceeds bounds" warning for a value of `size' bytes.
+ * `pass' is OR-ed into the severity word; the callers in this file pass
+ * ERR_PASS1 or ERR_PASS2.
+ */
+static void warn_overflow(int pass, int size)
+{
+    const char *what = size_name(size);
+
+    nasm_error(ERR_WARNING | pass | ERR_WARN_NOV,
+               "%s data exceeds bounds", what);
+}
+
+/*
+ * Warn (tagged ERR_PASS1) when the constant `data' does not fit in
+ * `size' bytes according to overflow_general().
+ */
+static void warn_overflow_const(int64_t data, int size)
+{
+    if (!overflow_general(data, size))
+        return;
+
+    warn_overflow(ERR_PASS1, size);
+}
+
+/*
+ * Warn (tagged ERR_PASS2) when the offset of operand `o' does not fit in
+ * `size' bytes.  Only operands with neither a segment nor a WRT base are
+ * checked; anything else is left alone.
+ */
+static void warn_overflow_opd(const struct operand *o, int size)
+{
+    if (o->wrt != NO_SEG || o->segment != NO_SEG)
+        return;
+
+    if (overflow_general(o->offset, size))
+        warn_overflow(ERR_PASS2, size);
+}
+
+/*
+ * Width in bytes of the address relocation described by (type, size),
+ * or zero when `type' is not an address kind.  For OUT_ADDRESS the
+ * width is the absolute value of `size' taken as an int; the relative
+ * kinds have fixed widths.
+ */
+static int addrsize(enum out_type type, uint64_t size)
+{
+    if (type == OUT_ADDRESS)
+        return abs((int)size);
+    if (type == OUT_REL1ADR)
+        return 1;
+    if (type == OUT_REL2ADR)
+        return 2;
+    if (type == OUT_REL4ADR)
+        return 4;
+    if (type == OUT_REL8ADR)
+        return 8;
+
+    return 0;
+}
+
+/*
+ * This routine wraps the real output format's output routine,
+ * in order to pass a copy of the data off to the listing file
+ * generator at the same time, flatten unnecessary relocations,
+ * and verify backend compatibility.
+ *
+ * offset is passed to the listing generator only; segto, data, type,
+ * size, segment and wrt are forwarded to ofmt->output(), possibly
+ * after being rewritten as described in the body.
+ */
+static void out(int64_t offset, int32_t segto, const void *data,
+ enum out_type type, uint64_t size,
+ int32_t segment, int32_t wrt)
+{
+ static int32_t lineno = 0; /* static!!! */
+ static const char *lnfname = NULL;
+ uint8_t p[8];
+ int asize = addrsize(type, size); /* Address size in bytes */
+ const int amax = ofmt->maxbits >> 3; /* Maximum address size in bytes */
+
+ if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
+ /*
+ * This is a non-relocated address, and we're going to
+ * convert it into RAWDATA format.
+ */
+ uint8_t *q = p;
+
+ if (asize > 8) {
+ nasm_panic(0, "OUT_ADDRESS with size > 8");
+ return;
+ }
+
+ /* data is read as an int64_t here, so callers must point at 8 bytes */
+ WRITEADDR(q, *(int64_t *)data, asize);
+ data = p;
+ type = OUT_RAWDATA;
+ size = asize;
+ asize = 0; /* No longer an address */
+ }
+
+ /* The listing sees the data before any width adjustment below */
+ lfmt->output(offset, data, type, size);
+
+ /*
+ * this call to src_get determines when we call the
+ * debug-format-specific "linenum" function
+ * it updates lineno and lnfname to the current values
+ * returning 0 if "same as last time", -2 if lnfname
+ * changed, and the amount by which lineno changed,
+ * if it did. thus, these variables must be static
+ */
+
+ if (src_get(&lineno, &lnfname))
+ dfmt->linenum(lnfname, lineno, segto);
+
+ /* Address wider than the output format's maximum (ofmt->maxbits) */
+ if (asize && asize > amax) {
+ if (type != OUT_ADDRESS || (int)size < 0) {
+ /* Relative, or OUT_ADDRESS with negative size: error, emit zeros */
+ nasm_error(ERR_NONFATAL,
+ "%d-bit signed relocation unsupported by output format %s\n",
+ asize << 3, ofmt->shortname);
+ size = asize;
+ } else {
+ /* Emit amax bytes as a relocation, then pad with zero bytes */
+ nasm_error(ERR_WARNING | ERR_WARN_ZEXTRELOC,
+ "%d-bit unsigned relocation zero-extended from %d bits\n",
+ asize << 3, ofmt->maxbits);
+ ofmt->output(segto, data, type, amax, segment, wrt);
+ size = asize - amax;
+ }
+ /* What remains to be written is plain zero bytes */
+ data = zero_buffer;
+ type = OUT_RAWDATA;
+ segment = wrt = NO_SEG;
+ }
+
+ ofmt->output(segto, data, type, size, segment, wrt);
+}
+
+/*
+ * Emit the immediate held in operand `opx'.  With no segment attached
+ * the offset is truncated to one byte and written as raw data;
+ * otherwise the full offset is passed to out() as an OUT_ADDRESS of
+ * `asize' bytes relative to the operand's segment and WRT base.
+ */
+static void out_imm8(int64_t offset, int32_t segment,
+                     struct operand *opx, int asize)
+{
+    if (opx->segment == NO_SEG) {
+        uint8_t low = opx->offset;
+
+        out(offset, segment, &low, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+    } else {
+        uint64_t value = opx->offset;
+
+        out(offset, segment, &value, OUT_ADDRESS, asize,
+            opx->segment, opx->wrt);
+    }
+}
+
+/*
+ * Decide whether the template `temp', whose first code byte must be
+ * \370 (Jcc) or \371 (JMP), may be used in its byte-displacement form
+ * for `ins' assembled at segment:offset.
+ *
+ * Returns false when the template does not start with \370/\371, when
+ * operand 0 is STRICT, when optimization is disabled, or when
+ * `optimizing' is negative and the template is the \371 form.
+ * Returns true unconditionally while operand 0 is still flagged
+ * OPFLAG_UNKNOWN.  Otherwise the target must lie in `segment' and
+ * within -128..127 bytes of the end of the instruction.
+ *
+ * Side effect: a BND prefix on a \371 template that fits in a byte is
+ * dropped, with a warning.
+ */
+static bool jmp_match(int32_t segment, int64_t offset, int bits,
+                      insn * ins, const struct itemplate *temp)
+{
+    const uint8_t opc = temp->code[0];
+    int64_t len;
+    int64_t delta;
+    bool fits;
+
+    if ((opc & ~1) != 0370)
+        return false;
+    if (ins->oprs[0].type & STRICT)
+        return false;
+    if (!optimizing)
+        return false;
+    if (optimizing < 0 && opc == 0371)
+        return false;
+
+    /* calcsize() runs before the UNKNOWN test, as it updates `ins' */
+    len = calcsize(segment, offset, bits, ins, temp);
+
+    /* Be optimistic in pass 1 */
+    if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
+        return true;
+
+    if (ins->oprs[0].segment != segment)
+        return false;
+
+    /* Distance from the end of this instruction to the target */
+    delta = ins->oprs[0].offset - offset - len;
+    fits = !(delta < -128 || delta > 127);
+
+    if (fits && opc == 0371 && ins->prefixes[PPS_REP] == P_BND) {
+        /* jmp short (opcode eb) cannot be used with bnd prefix. */
+        ins->prefixes[PPS_REP] = P_none;
+        nasm_error(ERR_WARNING | ERR_WARN_BND | ERR_PASS2 ,
+                   "jmp short does not init bnd regs - bnd prefix dropped.");
+    }
+
+    return fits;
+}
+
+/*
+ * Assemble one parsed instruction and emit its bytes through out().
+ *
+ * Three kinds of input are handled in turn: data-definition
+ * pseudo-instructions (idata_bytes() returns a nonzero element size),
+ * INCBIN, and ordinary instructions matched against a template with
+ * find_match().  In each case the output is repeated
+ * instruction->times times.
+ *
+ * segment, offset  where the output is placed
+ * bits             current assembly mode (16, 32 or 64)
+ * cp               CPU level, stored in the file-level `cpu' variable
+ * instruction      the parsed instruction
+ *
+ * Returns the number of bytes generated.  Returns 0 if idata_bytes()
+ * yields -1, if an INCBIN file cannot be opened or seeked, or if no
+ * template matches; the last two cases also report an error.
+ */
+int64_t assemble(int32_t segment, int64_t offset, int bits, iflag_t cp,
+ insn * instruction)
+{
+ const struct itemplate *temp;
+ int j;
+ enum match_result m;
+ int64_t insn_end;
+ int32_t itimes;
+ int64_t start = offset;
+ int64_t wsize; /* size for DB etc. */
+
+ cpu = cp;
+
+ wsize = idata_bytes(instruction->opcode);
+ if (wsize == -1)
+ return 0;
+
+ if (wsize) {
+ /* Data definition: wsize is the size of one element in bytes */
+ extop *e;
+ int32_t t = instruction->times;
+ if (t < 0)
+ nasm_panic(0, "instruction->times < 0 (%"PRId32") in assemble()", t);
+
+ while (t--) { /* repeat TIMES times */
+ list_for_each(e, instruction->eops) {
+ if (e->type == EOT_DB_NUMBER) {
+ if (wsize > 8) {
+ nasm_error(ERR_NONFATAL,
+ "integer supplied to a DT, DO or DY"
+ " instruction");
+ } else {
+ out(offset, segment, &e->offset,
+ OUT_ADDRESS, wsize, e->segment, e->wrt);
+ offset += wsize;
+ }
+ } else if (e->type == EOT_DB_STRING ||
+ e->type == EOT_DB_STRING_FREE) {
+ int align;
+
+ out(offset, segment, e->stringval,
+ OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
+ align = e->stringlen % wsize;
+
+ /* Pad the string with zeros up to a multiple of wsize */
+ if (align) {
+ align = wsize - align;
+ out(offset, segment, zero_buffer,
+ OUT_RAWDATA, align, NO_SEG, NO_SEG);
+ }
+ offset += e->stringlen + align;
+ }
+ }
+ /* True only after the first of several repetitions */
+ if (t > 0 && t == instruction->times - 1) {
+ /*
+ * Dummy call to lfmt->output to give the offset to the
+ * listing module.
+ */
+ lfmt->output(offset, NULL, OUT_RAWDATA, 0);
+ lfmt->uplevel(LIST_TIMES);
+ }
+ }
+ if (instruction->times > 1)
+ lfmt->downlevel(LIST_TIMES);
+ return offset - start;
+ }
+
+ if (instruction->opcode == I_INCBIN) {
+ /*
+ * eops holds the file name; an optional second operand is the
+ * start offset in the file and an optional third caps the length.
+ */
+ const char *fname = instruction->eops->stringval;
+ FILE *fp;
+
+ fp = nasm_open_read(fname, NF_BINARY);
+ if (!fp) {
+ nasm_error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
+ fname);
+ } else if (fseek(fp, 0L, SEEK_END) < 0) {
+ nasm_error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
+ fname);
+ fclose(fp);
+ } else {
+ static char buf[4096];
+ size_t t = instruction->times;
+ size_t base = 0;
+ size_t len;
+
+ /* NOTE(review): the ftell() result is not checked for failure */
+ len = ftell(fp);
+ if (instruction->eops->next) {
+ base = instruction->eops->next->offset;
+ /* NOTE(review): wraps around if base exceeds the file length */
+ len -= base;
+ if (instruction->eops->next->next &&
+ len > (size_t)instruction->eops->next->next->offset)
+ len = (size_t)instruction->eops->next->next->offset;
+ }
+ /*
+ * Dummy call to lfmt->output to give the offset to the
+ * listing module.
+ */
+ lfmt->output(offset, NULL, OUT_RAWDATA, 0);
+ lfmt->uplevel(LIST_INCBIN);
+ while (t--) {
+ size_t l;
+
+ fseek(fp, base, SEEK_SET);
+ l = len;
+ while (l > 0) {
+ int32_t m; /* shadows the outer m; holds the fread() count */
+ m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
+ if (!m) {
+ /*
+ * This shouldn't happen unless the file
+ * actually changes while we are reading
+ * it.
+ */
+ nasm_error(ERR_NONFATAL,
+ "`incbin': unexpected EOF while"
+ " reading file `%s'", fname);
+ t = 0; /* Try to exit cleanly */
+ break;
+ }
+ /* offset is not advanced between chunks in this branch */
+ out(offset, segment, buf, OUT_RAWDATA, m,
+ NO_SEG, NO_SEG);
+ l -= m;
+ }
+ }
+ lfmt->downlevel(LIST_INCBIN);
+ if (instruction->times > 1) {
+ /*
+ * Dummy call to lfmt->output to give the offset to the
+ * listing module.
+ */
+ lfmt->output(offset, NULL, OUT_RAWDATA, 0);
+ lfmt->uplevel(LIST_TIMES);
+ lfmt->downlevel(LIST_TIMES);
+ }
+ fclose(fp);
+ return instruction->times * len;
+ }
+ return 0; /* if we're here, there's an error */
+ }
+
+ /* Check to see if we need an address-size prefix */
+ add_asp(instruction, bits);
+
+ m = find_match(&temp, instruction, segment, offset, bits);
+
+ if (m == MOK_GOOD) {
+ /* Matches! */
+ int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
+ itimes = instruction->times;
+ if (insn_size < 0) /* shouldn't be, on pass two */
+ nasm_panic(0, "errors made it through from pass one");
+ else
+ while (itimes--) {
+ /* Emit each prefix slot as a byte; c stays 0 when nothing is emitted */
+ for (j = 0; j < MAXPREFIX; j++) {
+ uint8_t c = 0;
+ switch (instruction->prefixes[j]) {
+ case P_WAIT:
+ c = 0x9B;
+ break;
+ case P_LOCK:
+ c = 0xF0;
+ break;
+ case P_REPNE:
+ case P_REPNZ:
+ case P_XACQUIRE:
+ case P_BND:
+ c = 0xF2;
+ break;
+ case P_REPE:
+ case P_REPZ:
+ case P_REP:
+ case P_XRELEASE:
+ c = 0xF3;
+ break;
+ case R_CS:
+ if (bits == 64) {
+ nasm_error(ERR_WARNING | ERR_PASS2,
+ "cs segment base generated, but will be ignored in 64-bit mode");
+ }
+ c = 0x2E;
+ break;
+ case R_DS:
+ if (bits == 64) {
+ nasm_error(ERR_WARNING | ERR_PASS2,
+ "ds segment base generated, but will be ignored in 64-bit mode");
+ }
+ c = 0x3E;
+ break;
+ case R_ES:
+ if (bits == 64) {
+ nasm_error(ERR_WARNING | ERR_PASS2,
+ "es segment base generated, but will be ignored in 64-bit mode");
+ }
+ c = 0x26;
+ break;
+ case R_FS:
+ c = 0x64;
+ break;
+ case R_GS:
+ c = 0x65;
+ break;
+ case R_SS:
+ if (bits == 64) {
+ nasm_error(ERR_WARNING | ERR_PASS2,
+ "ss segment base generated, but will be ignored in 64-bit mode");
+ }
+ c = 0x36;
+ break;
+ case R_SEGR6:
+ case R_SEGR7:
+ nasm_error(ERR_NONFATAL,
+ "segr6 and segr7 cannot be used as prefixes");
+ break;
+ case P_A16:
+ if (bits == 64) {
+ nasm_error(ERR_NONFATAL,
+ "16-bit addressing is not supported "
+ "in 64-bit mode");
+ } else if (bits != 16)
+ c = 0x67;
+ break;
+ case P_A32:
+ if (bits != 32)
+ c = 0x67;
+ break;
+ case P_A64:
+ if (bits != 64) {
+ nasm_error(ERR_NONFATAL,
+ "64-bit addressing is only supported "
+ "in 64-bit mode");
+ }
+ break;
+ case P_ASP:
+ c = 0x67;
+ break;
+ case P_O16:
+ if (bits != 16)
+ c = 0x66;
+ break;
+ case P_O32:
+ if (bits == 16)
+ c = 0x66;
+ break;
+ case P_O64:
+ /* REX.W */
+ break;
+ case P_OSP:
+ c = 0x66;
+ break;
+ case P_EVEX:
+ case P_VEX3:
+ case P_VEX2:
+ case P_NOBND:
+ case P_none:
+ break;
+ default:
+ nasm_panic(0, "invalid instruction prefix");
+ }
+ if (c != 0) {
+ out(offset, segment, &c, OUT_RAWDATA, 1,
+ NO_SEG, NO_SEG);
+ offset++;
+ }
+ }
+ /* insn_size from calcsize() is counted from after the prefix bytes */
+ insn_end = offset + insn_size;
+ gencode(segment, offset, bits, instruction,
+ temp, insn_end);
+ offset += insn_size;
+ if (itimes > 0 && itimes == instruction->times - 1) {
+ /*
+ * Dummy call to lfmt->output to give the offset to the
+ * listing module.
+ */
+ lfmt->output(offset, NULL, OUT_RAWDATA, 0);
+ lfmt->uplevel(LIST_TIMES);
+ }
+ }
+ if (instruction->times > 1)
+ lfmt->downlevel(LIST_TIMES);
+ return offset - start;
+ } else {
+ /* No match */
+ /* MERR_INVALOP, MERR_BADHLE and MOK_JUMP all reach the default case */
+ switch (m) {
+ case MERR_OPSIZEMISSING:
+ nasm_error(ERR_NONFATAL, "operation size not specified");
+ break;
+ case MERR_OPSIZEMISMATCH:
+ nasm_error(ERR_NONFATAL, "mismatch in operand sizes");
+ break;
+ case MERR_BRNUMMISMATCH:
+ nasm_error(ERR_NONFATAL,
+ "mismatch in the number of broadcasting elements");
+ break;
+ case MERR_BADCPU:
+ nasm_error(ERR_NONFATAL, "no instruction for this cpu level");
+ break;
+ case MERR_BADMODE:
+ nasm_error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
+ bits);
+ break;
+ case MERR_ENCMISMATCH:
+ nasm_error(ERR_NONFATAL, "specific encoding scheme not available");
+ break;
+ case MERR_BADBND:
+ nasm_error(ERR_NONFATAL, "bnd prefix is not allowed");
+ break;
+ case MERR_BADREPNE:
+ nasm_error(ERR_NONFATAL, "%s prefix is not allowed",
+ (has_prefix(instruction, PPS_REP, P_REPNE) ?
+ "repne" : "repnz"));
+ break;
+ default:
+ nasm_error(ERR_NONFATAL,
+ "invalid combination of opcode and operands");
+ break;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Compute the number of bytes `instruction' will occupy, without
+ * emitting anything.
+ *
+ * segment, offset  location the instruction would be assembled at;
+ *                  passed through to find_match() and calcsize()
+ * bits             current assembly mode (16, 32 or 64)
+ * cp               CPU level, stored in the file-level `cpu' variable
+ * instruction      the parsed instruction; add_asp() and calcsize()
+ *                  receive it non-const and may update it
+ *
+ * Returns the total size including the TIMES multiplier.  Returns 0 for
+ * an empty line (I_none) and for an INCBIN whose file cannot be opened,
+ * seeked or measured (an error is reported).  Returns -1 when no
+ * template matches or calcsize() rejects the instruction.
+ */
+int64_t insn_size(int32_t segment, int64_t offset, int bits, iflag_t cp,
+                  insn * instruction)
+{
+    const struct itemplate *temp;
+    enum match_result m;
+
+    cpu = cp;
+
+    if (instruction->opcode == I_none)
+        return 0;
+
+    if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
+        instruction->opcode == I_DD || instruction->opcode == I_DQ ||
+        instruction->opcode == I_DT || instruction->opcode == I_DO ||
+        instruction->opcode == I_DY) {
+        extop *e;
+        int32_t isize, osize, wsize;
+
+        isize = 0;
+        wsize = idata_bytes(instruction->opcode);
+
+        list_for_each(e, instruction->eops) {
+            int32_t align;
+
+            osize = 0;
+            if (e->type == EOT_DB_NUMBER) {
+                osize = 1;
+                warn_overflow_const(e->offset, wsize);
+            } else if (e->type == EOT_DB_STRING ||
+                       e->type == EOT_DB_STRING_FREE)
+                osize = e->stringlen;
+
+            /* Round every item up to a whole number of elements */
+            align = (-osize) % wsize;
+            if (align < 0)
+                align += wsize;
+            isize += osize + align;
+        }
+        /* Widen first so a large TIMES count cannot overflow 32 bits */
+        return (int64_t)isize * instruction->times;
+    }
+
+    if (instruction->opcode == I_INCBIN) {
+        const char *fname = instruction->eops->stringval;
+        FILE *fp;
+        int64_t val = 0;
+        long fsize;
+
+        fp = nasm_open_read(fname, NF_BINARY);
+        if (!fp) {
+            nasm_error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
+                       fname);
+            return 0;
+        }
+
+        if (fseek(fp, 0L, SEEK_END) < 0) {
+            nasm_error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
+                       fname);
+        } else if ((fsize = ftell(fp)) < 0) {
+            /* ftell() returns -1L on failure; don't treat it as a length */
+            nasm_error(ERR_NONFATAL,
+                       "`incbin': unable to get length of file `%s'",
+                       fname);
+        } else {
+            size_t len = (size_t)fsize;
+
+            if (instruction->eops->next) {
+                /* Second operand: start offset within the file */
+                int64_t base = instruction->eops->next->offset;
+
+                /*
+                 * A start offset at or beyond end-of-file (or a
+                 * negative one) leaves nothing to include; clamp to
+                 * zero rather than letting the unsigned subtraction
+                 * wrap around.
+                 */
+                if ((uint64_t)base >= (uint64_t)len)
+                    len = 0;
+                else
+                    len -= (size_t)base;
+
+                /* Third operand: upper bound on the length */
+                if (instruction->eops->next->next &&
+                    len > (size_t)instruction->eops->next->next->offset) {
+                    len = (size_t)instruction->eops->next->next->offset;
+                }
+            }
+            val = instruction->times * len;
+        }
+
+        fclose(fp);
+        return val;
+    }
+
+    /* Check to see if we need an address-size prefix */
+    add_asp(instruction, bits);
+
+    m = find_match(&temp, instruction, segment, offset, bits);
+    if (m == MOK_GOOD) {
+        /* we've matched an instruction. */
+        int64_t isize;
+        int j;
+
+        isize = calcsize(segment, offset, bits, instruction, temp);
+        if (isize < 0)
+            return -1;
+
+        /*
+         * Add one byte for every prefix slot that assemble() will turn
+         * into an actual prefix byte in this mode.
+         */
+        for (j = 0; j < MAXPREFIX; j++) {
+            switch (instruction->prefixes[j]) {
+            case P_A16:
+                if (bits != 16)
+                    isize++;
+                break;
+            case P_A32:
+                if (bits != 32)
+                    isize++;
+                break;
+            case P_O16:
+                if (bits != 16)
+                    isize++;
+                break;
+            case P_O32:
+                if (bits == 16)
+                    isize++;
+                break;
+            case P_A64:
+            case P_O64:
+            case P_EVEX:
+            case P_VEX3:
+            case P_VEX2:
+            case P_NOBND:
+            case P_none:
+                break;
+            default:
+                isize++;
+                break;
+            }
+        }
+        return isize * instruction->times;
+    } else {
+        return -1;                  /* didn't match any instruction */
+    }
+}
+
+/*
+ * Warn about a misused XACQUIRE/XRELEASE prefix on `ins'.
+ *
+ * `hleok' indexes the second dimension of the verdict table below and
+ * must be in the range 0..3.  Nothing happens unless the REP prefix
+ * slot holds XACQUIRE or XRELEASE.  The prefix is always reported as
+ * invalid when operand 0 is not a memory operand.
+ */
+static void bad_hle_warn(const insn * ins, uint8_t hleok)
+{
+    enum whatwarn { w_none, w_lock, w_inval };
+    static const enum whatwarn verdicts[2][4] = {
+        { w_inval, w_inval, w_none, w_lock },   /* XACQUIRE */
+        { w_inval, w_none,  w_none, w_lock },   /* XRELEASE */
+    };
+    const enum prefixes pfx = ins->prefixes[PPS_REP];
+    /* 0 for XACQUIRE, 1 for XRELEASE; any other prefix lands above 1 */
+    const unsigned int which = (unsigned int)pfx - P_XACQUIRE;
+    enum whatwarn verdict;
+
+    if (which > 1)
+        return;                 /* Not XACQUIRE/XRELEASE */
+
+    /* HLE requires operand 0 to be memory */
+    verdict = is_class(MEMORY, ins->oprs[0].type) ?
+        verdicts[which][hleok] : w_inval;
+
+    if (verdict == w_inval) {
+        nasm_error(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
+                   "%s invalid with this instruction",
+                   prefix_name(pfx));
+    } else if (verdict == w_lock && ins->prefixes[PPS_LOCK] != P_LOCK) {
+        nasm_error(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
+                   "%s with this instruction requires lock",
+                   prefix_name(pfx));
+    }
+}
+
+/*
+ * Common construct: case3(x) expands to the case labels x, x+1 and x+2;
+ * case4(x) adds x+3.  The colon after the last label is supplied at the
+ * point of use, e.g. "case4(010):".
+ */
+#define case3(x) case (x): case (x)+1: case (x)+2
+#define case4(x) case3(x): case (x)+3
+
+static int64_t calcsize(int32_t segment, int64_t offset, int bits,
+ insn * ins, const struct itemplate *temp)
+{
+ const uint8_t *codes = temp->code;
+ int64_t length = 0;
+ uint8_t c;
+ int rex_mask = ~0;
+ int op1, op2;
+ struct operand *opx;
+ uint8_t opex = 0;
+ enum ea_type eat;
+ uint8_t hleok = 0;
+ bool lockcheck = true;
+ enum reg_enum mib_index = R_none; /* For a separate index MIB reg form */
+
+ ins->rex = 0; /* Ensure REX is reset */
+ eat = EA_SCALAR; /* Expect a scalar EA */
+ memset(ins->evex_p, 0, 3); /* Ensure EVEX is reset */
+
+ if (ins->prefixes[PPS_OSIZE] == P_O64)
+ ins->rex |= REX_W;
+
+ (void)segment; /* Don't warn that this parameter is unused */
+ (void)offset; /* Don't warn that this parameter is unused */
+
+ while (*codes) {
+ c = *codes++;
+ op1 = (c & 3) + ((opex & 1) << 2);
+ op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
+ opx = &ins->oprs[op1];
+ opex = 0; /* For the next iteration */
+
+ switch (c) {
+ case4(01):
+ codes += c, length += c;
+ break;
+
+ case3(05):
+ opex = c;
+ break;
+
+ case4(010):
+ ins->rex |=
+ op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
+ codes++, length++;
+ break;
+
+ case4(014):
+ /* this is an index reg of MIB operand */
+ mib_index = opx->basereg;
+ break;
+
+ case4(020):
+ case4(024):
+ length++;
+ break;
+
+ case4(030):
+ length += 2;
+ break;
+
+ case4(034):
+ if (opx->type & (BITS16 | BITS32 | BITS64))
+ length += (opx->type & BITS16) ? 2 : 4;
+ else
+ length += (bits == 16) ? 2 : 4;
+ break;
+
+ case4(040):
+ length += 4;
+ break;
+
+ case4(044):
+ length += ins->addr_size >> 3;
+ break;
+
+ case4(050):
+ length++;
+ break;
+
+ case4(054):
+ length += 8; /* MOV reg64/imm */
+ break;
+
+ case4(060):
+ length += 2;
+ break;
+
+ case4(064):
+ if (opx->type & (BITS16 | BITS32 | BITS64))
+ length += (opx->type & BITS16) ? 2 : 4;
+ else
+ length += (bits == 16) ? 2 : 4;
+ break;
+
+ case4(070):
+ length += 4;
+ break;
+
+ case4(074):
+ length += 2;
+ break;
+
+ case 0172:
+ case 0173:
+ codes++;
+ length++;
+ break;
+
+ case4(0174):
+ length++;
+ break;
+
+ case4(0240):
+ ins->rex |= REX_EV;
+ ins->vexreg = regval(opx);
+ ins->evex_p[2] |= op_evexflags(opx, EVEX_P2VP, 2); /* High-16 NDS */
+ ins->vex_cm = *codes++;
+ ins->vex_wlp = *codes++;
+ ins->evex_tuple = (*codes++ - 0300);
+ break;
+
+ case 0250:
+ ins->rex |= REX_EV;
+ ins->vexreg = 0;
+ ins->vex_cm = *codes++;
+ ins->vex_wlp = *codes++;
+ ins->evex_tuple = (*codes++ - 0300);
+ break;
+
+ case4(0254):
+ length += 4;
+ break;
+
+ case4(0260):
+ ins->rex |= REX_V;
+ ins->vexreg = regval(opx);
+ ins->vex_cm = *codes++;
+ ins->vex_wlp = *codes++;
+ break;
+
+ case 0270:
+ ins->rex |= REX_V;
+ ins->vexreg = 0;
+ ins->vex_cm = *codes++;
+ ins->vex_wlp = *codes++;
+ break;
+
+ case3(0271):
+ hleok = c & 3;
+ break;
+
+ case4(0274):
+ length++;
+ break;
+
+ case4(0300):
+ break;
+
+ case 0310:
+ if (bits == 64)
+ return -1;
+ length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
+ break;
+
+ case 0311:
+ length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
+ break;
+
+ case 0312:
+ break;
+
+ case 0313:
+ if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
+ has_prefix(ins, PPS_ASIZE, P_A32))
+ return -1;
+ break;
+
+ case4(0314):
+ break;
+
+ case 0320:
+ {
+ enum prefixes pfx = ins->prefixes[PPS_OSIZE];
+ if (pfx == P_O16)
+ break;
+ if (pfx != P_none)
+ nasm_error(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
+ else
+ ins->prefixes[PPS_OSIZE] = P_O16;
+ break;
+ }
+
+ case 0321:
+ {
+ enum prefixes pfx = ins->prefixes[PPS_OSIZE];
+ if (pfx == P_O32)
+ break;
+ if (pfx != P_none)
+ nasm_error(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
+ else
+ ins->prefixes[PPS_OSIZE] = P_O32;
+ break;
+ }
+
+ case 0322:
+ break;
+
+ case 0323:
+ rex_mask &= ~REX_W;
+ break;
+
+ case 0324:
+ ins->rex |= REX_W;
+ break;
+
+ case 0325:
+ ins->rex |= REX_NH;
+ break;
+
+ case 0326:
+ break;
+
+ case 0330:
+ codes++, length++;
+ break;
+
+ case 0331:
+ break;
+
+ case 0332:
+ case 0333:
+ length++;
+ break;
+
+ case 0334:
+ ins->rex |= REX_L;
+ break;
+
+ case 0335:
+ break;
+
+ case 0336:
+ if (!ins->prefixes[PPS_REP])
+ ins->prefixes[PPS_REP] = P_REP;
+ break;
+
+ case 0337:
+ if (!ins->prefixes[PPS_REP])
+ ins->prefixes[PPS_REP] = P_REPNE;
+ break;
+
+ case 0340:
+ if (ins->oprs[0].segment != NO_SEG)
+ nasm_error(ERR_NONFATAL, "attempt to reserve non-constant"
+ " quantity of BSS space");
+ else
+ length += ins->oprs[0].offset;
+ break;
+
+ case 0341:
+ if (!ins->prefixes[PPS_WAIT])
+ ins->prefixes[PPS_WAIT] = P_WAIT;
+ break;
+
+ case 0360:
+ break;
+
+ case 0361:
+ length++;
+ break;
+
+ case 0364:
+ case 0365:
+ break;
+
+ case 0366:
+ case 0367:
+ length++;
+ break;
+
+ case 0370:
+ case 0371:
+ break;
+
+ case 0373:
+ length++;
+ break;
+
+ case 0374:
+ eat = EA_XMMVSIB;
+ break;
+
+ case 0375:
+ eat = EA_YMMVSIB;
+ break;
+
+ case 0376:
+ eat = EA_ZMMVSIB;
+ break;
+
+ case4(0100):
+ case4(0110):
+ case4(0120):
+ case4(0130):
+ case4(0200):
+ case4(0204):
+ case4(0210):
+ case4(0214):
+ case4(0220):
+ case4(0224):
+ case4(0230):
+ case4(0234):
+ {
+ ea ea_data;
+ int rfield;
+ opflags_t rflags;
+ struct operand *opy = &ins->oprs[op2];
+ struct operand *op_er_sae;
+
+ ea_data.rex = 0; /* Ensure ea.REX is initially 0 */
+
+ if (c <= 0177) {
+ /* pick rfield from operand b (opx) */
+ rflags = regflag(opx);
+ rfield = nasm_regvals[opx->basereg];
+ } else {
+ rflags = 0;
+ rfield = c & 7;
+ }
+
+ /* EVEX.b1 : evex_brerop contains the operand position */
+ op_er_sae = (ins->evex_brerop >= 0 ?
+ &ins->oprs[ins->evex_brerop] : NULL);
+
+ if (op_er_sae && (op_er_sae->decoflags & (ER | SAE))) {
+ /* set EVEX.b */
+ ins->evex_p[2] |= EVEX_P2B;
+ if (op_er_sae->decoflags & ER) {
+ /* set EVEX.RC (rounding control) */
+ ins->evex_p[2] |= ((ins->evex_rm - BRC_RN) << 5)
+ & EVEX_P2RC;
+ }
+ } else {
+ /* set EVEX.L'L (vector length) */
+ ins->evex_p[2] |= ((ins->vex_wlp << (5 - 2)) & EVEX_P2LL);
+ ins->evex_p[1] |= ((ins->vex_wlp << (7 - 4)) & EVEX_P1W);
+ if (opy->decoflags & BRDCAST_MASK) {
+ /* set EVEX.b */
+ ins->evex_p[2] |= EVEX_P2B;
+ }
+ }
+
+ if (itemp_has(temp, IF_MIB)) {
+ opy->eaflags |= EAF_MIB;
+ /*
+ * if a separate form of MIB (ICC style) is used,
+ * the index reg info is merged into mem operand
+ */
+ if (mib_index != R_none) {
+ opy->indexreg = mib_index;
+ opy->scale = 1;
+ opy->hintbase = mib_index;
+ opy->hinttype = EAH_NOTBASE;
+ }
+ }
+
+ if (process_ea(opy, &ea_data, bits,
+ rfield, rflags, ins) != eat) {
+ nasm_error(ERR_NONFATAL, "invalid effective address");
+ return -1;
+ } else {
+ ins->rex |= ea_data.rex;
+ length += ea_data.size;
+ }
+ }
+ break;
+
+ default:
+ nasm_panic(0, "internal instruction table corrupt"
+ ": instruction code \\%o (0x%02X) given", c, c);
+ break;
+ }
+ }
+
+ ins->rex &= rex_mask;
+
+ if (ins->rex & REX_NH) {
+ if (ins->rex & REX_H) {
+ nasm_error(ERR_NONFATAL, "instruction cannot use high registers");
+ return -1;
+ }
+ ins->rex &= ~REX_P; /* Don't force REX prefix due to high reg */
+ }
+
+ switch (ins->prefixes[PPS_VEX]) {
+ case P_EVEX:
+ if (!(ins->rex & REX_EV))
+ return -1;
+ break;
+ case P_VEX3:
+ case P_VEX2:
+ if (!(ins->rex & REX_V))
+ return -1;
+ break;
+ default:
+ break;
+ }
+
+ if (ins->rex & (REX_V | REX_EV)) {
+ int bad32 = REX_R|REX_W|REX_X|REX_B;
+
+ if (ins->rex & REX_H) {
+ nasm_error(ERR_NONFATAL, "cannot use high register in AVX instruction");
+ return -1;
+ }
+ switch (ins->vex_wlp & 060) {
+ case 000:
+ case 040:
+ ins->rex &= ~REX_W;
+ break;
+ case 020:
+ ins->rex |= REX_W;
+ bad32 &= ~REX_W;
+ break;
+ case 060:
+ /* Follow REX_W */
+ break;
+ }
+
+ if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
+ nasm_error(ERR_NONFATAL, "invalid operands in non-64-bit mode");
+ return -1;
+ } else if (!(ins->rex & REX_EV) &&
+ ((ins->vexreg > 15) || (ins->evex_p[0] & 0xf0))) {
+ nasm_error(ERR_NONFATAL, "invalid high-16 register in non-AVX-512");
+ return -1;
+ }
+ if (ins->rex & REX_EV)
+ length += 4;
+ else if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
+ ins->prefixes[PPS_VEX] == P_VEX3)
+ length += 3;
+ else
+ length += 2;
+ } else if (ins->rex & REX_MASK) {
+ if (ins->rex & REX_H) {
+ nasm_error(ERR_NONFATAL, "cannot use high register in rex instruction");
+ return -1;
+ } else if (bits == 64) {
+ length++;
+ } else if ((ins->rex & REX_L) &&
+ !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
+ iflag_ffs(&cpu) >= IF_X86_64) {
+ /* LOCK-as-REX.R */
+ assert_no_prefix(ins, PPS_LOCK);
+ lockcheck = false; /* Already errored, no need for warning */
+ length++;
+ } else {
+ nasm_error(ERR_NONFATAL, "invalid operands in non-64-bit mode");
+ return -1;
+ }
+ }
+
+ if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
+ (!itemp_has(temp,IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
+ nasm_error(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
+ "instruction is not lockable");
+ }
+
+ bad_hle_warn(ins, hleok);
+
+ /*
+ * when BND prefix is set by DEFAULT directive,
+ * BND prefix is added to every appropriate instruction line
+ * unless it is overridden by NOBND prefix.
+ */
+ if (globalbnd &&
+ (itemp_has(temp, IF_BND) && !has_prefix(ins, PPS_REP, P_NOBND)))
+ ins->prefixes[PPS_REP] = P_BND;
+
+ return length;
+}
+
+ /*
+  * Emit a legacy REX prefix byte if one is needed and has not been written
+  * yet for this instruction.
+  *
+  * A byte is produced only when all of the following hold:
+  *   - bits == 64;
+  *   - ins->rex has at least one REX_MASK bit set;
+  *   - neither REX_V nor REX_EV is set in ins->rex;
+  *   - ins->rex_done is still false.
+  * After emitting, ins->rex_done is set so that later calls for the same
+  * instruction do nothing.
+  *
+  * Returns the number of bytes written (1 or 0), which callers add to
+  * their running output offset.
+  */
+ static inline unsigned int emit_rex(insn *ins, int32_t segment, int64_t offset, int bits)
+ {
+     if (bits != 64)
+         return 0;
+
+     if ((ins->rex & REX_MASK) &&
+         !(ins->rex & (REX_V | REX_EV)) &&
+         !ins->rex_done) {
+         /*
+          * Keep the prefix in an object that is exactly one byte wide.
+          * out() is given a pointer plus a length of 1, so pointing it
+          * at an int would emit that int's lowest-addressed byte, which
+          * is not the prefix value on a big-endian host.
+          */
+         uint8_t rex = (ins->rex & REX_MASK) | REX_P;
+
+         out(offset, segment, &rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+         ins->rex_done = true;
+         return 1;
+     }
+
+     return 0;
+ }
+
+ /*
+  * Emit the machine code for one instruction.
+  *
+  * Walks the byte-code string in temp->code and, for each code, writes the
+  * corresponding bytes through out(), advancing a local copy of `offset'.
+  *
+  *   segment, offset - position of the start of the instruction
+  *   bits            - current mode (compared against 16, 32 and 64 below)
+  *   ins             - the instruction being encoded; its rex, rex_done and
+  *                     evex_p[2] fields are modified here
+  *   temp            - template whose code string is interpreted
+  *   insn_end        - reference offset for self-relative values
+  *                     (presumably the offset just past the instruction,
+  *                     as computed by the sizing pass -- confirm with caller)
+  *
+  * An unrecognised code byte results in nasm_panic().
+  */
+ static void gencode(int32_t segment, int64_t offset, int bits,
+                     insn * ins, const struct itemplate *temp,
+                     int64_t insn_end)
+ {
+     uint8_t c;
+     uint8_t bytes[4];
+     int64_t size;
+     int64_t data;
+     int op1, op2;
+     struct operand *opx;
+     const uint8_t *codes = temp->code;
+     uint8_t opex = 0;
+     enum ea_type eat = EA_SCALAR;
+
+     ins->rex_done = false;
+
+     while (*codes) {
+         c = *codes++;
+         /* opex (set by codes 05..07) supplies the third bit of each index */
+         op1 = (c & 3) + ((opex & 1) << 2);
+         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
+         opx = &ins->oprs[op1];
+         opex = 0;               /* For the next iteration */
+
+         switch (c) {
+         case 01:
+         case 02:
+         case 03:
+         case 04:
+             /* c literal bytes follow in the code string */
+             offset += emit_rex(ins, segment, offset, bits);
+             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
+             codes += c;
+             offset += c;
+             break;
+
+         case 05:
+         case 06:
+         case 07:
+             /* operand-index extension for the next code byte */
+             opex = c;
+             break;
+
+         case4(010):
+             /* next code byte plus the low three bits of the register */
+             offset += emit_rex(ins, segment, offset, bits);
+             bytes[0] = *codes++ + (regval(opx) & 7);
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset += 1;
+             break;
+
+         case4(014):
+             break;
+
+         case4(020):
+             /* byte immediate, accepted range -256..255 */
+             if (opx->offset < -256 || opx->offset > 255) {
+                 nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
+                            "byte value exceeds bounds");
+             }
+             out_imm8(offset, segment, opx, -1);
+             offset += 1;
+             break;
+
+         case4(024):
+             /* unsigned byte immediate */
+             if (opx->offset < 0 || opx->offset > 255)
+                 nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
+                            "unsigned byte value exceeds bounds");
+             out_imm8(offset, segment, opx, 1);
+             offset += 1;
+             break;
+
+         case4(030):
+             /* two-byte value */
+             warn_overflow_opd(opx, 2);
+             data = opx->offset;
+             out(offset, segment, &data, OUT_ADDRESS, 2,
+                 opx->segment, opx->wrt);
+             offset += 2;
+             break;
+
+         case4(034):
+             /* two or four bytes: operand size flag wins, else mode */
+             if (opx->type & (BITS16 | BITS32))
+                 size = (opx->type & BITS16) ? 2 : 4;
+             else
+                 size = (bits == 16) ? 2 : 4;
+             warn_overflow_opd(opx, size);
+             data = opx->offset;
+             out(offset, segment, &data, OUT_ADDRESS, size,
+                 opx->segment, opx->wrt);
+             offset += size;
+             break;
+
+         case4(040):
+             /* four-byte value */
+             warn_overflow_opd(opx, 4);
+             data = opx->offset;
+             out(offset, segment, &data, OUT_ADDRESS, 4,
+                 opx->segment, opx->wrt);
+             offset += 4;
+             break;
+
+         case4(044):
+             /* value as wide as the instruction's address size */
+             data = opx->offset;
+             size = ins->addr_size >> 3;
+             warn_overflow_opd(opx, size);
+             out(offset, segment, &data, OUT_ADDRESS, size,
+                 opx->segment, opx->wrt);
+             offset += size;
+             break;
+
+         case4(050):
+             /* one-byte self-relative value */
+             if (opx->segment != segment) {
+                 data = opx->offset;
+                 out(offset, segment, &data,
+                     OUT_REL1ADR, insn_end - offset,
+                     opx->segment, opx->wrt);
+             } else {
+                 /* same segment: resolve here and range-check */
+                 data = opx->offset - insn_end;
+                 if (data > 127 || data < -128)
+                     nasm_error(ERR_NONFATAL, "short jump is out of range");
+                 out(offset, segment, &data,
+                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
+             }
+             offset += 1;
+             break;
+
+         case4(054):
+             /* eight-byte value */
+             data = (int64_t)opx->offset;
+             out(offset, segment, &data, OUT_ADDRESS, 8,
+                 opx->segment, opx->wrt);
+             offset += 8;
+             break;
+
+         case4(060):
+             /* two-byte self-relative value */
+             if (opx->segment != segment) {
+                 data = opx->offset;
+                 out(offset, segment, &data,
+                     OUT_REL2ADR, insn_end - offset,
+                     opx->segment, opx->wrt);
+             } else {
+                 data = opx->offset - insn_end;
+                 out(offset, segment, &data,
+                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
+             }
+             offset += 2;
+             break;
+
+         case4(064):
+             /* two- or four-byte self-relative value */
+             if (opx->type & (BITS16 | BITS32 | BITS64))
+                 size = (opx->type & BITS16) ? 2 : 4;
+             else
+                 size = (bits == 16) ? 2 : 4;
+             if (opx->segment != segment) {
+                 data = opx->offset;
+                 out(offset, segment, &data,
+                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
+                     insn_end - offset, opx->segment, opx->wrt);
+             } else {
+                 data = opx->offset - insn_end;
+                 out(offset, segment, &data,
+                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
+             }
+             offset += size;
+             break;
+
+         case4(070):
+             /* four-byte self-relative value */
+             if (opx->segment != segment) {
+                 data = opx->offset;
+                 out(offset, segment, &data,
+                     OUT_REL4ADR, insn_end - offset,
+                     opx->segment, opx->wrt);
+             } else {
+                 data = opx->offset - insn_end;
+                 out(offset, segment, &data,
+                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
+             }
+             offset += 4;
+             break;
+
+         case4(074):
+             /* two bytes referring to the segment base of the operand */
+             if (opx->segment == NO_SEG)
+                 nasm_error(ERR_NONFATAL, "value referenced by FAR is not"
+                            " relocatable");
+             data = 0;
+             out(offset, segment, &data, OUT_ADDRESS, 2,
+                 ofmt->segbase(1 + opx->segment),
+                 opx->wrt);
+             offset += 2;
+             break;
+
+         case 0172:
+             /*
+              * One byte: register number in the high nibble (operand
+              * c >> 3), four-bit constant in the low nibble (operand c & 7).
+              */
+             c = *codes++;
+             opx = &ins->oprs[c >> 3];
+             bytes[0] = nasm_regvals[opx->basereg] << 4;
+             opx = &ins->oprs[c & 7];
+             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
+                 nasm_error(ERR_NONFATAL,
+                            "non-absolute expression not permitted as argument %d",
+                            c & 7);
+             } else {
+                 if (opx->offset & ~15) {
+                     nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
+                                "four-bit argument exceeds bounds");
+                 }
+                 bytes[0] |= opx->offset & 15;
+             }
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset++;
+             break;
+
+         case 0173:
+             /* register in the high nibble, low nibble taken from the code */
+             c = *codes++;
+             opx = &ins->oprs[c >> 4];
+             bytes[0] = nasm_regvals[opx->basereg] << 4;
+             bytes[0] |= c & 15;
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset++;
+             break;
+
+         case4(0174):
+             /* register in the high nibble, low nibble zero */
+             bytes[0] = nasm_regvals[opx->basereg] << 4;
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset++;
+             break;
+
+         case4(0254):
+             /* signed four-byte value (size passed to out() as -4) */
+             data = opx->offset;
+             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
+                 (int32_t)data != (int64_t)data) {
+                 nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
+                            "signed dword immediate exceeds bounds");
+             }
+             out(offset, segment, &data, OUT_ADDRESS, -4,
+                 opx->segment, opx->wrt);
+             offset += 4;
+             break;
+
+         case4(0240):
+         case 0250:
+             /* four-byte prefix starting with 0x62; skip 3 parameter bytes */
+             codes += 3;
+             ins->evex_p[2] |= op_evexflags(&ins->oprs[0],
+                                            EVEX_P2Z | EVEX_P2AAA, 2);
+             ins->evex_p[2] ^= EVEX_P2VP;        /* 1's complement */
+             bytes[0] = 0x62;
+             /* EVEX.X can be set by either REX or EVEX for different reasons */
+             bytes[1] = ((((ins->rex & 7) << 5) |
+                          (ins->evex_p[0] & (EVEX_P0X | EVEX_P0RP))) ^ 0xf0) |
+                        (ins->vex_cm & EVEX_P0MM);
+             bytes[2] = ((ins->rex & REX_W) << (7 - 3)) |
+                        ((~ins->vexreg & 15) << 3) |
+                        (1 << 2) | (ins->vex_wlp & 3);
+             bytes[3] = ins->evex_p[2];
+             out(offset, segment, &bytes, OUT_RAWDATA, 4, NO_SEG, NO_SEG);
+             offset += 4;
+             break;
+
+         case4(0260):
+         case 0270:
+             /* skip 2 parameter bytes, then a three- or two-byte prefix */
+             codes += 2;
+             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
+                 ins->prefixes[PPS_VEX] == P_VEX3) {
+                 /* three-byte form: 0x8f or 0xc4 lead byte */
+                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
+                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
+                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
+                            ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
+                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
+                 offset += 3;
+             } else {
+                 /* two-byte form: 0xc5 lead byte */
+                 bytes[0] = 0xc5;
+                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
+                            ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
+                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
+                 offset += 2;
+             }
+             break;
+
+         case 0271:
+         case 0272:
+         case 0273:
+             break;
+
+         case4(0274):
+         {
+             /* one-byte immediate checked against the operand size s */
+             uint64_t uv, um;
+             int s;
+
+             if (ins->rex & REX_W)
+                 s = 64;
+             else if (ins->prefixes[PPS_OSIZE] == P_O16)
+                 s = 16;
+             else if (ins->prefixes[PPS_OSIZE] == P_O32)
+                 s = 32;
+             else
+                 s = bits;
+
+             um = (uint64_t)2 << (s-1);
+             uv = opx->offset;
+
+             if (uv > 127 && uv < (uint64_t)-128 &&
+                 (uv < um-128 || uv > um-1)) {
+                 /* If this wasn't explicitly byte-sized, warn as though we
+                  * had fallen through to the imm16/32/64 case.
+                  */
+                 nasm_error(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
+                            "%s value exceeds bounds",
+                            (opx->type & BITS8) ? "signed byte" :
+                            s == 16 ? "word" :
+                            s == 32 ? "dword" :
+                            "signed dword");
+             }
+             if (opx->segment != NO_SEG) {
+                 data = uv;
+                 out(offset, segment, &data, OUT_ADDRESS, 1,
+                     opx->segment, opx->wrt);
+             } else {
+                 bytes[0] = uv;
+                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
+                     NO_SEG);
+             }
+             offset += 1;
+             break;
+         }
+
+         case4(0300):
+             break;
+
+         case 0310:
+             /* 0x67 prefix in 32-bit mode unless a16 was given explicitly */
+             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
+                 *bytes = 0x67;
+                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+                 offset += 1;
+             }
+             break;
+
+         case 0311:
+             /* 0x67 prefix outside 32-bit mode unless a32 was given explicitly */
+             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
+                 *bytes = 0x67;
+                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+                 offset += 1;
+             }
+             break;
+
+         case 0312:
+             break;
+
+         case 0313:
+             ins->rex = 0;
+             break;
+
+         case4(0314):
+             break;
+
+         case 0320:
+         case 0321:
+             break;
+
+         case 0322:
+         case 0323:
+             break;
+
+         case 0324:
+             ins->rex |= REX_W;
+             break;
+
+         case 0325:
+             break;
+
+         case 0326:
+             break;
+
+         case 0330:
+             /* next code byte XORed with the condition-code value */
+             *bytes = *codes++ ^ get_cond_opcode(ins->condition);
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset += 1;
+             break;
+
+         case 0331:
+             break;
+
+         case 0332:
+         case 0333:
+             /* 0xF2 for code 0332, 0xF3 for code 0333 */
+             *bytes = c - 0332 + 0xF2;
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset += 1;
+             break;
+
+         case 0334:
+             /* emit 0xF0 in place of REX.R, then drop both flags */
+             if (ins->rex & REX_R) {
+                 *bytes = 0xF0;
+                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+                 offset += 1;
+             }
+             ins->rex &= ~(REX_L|REX_R);
+             break;
+
+         case 0335:
+             break;
+
+         case 0336:
+         case 0337:
+             break;
+
+         case 0340:
+             /* reserve oprs[0].offset bytes; the size must be a constant */
+             if (ins->oprs[0].segment != NO_SEG)
+                 nasm_panic(0, "non-constant BSS size in pass two");
+             else {
+                 size = ins->oprs[0].offset;
+                 if (size > 0)
+                     out(offset, segment, NULL,
+                         OUT_RESERVE, size, NO_SEG, NO_SEG);
+                 offset += size;
+             }
+             break;
+
+         case 0341:
+             break;
+
+         case 0360:
+             break;
+
+         case 0361:
+             bytes[0] = 0x66;
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset += 1;
+             break;
+
+         case 0364:
+         case 0365:
+             break;
+
+         case 0366:
+         case 0367:
+             /* 0x66 for code 0366, 0x67 for code 0367 */
+             *bytes = c - 0366 + 0x66;
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset += 1;
+             break;
+
+         case3(0370):
+             break;
+
+         case 0373:
+             *bytes = bits == 16 ? 3 : 5;
+             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+             offset += 1;
+             break;
+
+         case 0374:
+             eat = EA_XMMVSIB;
+             break;
+
+         case 0375:
+             eat = EA_YMMVSIB;
+             break;
+
+         case 0376:
+             eat = EA_ZMMVSIB;
+             break;
+
+         case4(0100):
+         case4(0110):
+         case4(0120):
+         case4(0130):
+         case4(0200):
+         case4(0204):
+         case4(0210):
+         case4(0214):
+         case4(0220):
+         case4(0224):
+         case4(0230):
+         case4(0234):
+         {
+             /* ModR/M byte, optional SIB byte and optional displacement */
+             ea ea_data;
+             int rfield;
+             opflags_t rflags;
+             uint8_t *p;
+             int32_t s;
+             struct operand *opy = &ins->oprs[op2];
+
+             /*
+              * process_ea() ORs bits into ea_data.rex, so give it a
+              * defined starting value (the sizing pass does the same).
+              */
+             ea_data.rex = 0;
+
+             if (c <= 0177) {
+                 /* pick rfield from operand b (opx) */
+                 rflags = regflag(opx);
+                 rfield = nasm_regvals[opx->basereg];
+             } else {
+                 /* rfield is constant */
+                 rflags = 0;
+                 rfield = c & 7;
+             }
+
+             if (process_ea(opy, &ea_data, bits,
+                            rfield, rflags, ins) != eat)
+                 nasm_error(ERR_NONFATAL, "invalid effective address");
+
+             p = bytes;
+             *p++ = ea_data.modrm;
+             if (ea_data.sib_present)
+                 *p++ = ea_data.sib;
+
+             s = p - bytes;
+             out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
+
+             /*
+              * Make sure the address gets the right offset in case
+              * the line breaks in the .lst file (BR 1197827)
+              */
+             offset += s;
+             s = 0;
+
+             if (ea_data.bytes) {
+                 /* use compressed displacement, if available */
+                 data = ea_data.disp8 ? ea_data.disp8 : opy->offset;
+                 s += ea_data.bytes;
+                 if (ea_data.rip) {
+                     if (opy->segment == segment) {
+                         data -= insn_end;
+                         if (overflow_signed(data, ea_data.bytes))
+                             warn_overflow(ERR_PASS2, ea_data.bytes);
+                         out(offset, segment, &data, OUT_ADDRESS,
+                             ea_data.bytes, NO_SEG, NO_SEG);
+                     } else {
+                         /* overflow check in linker? */
+                         out(offset, segment, &data, OUT_REL4ADR,
+                             insn_end - offset, opy->segment, opy->wrt);
+                     }
+                 } else {
+                     int asize = ins->addr_size >> 3;
+                     int atype = ea_data.bytes;
+
+                     if (overflow_general(data, asize) ||
+                         signed_bits(data, ins->addr_size) !=
+                         signed_bits(data, ea_data.bytes << 3))
+                         warn_overflow(ERR_PASS2, ea_data.bytes);
+
+                     if (asize > ea_data.bytes) {
+                         /*
+                          * If the address isn't the full width of
+                          * the address size, treat it as signed...
+                          */
+                         atype = -atype;
+                     }
+
+                     out(offset, segment, &data, OUT_ADDRESS,
+                         atype, opy->segment, opy->wrt);
+                 }
+             }
+             offset += s;
+         }
+         break;
+
+         default:
+             nasm_panic(0, "internal instruction table corrupt"
+                        ": instruction code \\%o (0x%02X) given", c, c);
+             break;
+         }
+     }
+ }
+
+ /*
+  * Return the nasm_reg_flags[] entry for the register held in o->basereg.
+  * Calls nasm_panic() if o->basereg is not a register.
+  */
+ static opflags_t regflag(const operand * o)
+ {
+     const int reg = o->basereg;
+
+     if (is_register(reg))
+         return nasm_reg_flags[reg];
+
+     nasm_panic(0, "invalid operand passed to regflag()");
+     return nasm_reg_flags[reg];
+ }
+
+ /*
+  * Return the nasm_regvals[] entry (register number) for the register
+  * held in o->basereg.  Calls nasm_panic() if it is not a register.
+  */
+ static int32_t regval(const operand * o)
+ {
+     const int reg = o->basereg;
+
+     if (is_register(reg))
+         return nasm_regvals[reg];
+
+     nasm_panic(0, "invalid operand passed to regval()");
+     return nasm_regvals[reg];
+ }
+
+ /*
+  * Compute the REX flags implied by the register in o->basereg, limited
+  * to the bits in `mask'.  Calls nasm_panic() if the operand does not
+  * hold a register; the actual flag computation is done by rexflags().
+  */
+ static int op_rexflags(const operand * o, int mask)
+ {
+     const int reg = o->basereg;
+
+     if (!is_register(reg))
+         nasm_panic(0, "invalid operand passed to op_rexflags()");
+
+     return rexflags(nasm_regvals[reg], nasm_reg_flags[reg], mask);
+ }
+
+ /*
+  * Derive REX-related flags from a register number and its flags.
+  *
+  *   val   - register number; a negative value means "no register"
+  *   flags - register flags
+  *   mask  - only these bits of the result are returned
+  */
+ static int rexflags(int val, opflags_t flags, int mask)
+ {
+     /* true when every bit of the respective class is present in flags */
+     const bool high_byte = (flags & REG_HIGH) == REG_HIGH;
+     const bool byte_reg = (flags & REG8) == REG8;
+     int result = 0;
+
+     /* register numbers with bit 3 set need an extension bit */
+     if (val >= 0 && (val & 8) != 0)
+         result |= REX_B | REX_X | REX_R;
+
+     if ((flags & BITS64) != 0)
+         result |= REX_W;
+
+     if (high_byte) {
+         /* AH, CH, DH, BH */
+         result |= REX_H;
+     } else if (byte_reg && val >= 4) {
+         /* SPL, BPL, SIL, DIL */
+         result |= REX_P;
+     }
+
+     return result & mask;
+ }
+
+ /*
+  * Derive EVEX payload bits from a register number and decorator flags.
+  *
+  *   val  - register number; a negative value means "no register"
+  *   deco - decorator flags of the operand
+  *   mask - only these bits of the result are returned
+  *   byte - which payload byte is being built; only 0 and 2 produce bits,
+  *          any other value yields 0
+  */
+ static int evexflags(int val, decoflags_t deco,
+                      int mask, uint8_t byte)
+ {
+     /* register numbers with bit 4 set need an extra extension bit */
+     const bool upper16 = val >= 0 && (val & 16) != 0;
+     int result = 0;
+
+     if (byte == 0) {
+         if (upper16)
+             result |= (EVEX_P0RP | EVEX_P0X);
+     } else if (byte == 2) {
+         if (upper16)
+             result |= EVEX_P2VP;
+         if (deco & Z)
+             result |= EVEX_P2Z;
+         if (deco & OPMASK_MASK)
+             result |= deco & EVEX_P2AAA;
+     }
+
+     return result & mask;
+ }
+
+ /*
+  * Compute the EVEX payload bits for operand `o', limited to `mask',
+  * for payload byte `byte' (see evexflags()).
+  *
+  * The operand is not required to hold a register: callers pass in
+  * operands that may be memory references whose basereg is not a valid
+  * register.  In that case no register-number lookup is made and -1
+  * ("no register") is handed to evexflags(), so only the decorator-derived
+  * bits can be set.
+  */
+ static int op_evexflags(const operand * o, int mask, uint8_t byte)
+ {
+     int val = -1;
+
+     /* only index nasm_regvals[] with a genuine register number */
+     if (is_register(o->basereg))
+         val = nasm_regvals[o->basereg];
+
+     return evexflags(val, o->decoflags, mask, byte);
+ }
+
+ /*
+  * Find a template for `instruction' among nasm_instructions[opcode].
+  *
+  * Every template is tried with matches().  If none is good but at least
+  * one (without IF_SX) failed only because an operand size was missing,
+  * the sizes offered by those templates are collected; when each
+  * non-register operand ends up with at most one candidate size, that
+  * size is written into the instruction and the templates are tried again.
+  *
+  * On return *tempp is the template at which the search stopped (the
+  * matching one when the result is MOK_GOOD) and the return value is the
+  * highest match_result encountered.
+  */
+ static enum match_result find_match(const struct itemplate **tempp,
+                                     insn *instruction,
+                                     int32_t segment, int64_t offset, int bits)
+ {
+     const struct itemplate *cand;
+     enum match_result best = MERR_INVALOP;
+     enum match_result res;
+     opflags_t seen[MAX_OPERANDS];
+     bool fuzzy = false;
+     const int8_t brop = instruction->evex_brerop;
+     int n;
+
+     /* broadcasting uses a different data element size */
+     for (n = 0; n < instruction->operands; n++) {
+         if (n == brop)
+             seen[n] = instruction->oprs[n].decoflags & BRSIZE_MASK;
+         else
+             seen[n] = instruction->oprs[n].type & SIZE_MASK;
+     }
+
+     /* First pass: exact matching, collecting candidate sizes on the way */
+     cand = nasm_instructions[instruction->opcode];
+     while (cand->opcode != I_none) {
+         res = matches(cand, instruction, bits);
+
+         if (res == MOK_JUMP) {
+             res = jmp_match(segment, offset, bits, instruction, cand)
+                 ? MOK_GOOD : MERR_INVALOP;
+         } else if (res == MERR_OPSIZEMISSING && !itemp_has(cand, IF_SX)) {
+             /* Missing operand size and a candidate for fuzzy matching... */
+             for (n = 0; n < cand->operands; n++) {
+                 if (n == brop)
+                     seen[n] |= cand->deco[n] & BRSIZE_MASK;
+                 else
+                     seen[n] |= cand->opd[n] & SIZE_MASK;
+             }
+             fuzzy = true;
+         }
+
+         if (res > best)
+             best = res;
+         if (best == MOK_GOOD)
+             goto done;
+
+         cand++;
+     }
+
+     /* No match, but see if we can get a fuzzy operand size match... */
+     if (!fuzzy)
+         goto done;
+
+     for (n = 0; n < instruction->operands; n++) {
+         struct operand *op = &instruction->oprs[n];
+
+         /*
+          * Extrinsic operand sizes on registers are ignored, so never
+          * fuzzy-match on them.  This also resolves the case of having
+          * e.g. "xmmrm128" in two different positions.
+          */
+         if (is_class(REGISTER, op->type))
+             continue;
+
+         /* more than one bit set means the size is ambiguous */
+         if ((seen[n] & (seen[n] - 1)))
+             goto done;          /* No luck */
+
+         if (n == brop) {
+             op->decoflags |= seen[n];
+             op->type |= (seen[n] == BR_BITS32 ? BITS32 : BITS64);
+         } else {
+             op->type |= seen[n];        /* Set the size */
+         }
+     }
+
+     /* Second pass: try matching again with the sizes filled in */
+     for (cand = nasm_instructions[instruction->opcode];
+          cand->opcode != I_none; cand++) {
+         res = matches(cand, instruction, bits);
+
+         if (res == MOK_JUMP) {
+             res = jmp_match(segment, offset, bits, instruction, cand)
+                 ? MOK_GOOD : MERR_INVALOP;
+         }
+
+         if (res > best)
+             best = res;
+         if (best == MOK_GOOD)
+             break;
+     }
+
+ done:
+     *tempp = cand;
+     return best;
+ }
+
+ /*
+  * Compute the repeat count N expected in a {1toN} broadcast decorator.
+  *
+  *   opflags - operand flags of the template operand; only the SIZE_MASK
+  *             part is used
+  *   brsize  - size flag of one broadcast element; anything larger than
+  *             BITS64 is reported as a fatal error
+  */
+ static uint8_t get_broadcast_num(opflags_t opflags, opflags_t brsize)
+ {
+     const opflags_t total = opflags & SIZE_MASK;
+
+     if (brsize > BITS64)
+         nasm_error(ERR_FATAL,
+                    "size of broadcasting element is greater than 64 bits");
+
+     /*
+      * BITS64 and BITS128 are not adjacent size flags (BITS80 lies in
+      * between), so a single arithmetic formula cannot cover both cases.
+      */
+     if (total == BITS64)
+         return BITS64 / brsize;
+
+     return (total / BITS128) * (BITS64 / brsize) * 2;
+ }
+
+ static enum match_result matches(const struct itemplate *itemp,
+ insn *instruction, int bits)
+ {
+ opflags_t size[MAX_OPERANDS], asize;
+ bool opsizemissing = false;
+ int i, oprs;
+
+ /*
+ * matches(): test whether template `itemp' can encode `instruction'
+ * when assembling for `bits'-bit mode.
+ *
+ * Returns MOK_GOOD on a full match, MOK_JUMP when the template begins
+ * with code 0370 or 0371 and the caller has to make the final decision,
+ * or one of the MERR_* codes naming the first check that failed.
+ * The instruction itself is not modified here.
+ *
+ * First check: the opcode
+ */
+ if (itemp->opcode != instruction->opcode)
+ return MERR_INVALOP;
+
+ /*
+ * Count the operands
+ */
+ if (itemp->operands != instruction->operands)
+ return MERR_INVALOP;
+
+ /*
+ * Is it legal? Templates flagged IF_OPT are rejected unless
+ * `optimizing' is positive.
+ */
+ if (!(optimizing > 0) && itemp_has(itemp, IF_OPT))
+ return MERR_INVALOP;
+
+ /*
+ * If an explicit {evex}/{vex3}/{vex2} prefix was given, the template
+ * must carry the corresponding IF_EVEX or IF_VEX flag.
+ */
+ switch (instruction->prefixes[PPS_VEX]) {
+ case P_EVEX:
+ if (!itemp_has(itemp, IF_EVEX))
+ return MERR_ENCMISMATCH;
+ break;
+ case P_VEX3:
+ case P_VEX2:
+ if (!itemp_has(itemp, IF_VEX))
+ return MERR_ENCMISMATCH;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Check that no spurious colons or TOs are present
+ */
+ for (i = 0; i < itemp->operands; i++)
+ if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
+ return MERR_INVALOP;
+
+ /*
+ * Process size flags: translate the template's IF_S* flag into the
+ * default operand size `asize' (0 when there is none).
+ */
+ switch (itemp_smask(itemp)) {
+ case IF_GENBIT(IF_SB):
+ asize = BITS8;
+ break;
+ case IF_GENBIT(IF_SW):
+ asize = BITS16;
+ break;
+ case IF_GENBIT(IF_SD):
+ asize = BITS32;
+ break;
+ case IF_GENBIT(IF_SQ):
+ asize = BITS64;
+ break;
+ case IF_GENBIT(IF_SO):
+ asize = BITS128;
+ break;
+ case IF_GENBIT(IF_SY):
+ asize = BITS256;
+ break;
+ case IF_GENBIT(IF_SZ):
+ asize = BITS512;
+ break;
+ case IF_GENBIT(IF_SIZE): /* default size follows the current mode */
+ switch (bits) {
+ case 16:
+ asize = BITS16;
+ break;
+ case 32:
+ asize = BITS32;
+ break;
+ case 64:
+ asize = BITS64;
+ break;
+ default:
+ asize = 0;
+ break;
+ }
+ break;
+ default:
+ asize = 0;
+ break;
+ }
+
+ if (itemp_armask(itemp)) {
+ /* S- flags only apply to a specific operand */
+ i = itemp_arg(itemp);
+ memset(size, 0, sizeof size);
+ size[i] = asize;
+ } else {
+ /* S- flags apply to all operands */
+ for (i = 0; i < MAX_OPERANDS; i++)
+ size[i] = asize;
+ }
+
+ /*
+ * Check that the operand flags all match up.
+ * It's a bit tricky, so let's be verbose:
+ *
+ * 1) Find out the size of the operand. If the instruction
+ * doesn't specify one, try to guess it either from the
+ * template (IF_S* flag) or from the code bits.
+ *
+ * 2) If the template operand does not match the instruction, OR
+ * the template has an operand size specified AND this size differs
+ * from the one the instruction has (perhaps obtained from the
+ * code bits), then:
+ * a) Check that only the sizes of instruction and operand differ
+ * and that the other characteristics do match
+ * b) Perhaps it's a register specified in the instruction, so
+ * for such a case we just mark that operand as "size
+ * missing", which turns on the fuzzy operand size
+ * logic (handled by the caller)
+ */
+ for (i = 0; i < itemp->operands; i++) {
+ opflags_t type = instruction->oprs[i].type;
+ decoflags_t deco = instruction->oprs[i].decoflags;
+ bool is_broadcast = deco & BRDCAST_MASK;
+ uint8_t brcast_num = 0;
+ opflags_t template_opsize, insn_opsize;
+
+ if (!(type & SIZE_MASK))
+ type |= size[i]; /* no explicit size: fall back to the default */
+
+ insn_opsize = type & SIZE_MASK;
+ if (!is_broadcast) {
+ template_opsize = itemp->opd[i] & SIZE_MASK;
+ } else {
+ decoflags_t deco_brsize = itemp->deco[i] & BRSIZE_MASK;
+ /*
+ * When broadcasting, the element size depends on
+ * the instruction type. The decorator flag should match.
+ */
+
+ if (deco_brsize) {
+ template_opsize = (deco_brsize == BR_BITS32 ? BITS32 : BITS64);
+ /* calculate the proper number : {1to<brcast_num>} */
+ brcast_num = get_broadcast_num(itemp->opd[i], template_opsize);
+ } else {
+ template_opsize = 0;
+ }
+ }
+
+ if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
+ (deco & ~itemp->deco[i] & ~BRNUM_MASK)) {
+ return MERR_INVALOP;
+ } else if (template_opsize) {
+ if (template_opsize != insn_opsize) {
+ if (insn_opsize) {
+ return MERR_INVALOP;
+ } else if (!is_class(REGISTER, type)) {
+ /*
+ * Note: we don't honor extrinsic operand sizes for registers,
+ * so "missing operand size" for a register should be
+ * considered a wildcard match rather than an error.
+ */
+ opsizemissing = true;
+ }
+ } else if (is_broadcast &&
+ (brcast_num !=
+ (2U << ((deco & BRNUM_MASK) >> BRNUM_SHIFT)))) {
+ /*
+ * The broadcasting opsize matches but the number of repeated
+ * memory elements does not.
+ * If a 64-bit double precision float is broadcast to ymm (256b),
+ * the broadcasting decorator must be {1to4}.
+ */
+ return MERR_BRNUMMISMATCH;
+ }
+ }
+ }
+
+ if (opsizemissing)
+ return MERR_OPSIZEMISSING;
+
+ /*
+ * Check operand sizes. With IF_SM (all operands) or IF_SM2 (first
+ * two operands), the first size found among those template operands
+ * replaces the default size of every one of them.
+ */
+ if (itemp_has(itemp, IF_SM) || itemp_has(itemp, IF_SM2)) {
+ oprs = (itemp_has(itemp, IF_SM2) ? 2 : itemp->operands);
+ for (i = 0; i < oprs; i++) {
+ asize = itemp->opd[i] & SIZE_MASK;
+ if (asize) {
+ for (i = 0; i < oprs; i++) /* reuses i; the break below ends the outer loop */
+ size[i] = asize;
+ break;
+ }
+ }
+ } else {
+ oprs = itemp->operands; /* oprs is not read again after this point */
+ }
+
+ for (i = 0; i < itemp->operands; i++) {
+ if (!(itemp->opd[i] & SIZE_MASK) &&
+ (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
+ return MERR_OPSIZEMISMATCH;
+ }
+
+ /*
+ * Check template is okay at the set cpu level
+ */
+ if (iflag_cmp_cpu_level(&insns_flags[itemp->iflag_idx], &cpu) > 0)
+ return MERR_BADCPU;
+
+ /*
+ * Verify the appropriate long mode flag: IF_NOLONG templates are
+ * rejected in 64-bit mode, IF_LONG templates in any other mode.
+ */
+ if (itemp_has(itemp, (bits == 64 ? IF_NOLONG : IF_LONG)))
+ return MERR_BADMODE;
+
+ /*
+ * If we have a HLE prefix, look for the NOHLE flag
+ */
+ if (itemp_has(itemp, IF_NOHLE) &&
+ (has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
+ has_prefix(instruction, PPS_REP, P_XRELEASE)))
+ return MERR_BADHLE;
+
+ /*
+ * Check if special handling is needed for jumps (first code byte
+ * 0370 or 0371). Note that returning here skips the BND/REPNE
+ * prefix checks below for such templates.
+ */
+ if ((itemp->code[0] & ~1) == 0370)
+ return MOK_JUMP;
+
+ /*
+ * Check if BND prefix is allowed.
+ * Other 0xF2 (REPNE/REPNZ) prefix is prohibited.
+ */
+ if (!itemp_has(itemp, IF_BND) &&
+ (has_prefix(instruction, PPS_REP, P_BND) ||
+ has_prefix(instruction, PPS_REP, P_NOBND)))
+ return MERR_BADBND;
+ else if (itemp_has(itemp, IF_BND) &&
+ (has_prefix(instruction, PPS_REP, P_REPNE) ||
+ has_prefix(instruction, PPS_REP, P_REPNZ)))
+ return MERR_BADREPNE;
+
+ return MOK_GOOD;
+ }
+
+/*
+ * Check if ModR/M.mod should/can be 01.
+ * - EAF_BYTEOFFS is set
+ * - offset can fit in a byte when EVEX is not used
+ * - offset can be compressed when EVEX is used
+ */
+#define IS_MOD_01() (input->eaflags & EAF_BYTEOFFS || \
+ (o >= -128 && o <= 127 && \
+ seg == NO_SEG && !forw_ref && \
+ !(input->eaflags & EAF_WORDOFFS) && \
+ !(ins->rex & REX_EV)) || \
+ (ins->rex & REX_EV && \
+ is_disp8n(input, ins, &output->disp8)))
+
+static enum ea_type process_ea(operand *input, ea *output, int bits,
+ int rfield, opflags_t rflags, insn *ins)
+{
+ bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
+ int addrbits = ins->addr_size;
+ int eaflags = input->eaflags;
+
+ output->type = EA_SCALAR;
+ output->rip = false;
+ output->disp8 = 0;
+
+ /* REX flags for the rfield operand */
+ output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
+ /* EVEX.R' flag for the REG operand */
+ ins->evex_p[0] |= evexflags(rfield, 0, EVEX_P0RP, 0);
+
+ if (is_class(REGISTER, input->type)) {
+ /*
+ * It's a direct register.
+ */
+ if (!is_register(input->basereg))
+ goto err;
+
+ if (!is_reg_class(REG_EA, input->basereg))
+ goto err;
+
+ /* broadcasting is not available with a direct register operand. */
+ if (input->decoflags & BRDCAST_MASK) {
+ nasm_error(ERR_NONFATAL, "Broadcasting not allowed from a register");
+ goto err;
+ }
+
+ output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
+ ins->evex_p[0] |= op_evexflags(input, EVEX_P0X, 0);
+ output->sib_present = false; /* no SIB necessary */
+ output->bytes = 0; /* no offset necessary either */
+ output->modrm = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
+ } else {
+ /*
+ * It's a memory reference.
+ */
+
+ /* Embedded rounding or SAE is not available with a mem ref operand. */
+ if (input->decoflags & (ER | SAE)) {
+ nasm_error(ERR_NONFATAL,
+ "Embedded rounding is available only with reg-reg op.");
+ return -1;
+ }
+
+ if (input->basereg == -1 &&
+ (input->indexreg == -1 || input->scale == 0)) {
+ /*
+ * It's a pure offset.
+ */
+ if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
+ input->segment == NO_SEG) {
+ nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
+ input->type &= ~IP_REL;
+ input->type |= MEMORY;
+ }
+
+ if (bits == 64 &&
+ !(IP_REL & ~input->type) && (eaflags & EAF_MIB)) {
+ nasm_error(ERR_NONFATAL, "RIP-relative addressing is prohibited for mib.");
+ return -1;
+ }
+
+ if (eaflags & EAF_BYTEOFFS ||
+ (eaflags & EAF_WORDOFFS &&
+ input->disp_size != (addrbits != 16 ? 32 : 16))) {
+ nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
+ }
+
+ if (bits == 64 && (~input->type & IP_REL)) {
+ output->sib_present = true;
+ output->sib = GEN_SIB(0, 4, 5);
+ output->bytes = 4;
+ output->modrm = GEN_MODRM(0, rfield, 4);
+ output->rip = false;
+ } else {
+ output->sib_present = false;
+ output->bytes = (addrbits != 16 ? 4 : 2);
+ output->modrm = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
+ output->rip = bits == 64;
+ }
+ } else {
+ /*
+ * It's an indirection.
+ */
+ int i = input->indexreg, b = input->basereg, s = input->scale;
+ int32_t seg = input->segment;
+ int hb = input->hintbase, ht = input->hinttype;
+ int t, it, bt; /* register numbers */
+ opflags_t x, ix, bx; /* register flags */
+
+ if (s == 0)
+ i = -1; /* make this easy, at least */
+
+ if (is_register(i)) {
+ it = nasm_regvals[i];
+ ix = nasm_reg_flags[i];
+ } else {
+ it = -1;
+ ix = 0;
+ }
+
+ if (is_register(b)) {
+ bt = nasm_regvals[b];
+ bx = nasm_reg_flags[b];
+ } else {
+ bt = -1;
+ bx = 0;
+ }
+
+ /* if either one are a vector register... */
+ if ((ix|bx) & (XMMREG|YMMREG|ZMMREG) & ~REG_EA) {
+ opflags_t sok = BITS32 | BITS64;
+ int32_t o = input->offset;
+ int mod, scale, index, base;
+
+ /*
+ * For a vector SIB, one has to be a vector and the other,
+ * if present, a GPR. The vector must be the index operand.
+ */
+ if (it == -1 || (bx & (XMMREG|YMMREG|ZMMREG) & ~REG_EA)) {
+ if (s == 0)
+ s = 1;
+ else if (s != 1)
+ goto err;
+
+ t = bt, bt = it, it = t;
+ x = bx, bx = ix, ix = x;
+ }
+
+ if (bt != -1) {
+ if (REG_GPR & ~bx)
+ goto err;
+ if (!(REG64 & ~bx) || !(REG32 & ~bx))
+ sok &= bx;
+ else
+ goto err;
+ }
+
+ /*
+ * While we're here, ensure the user didn't specify
+ * WORD or QWORD
+ */
+ if (input->disp_size == 16 || input->disp_size == 64)
+ goto err;
+
+ if (addrbits == 16 ||
+ (addrbits == 32 && !(sok & BITS32)) ||
+ (addrbits == 64 && !(sok & BITS64)))
+ goto err;
+
+ output->type = ((ix & ZMMREG & ~REG_EA) ? EA_ZMMVSIB
+ : ((ix & YMMREG & ~REG_EA)
+ ? EA_YMMVSIB : EA_XMMVSIB));
+
+ output->rex |= rexflags(it, ix, REX_X);
+ output->rex |= rexflags(bt, bx, REX_B);
+ ins->evex_p[2] |= evexflags(it, 0, EVEX_P2VP, 2);
+
+ index = it & 7; /* it is known to be != -1 */
+
+ switch (s) {
+ case 1:
+ scale = 0;
+ break;
+ case 2:
+ scale = 1;
+ break;
+ case 4:
+ scale = 2;
+ break;
+ case 8:
+ scale = 3;
+ break;
+ default: /* then what the smeg is it? */
+ goto err; /* panic */
+ }
+
+ if (bt == -1) {
+ base = 5;
+ mod = 0;
+ } else {
+ base = (bt & 7);
+ if (base != REG_NUM_EBP && o == 0 &&
+ seg == NO_SEG && !forw_ref &&
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ mod = 0;
+ else if (IS_MOD_01())
+ mod = 1;
+ else
+ mod = 2;
+ }
+
+ output->sib_present = true;
+ output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
+ output->modrm = GEN_MODRM(mod, rfield, 4);
+ output->sib = GEN_SIB(scale, index, base);
+ } else if ((ix|bx) & (BITS32|BITS64)) {
+ /*
+ * it must be a 32/64-bit memory reference. Firstly we have
+ * to check that all registers involved are type E/Rxx.
+ */
+ opflags_t sok = BITS32 | BITS64;
+ int32_t o = input->offset;
+
+ if (it != -1) {
+ if (!(REG64 & ~ix) || !(REG32 & ~ix))
+ sok &= ix;
+ else
+ goto err;
+ }
+
+ if (bt != -1) {
+ if (REG_GPR & ~bx)
+ goto err; /* Invalid register */
+ if (~sok & bx & SIZE_MASK)
+ goto err; /* Invalid size */
+ sok &= bx;
+ }
+
+ /*
+ * While we're here, ensure the user didn't specify
+ * WORD or QWORD
+ */
+ if (input->disp_size == 16 || input->disp_size == 64)
+ goto err;
+
+ if (addrbits == 16 ||
+ (addrbits == 32 && !(sok & BITS32)) ||
+ (addrbits == 64 && !(sok & BITS64)))
+ goto err;
+
+ /* now reorganize base/index */
+ if (s == 1 && bt != it && bt != -1 && it != -1 &&
+ ((hb == b && ht == EAH_NOTBASE) ||
+ (hb == i && ht == EAH_MAKEBASE))) {
+ /* swap if hints say so */
+ t = bt, bt = it, it = t;
+ x = bx, bx = ix, ix = x;
+ }
+
+ if (bt == -1 && s == 1 && !(hb == i && ht == EAH_NOTBASE)) {
+ /* make single reg base, unless hint */
+ bt = it, bx = ix, it = -1, ix = 0;
+ }
+ if (eaflags & EAF_MIB) {
+ /* only for mib operands */
+ if (it == -1 && (hb == b && ht == EAH_NOTBASE)) {
+ /*
+ * make a single reg index [reg*1].
+ * gas uses this form for an explicit index register.
+ */
+ it = bt, ix = bx, bt = -1, bx = 0, s = 1;
+ }
+ if ((ht == EAH_SUMMED) && bt == -1) {
+ /* separate once summed index into [base, index] */
+ bt = it, bx = ix, s--;
+ }
+ } else {
+ if (((s == 2 && it != REG_NUM_ESP &&
+ (!(eaflags & EAF_TIMESTWO) || (ht == EAH_SUMMED))) ||
+ s == 3 || s == 5 || s == 9) && bt == -1) {
+ /* convert 3*EAX to EAX+2*EAX */
+ bt = it, bx = ix, s--;
+ }
+ if (it == -1 && (bt & 7) != REG_NUM_ESP &&
+ (eaflags & EAF_TIMESTWO) &&
+ (hb == b && ht == EAH_NOTBASE)) {
+ /*
+ * convert [NOSPLIT EAX*1]
+ * to sib format with 0x0 displacement - [EAX*1+0].
+ */
+ it = bt, ix = bx, bt = -1, bx = 0, s = 1;
+ }
+ }
+ if (s == 1 && it == REG_NUM_ESP) {
+ /* swap ESP into base if scale is 1 */
+ t = it, it = bt, bt = t;
+ x = ix, ix = bx, bx = x;
+ }
+ if (it == REG_NUM_ESP ||
+ (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
+ goto err; /* wrong, for various reasons */
+
+ output->rex |= rexflags(it, ix, REX_X);
+ output->rex |= rexflags(bt, bx, REX_B);
+
+ if (it == -1 && (bt & 7) != REG_NUM_ESP) {
+ /* no SIB needed */
+ int mod, rm;
+
+ if (bt == -1) {
+ rm = 5;
+ mod = 0;
+ } else {
+ rm = (bt & 7);
+ if (rm != REG_NUM_EBP && o == 0 &&
+ seg == NO_SEG && !forw_ref &&
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ mod = 0;
+ else if (IS_MOD_01())
+ mod = 1;
+ else
+ mod = 2;
+ }
+
+ output->sib_present = false;
+ output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
+ output->modrm = GEN_MODRM(mod, rfield, rm);
+ } else {
+ /* we need a SIB */
+ int mod, scale, index, base;
+
+ if (it == -1)
+ index = 4, s = 1;
+ else
+ index = (it & 7);
+
+ switch (s) {
+ case 1:
+ scale = 0;
+ break;
+ case 2:
+ scale = 1;
+ break;
+ case 4:
+ scale = 2;
+ break;
+ case 8:
+ scale = 3;
+ break;
+ default: /* then what the smeg is it? */
+ goto err; /* panic */
+ }
+
+ if (bt == -1) {
+ base = 5;
+ mod = 0;
+ } else {
+ base = (bt & 7);
+ if (base != REG_NUM_EBP && o == 0 &&
+ seg == NO_SEG && !forw_ref &&
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ mod = 0;
+ else if (IS_MOD_01())
+ mod = 1;
+ else
+ mod = 2;
+ }
+
+ output->sib_present = true;
+ output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
+ output->modrm = GEN_MODRM(mod, rfield, 4);
+ output->sib = GEN_SIB(scale, index, base);
+ }
+ } else { /* it's 16-bit */
+ int mod, rm;
+ int16_t o = input->offset;
+
+ /* check for 64-bit long mode */
+ if (addrbits == 64)
+ goto err;
+
+ /* check all registers are BX, BP, SI or DI */
+ if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
+ (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
+ goto err;
+
+ /* ensure the user didn't specify DWORD/QWORD */
+ if (input->disp_size == 32 || input->disp_size == 64)
+ goto err;
+
+ if (s != 1 && i != -1)
+ goto err; /* no can do, in 16-bit EA */
+ if (b == -1 && i != -1) {
+ int tmp = b;
+ b = i;
+ i = tmp;
+ } /* swap */
+ if ((b == R_SI || b == R_DI) && i != -1) {
+ int tmp = b;
+ b = i;
+ i = tmp;
+ }
+ /* have BX/BP as base, SI/DI index */
+ if (b == i)
+ goto err; /* shouldn't ever happen, in theory */
+ if (i != -1 && b != -1 &&
+ (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
+ goto err; /* invalid combinations */
+ if (b == -1) /* pure offset: handled above */
+ goto err; /* so if it gets to here, panic! */
+
+ rm = -1;
+ if (i != -1)
+ switch (i * 256 + b) {
+ case R_SI * 256 + R_BX:
+ rm = 0;
+ break;
+ case R_DI * 256 + R_BX:
+ rm = 1;
+ break;
+ case R_SI * 256 + R_BP:
+ rm = 2;
+ break;
+ case R_DI * 256 + R_BP:
+ rm = 3;
+ break;
+ } else
+ switch (b) {
+ case R_SI:
+ rm = 4;
+ break;
+ case R_DI:
+ rm = 5;
+ break;
+ case R_BP:
+ rm = 6;
+ break;
+ case R_BX:
+ rm = 7;
+ break;
+ }
+ if (rm == -1) /* can't happen, in theory */
+ goto err; /* so panic if it does */
+
+ if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ mod = 0;
+ else if (IS_MOD_01())
+ mod = 1;
+ else
+ mod = 2;
+
+ output->sib_present = false; /* no SIB - it's 16-bit */
+ output->bytes = mod; /* bytes of offset needed */
+ output->modrm = GEN_MODRM(mod, rfield, rm);
+ }
+ }
+ }
+
+ output->size = 1 + output->sib_present + output->bytes;
+ return output->type;
+
+err:
+ return output->type = EA_INVALID;
+}
+
+/*
+ * Choose the address size for an instruction and store it in
+ * ins->addr_size; when the chosen size is not `addrbits', an
+ * address-size prefix is recorded in ins->prefixes[PPS_ASIZE].
+ *
+ * `valid' is used as a bit set of the address sizes (16, 32, 64) that
+ * are still acceptable.  It starts with the two sizes considered for
+ * `addrbits' and is narrowed by an explicit address-size prefix and by
+ * the registers / displacement sizes of every memory operand.  If
+ * nothing remains, an error is reported and `addrbits' is used.
+ *
+ * Finally, operands carrying the MEM_OFFS type bits whose (explicit or
+ * default) displacement size differs from the chosen address size have
+ * the MEM_OFFS-specific bits removed from their type.
+ */
+static void add_asp(insn *ins, int addrbits)
+{
+    int j, valid;
+    int defdisp;
+
+    valid = (addrbits == 64) ? 64|32 : 32|16;
+
+    /* An explicit prefix on the instruction restricts the choice */
+    switch (ins->prefixes[PPS_ASIZE]) {
+    case P_A16:
+        valid &= 16;
+        break;
+    case P_A32:
+        valid &= 32;
+        break;
+    case P_A64:
+        valid &= 64;
+        break;
+    case P_ASP:
+        valid &= (addrbits == 32) ? 16 : 32;
+        break;
+    default:
+        break;
+    }
+
+    for (j = 0; j < ins->operands; j++) {
+        if (is_class(MEMORY, ins->oprs[j].type)) {
+            opflags_t i, b;
+
+            /* Verify as Register */
+            if (!is_register(ins->oprs[j].indexreg))
+                i = 0;
+            else
+                i = nasm_reg_flags[ins->oprs[j].indexreg];
+
+            /* Verify as Register */
+            if (!is_register(ins->oprs[j].basereg))
+                b = 0;
+            else
+                b = nasm_reg_flags[ins->oprs[j].basereg];
+
+            /* A zero scale means the index register is not used */
+            if (ins->oprs[j].scale == 0)
+                i = 0;
+
+            if (!i && !b) {
+                /* No registers: only an explicit displacement size constrains */
+                int ds = ins->oprs[j].disp_size;
+                if ((addrbits != 64 && ds > 8) ||
+                    (addrbits == 64 && ds == 16))
+                    valid &= ds;
+            } else {
+                /* Each register of a known width pins the address size */
+                if (!(REG16 & ~b))
+                    valid &= 16;
+                if (!(REG32 & ~b))
+                    valid &= 32;
+                if (!(REG64 & ~b))
+                    valid &= 64;
+
+                if (!(REG16 & ~i))
+                    valid &= 16;
+                if (!(REG32 & ~i))
+                    valid &= 32;
+                if (!(REG64 & ~i))
+                    valid &= 64;
+            }
+        }
+    }
+
+    if (valid & addrbits) {
+        ins->addr_size = addrbits;
+    } else if (valid & ((addrbits == 32) ? 16 : 32)) {
+        /* Add an address size prefix */
+        ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;
+        ins->addr_size = (addrbits == 32) ? 16 : 32;
+    } else {
+        /* Impossible... */
+        nasm_error(ERR_NONFATAL, "impossible combination of address sizes");
+        ins->addr_size = addrbits; /* Error recovery */
+    }
+
+    defdisp = ins->addr_size == 16 ? 16 : 32;
+
+    for (j = 0; j < ins->operands; j++) {
+        if (!(MEM_OFFS & ~ins->oprs[j].type) &&
+            (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
+            /*
+             * mem_offs sizes must match the address size; if not,
+             * strip the MEM_OFFS bit and match only EA instructions
+             */
+            ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
+        }
+    }
+}
diff --git a/asm/assemble.h b/asm/assemble.h
new file mode 100644
index 00000000..f85037ad
--- /dev/null
+++ b/asm/assemble.h
@@ -0,0 +1,47 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * assemble.h header file for assemble.c
+ */
+
+#ifndef NASM_ASSEMBLE_H
+#define NASM_ASSEMBLE_H
+
+#include "iflag.h"
+
+/*
+ * Entry points implemented in assemble.c.  Both take the same
+ * arguments: a segment number, an offset, the `bits' setting, an
+ * instruction-flag set `cp' and the instruction itself.
+ *
+ * NOTE(review): presumably insn_size() only computes the encoded
+ * length while assemble() also emits the bytes, each returning a
+ * byte count -- confirm against the definitions in assemble.c.
+ */
+int64_t insn_size(int32_t segment, int64_t offset, int bits, iflag_t cp,
+ insn * instruction);
+int64_t assemble(int32_t segment, int64_t offset, int bits, iflag_t cp,
+ insn * instruction);
+#endif
diff --git a/asm/directiv.dat b/asm/directiv.dat
new file mode 100644
index 00000000..a8d9261c
--- /dev/null
+++ b/asm/directiv.dat
@@ -0,0 +1,63 @@
+;; --------------------------------------------------------------------------
+;;
+;; Copyright 1996-2009 The NASM Authors - All Rights Reserved
+;; See the file AUTHORS included with the NASM distribution for
+;; the specific copyright holders.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following
+;; conditions are met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;; * Redistributions in binary form must reproduce the above
+;; copyright notice, this list of conditions and the following
+;; disclaimer in the documentation and/or other materials provided
+;; with the distribution.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+;; CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+;; INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+;; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+;; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+;; EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+;; --------------------------------------------------------------------------
+;;
+;; List of global NASM directives (including backend-specific ones)
+;;
+
+; --- Global directives
+absolute
+bits
+common
+cpu
+debug
+default
+extern
+float
+global
+list
+section
+segment
+warning
+sectalign
+
+; --- Format-specific directives
+export ; outcoff, outobj
+group ; outobj
+import ; outobj
+library ; outrdf2
+map ; outbin
+module ; outrdf2
+org ; outbin
+osabi ; outelf
+safeseh ; outcoff
+uppercase ; outieee, outobj
diff --git a/asm/directiv.pl b/asm/directiv.pl
new file mode 100755
index 00000000..b567aff9
--- /dev/null
+++ b/asm/directiv.pl
@@ -0,0 +1,183 @@
+#!/usr/bin/perl
+## --------------------------------------------------------------------------
+##
+## Copyright 1996-2009 The NASM Authors - All Rights Reserved
+## See the file AUTHORS included with the NASM distribution for
+## the specific copyright holders.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following
+## conditions are met:
+##
+## * Redistributions of source code must retain the above copyright
+## notice, this list of conditions and the following disclaimer.
+## * Redistributions in binary form must reproduce the above
+## copyright notice, this list of conditions and the following
+## disclaimer in the documentation and/or other materials provided
+## with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# Generate a perfect hash for directive parsing
+#
+# Usage:
+# directiv.pl h directiv.dat directiv.h (to generate C header)
+# directiv.pl c directiv.dat directiv.c (to generate C source)
+#
+
+# gen_perfect_hash() and verify_hash_table(), called when generating the
+# C source, are not defined in this script -- presumably they are
+# provided by phash.ph (confirm).
+require 'phash.ph';
+
+# Command line: <mode ('h' or 'c')> <input .dat file> <output file>
+my($output, $directives_dat, $outfile) = @ARGV;
+
+# @directives: names read from the .dat file, in file order.
+# @specials: extra entries emitted ahead of the real directives in both
+# the generated enum and the generated name table.
+@directives = ();
+@specials = ('none', 'unknown');
+
+open(DD, "< ${directives_dat}\0")
+ or die "$0: cannot open: ${directives_dat}: $!\n";
+while (defined($line = <DD>)) {
+ chomp $line;
+ # Accept a line holding a single alphanumeric word, optionally followed
+ # by a comment introduced by ';' or '#'.  Anything else (blank lines,
+ # comment-only lines) is ignored.
+ if ($line =~ /^\s*([[:alnum:]]+)\s*(|[\;\#].*)$/) {
+ push(@directives, $1);
+ }
+}
+close(DD);
+
+#
+# Emit the requested output file.
+#
+#   'h': header with `enum directives' (the @specials entries first,
+#        then one upper-cased D_* entry per directive) and prototypes.
+#   'c': source with the directives[] name table and find_directive(),
+#        a lookup built on the perfect hash of the directive names.
+#
+if ($output eq 'h') {
+    open(H, "> ${outfile}\0")
+        or die "$0: cannot create: ${outfile}: $!\n";
+
+    print H "/*\n";
+    print H " * This file is generated from directiv.dat\n";
+    print H " * by directiv.pl; do not edit.\n";
+    print H " */\n";
+    print H "\n";
+
+    print H "#ifndef NASM_DIRECTIVES_H\n";
+    print H "#define NASM_DIRECTIVES_H\n";
+    print H "\n";
+
+    # $c is the separator printed before each entry: '{' opens the
+    # list for the first entry, ',' separates all later ones.
+    $c = '{';
+    print H "enum directives ";
+    foreach $d (@specials) {
+        print H "$c\n D_$d";
+        $c = ',';
+    }
+    foreach $d (@directives) {
+        print H "$c\n D_\U$d";
+        $c = ',';
+    }
+    print H "\n};\n\n";
+    printf H "extern const char * const directives[%d];\n",
+        scalar(@directives)+scalar(@specials);
+    print H "enum directives find_directive(const char *token);\n\n";
+    print H "#endif /* NASM_DIRECTIVES_H */\n";
+} elsif ($output eq 'c') {
+    %directive = ();
+    $n = 0;
+    foreach $d (@directives) {
+        if (exists($directive{$d})) {
+            die "$0: $directives_dat: duplicate directive: $d\n";
+        }
+        $directive{$d} = $n++; # This is zero-based, unlike the enum!
+    }
+
+    @hashinfo = gen_perfect_hash(\%directive);
+    if (!@hashinfo) {
+        die "$0: no hash found\n";
+    }
+
+    # Paranoia...
+    verify_hash_table(\%directive, \@hashinfo);
+
+    ($n, $sv, $g) = @hashinfo;
+
+    # The generated code masks with $n-1, so $n must be a power of two
+    die if ($n & ($n-1));
+
+    # Report the real output file name on failure (the message used to
+    # interpolate an undefined variable and so printed no name at all).
+    open(C, "> ${outfile}\0")
+        or die "$0: cannot create: ${outfile}: $!\n";
+
+    print C "/*\n";
+    print C " * This file is generated from directiv.dat\n";
+    print C " * by directiv.pl; do not edit.\n";
+    print C " */\n";
+    print C "\n";
+
+    print C "#include \"compiler.h\"\n";
+    print C "#include <string.h>\n";
+    print C "#include \"nasm.h\"\n";
+    print C "#include \"hashtbl.h\"\n";
+    print C "#include \"directiv.h\"\n";
+    print C "\n";
+
+    # Name table: NULL for each special, then the directive names
+    printf C "const char * const directives[%d] =\n",
+        scalar(@directives)+scalar(@specials);
+    $c = '{';
+    foreach $d (@specials) {
+        print C "$c\n NULL";
+        $c = ',';
+    }
+    foreach $d (@directives) {
+        print C "$c\n \"$d\"";
+        $c = ',';
+    }
+    print C "\n};\n\n";
+
+    print C "enum directives find_directive(const char *token)\n";
+    print C "{\n";
+
+    # Put a large value in unused slots.  This makes it extremely unlikely
+    # that any combination that involves unused slot will pass the range test.
+    # This speeds up rejection of unrecognized tokens, i.e. identifiers.
+    print C "#define UNUSED (65535/3)\n";
+
+    # The two hash tables are interleaved in @$g: even slots feed
+    # hash1[], odd slots feed hash2[].
+    print C " static const int16_t hash1[$n] = {\n";
+    for ($i = 0; $i < $n; $i++) {
+        my $h = ${$g}[$i*2+0];
+        print C " ", defined($h) ? $h : 'UNUSED', ",\n";
+    }
+    print C " };\n";
+
+    print C " static const int16_t hash2[$n] = {\n";
+    for ($i = 0; $i < $n; $i++) {
+        my $h = ${$g}[$i*2+1];
+        print C " ", defined($h) ? $h : 'UNUSED', ",\n";
+    }
+    print C " };\n";
+
+    print C " uint32_t k1, k2;\n";
+    print C " uint64_t crc;\n";
+    # For correct overflow behavior, "ix" should be unsigned of the same
+    # width as the hash arrays.
+    print C " uint16_t ix;\n";
+    print C "\n";
+    printf C " crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
+        $$sv[0], $$sv[1];
+    print C " k1 = (uint32_t)crc;\n";
+    print C " k2 = (uint32_t)(crc >> 32);\n";
+    print C "\n";
+    printf C " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+    printf C " if (ix >= %d)\n", scalar(@directives);
+    print C " return D_unknown;\n";
+    print C "\n";
+    # The hash is zero-based; skip over the specials to get the enum value
+    printf C " ix += %d;\n", scalar(@specials);
+    print C " if (nasm_stricmp(token, directives[ix]))\n";
+    print C " return D_unknown;\n";
+    print C "\n";
+    print C " return ix;\n";
+    print C "}\n";
+}
diff --git a/asm/eval.c b/asm/eval.c
new file mode 100644
index 00000000..029d8a04
--- /dev/null
+++ b/asm/eval.c
@@ -0,0 +1,1015 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * eval.c expression evaluator for the Netwide Assembler
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "eval.h"
+#include "labels.h"
+#include "float.h"
+
+#define TEMPEXPRS_DELTA 128
+#define TEMPEXPR_DELTA 8
+
+static scanner scan; /* Address of scanner routine */
+
+static expr **tempexprs = NULL;
+static int ntempexprs;
+static int tempexprs_size = 0;
+
+static expr *tempexpr;
+static int ntempexpr;
+static int tempexpr_size;
+
+static struct tokenval *tokval; /* The current token */
+static int i; /* The t_type of tokval */
+
+static void *scpriv;
+static int *opflags;
+
+static struct eval_hints *hint;
+
+extern int in_abs_seg; /* ABSOLUTE segment flag */
+extern int32_t abs_seg; /* ABSOLUTE segment */
+extern int32_t abs_offset; /* ABSOLUTE segment offset */
+
+/*
+ * Free every temporary expression vector recorded in tempexprs[] and
+ * then the tempexprs[] table itself.
+ *
+ * Unimportant cleanup is done to avoid confusing people who are trying
+ * to debug real memory leaks
+ */
+void eval_cleanup(void)
+{
+    while (ntempexprs)
+        nasm_free(tempexprs[--ntempexprs]);
+    nasm_free(tempexprs);
+
+    /*
+     * Forget the freed table.  Without this, a later finishtemp()
+     * would see a non-zero capacity and store into freed memory.
+     */
+    tempexprs = NULL;
+    tempexprs_size = 0;
+}
+
+/*
+ * Start a new temporary expression: no storage allocated yet and no
+ * terms stored.
+ */
+static void begintemp(void)
+{
+    ntempexpr = 0;
+    tempexpr_size = 0;
+    tempexpr = NULL;
+}
+
+/*
+ * Append one (type, value) term to the temporary expression under
+ * construction, enlarging its storage in TEMPEXPR_DELTA steps as needed.
+ */
+static void addtotemp(int32_t type, int64_t value)
+{
+    expr *slot;
+
+    while (tempexpr_size <= ntempexpr) {
+        tempexpr_size += TEMPEXPR_DELTA;
+        tempexpr = nasm_realloc(tempexpr,
+                                tempexpr_size * sizeof(*tempexpr));
+    }
+
+    slot = &tempexpr[ntempexpr++];
+    slot->type = type;
+    slot->value = value;
+}
+
+/*
+ * Terminate the temporary expression under construction, record it in
+ * tempexprs[] (the table eval_cleanup() walks) and return it.
+ */
+static expr *finishtemp(void)
+{
+    expr *done;
+
+    addtotemp(0L, 0L);          /* terminate */
+    done = tempexpr;            /* read only after the final append */
+
+    while (tempexprs_size <= ntempexprs) {
+        tempexprs_size += TEMPEXPRS_DELTA;
+        tempexprs = nasm_realloc(tempexprs,
+                                 tempexprs_size * sizeof(*tempexprs));
+    }
+
+    tempexprs[ntempexprs] = done;
+    ntempexprs++;
+    return done;
+}
+
+/*
+ * Add two vector datatypes. We have some bizarre behaviour on far-
+ * absolute segment types: we preserve them during addition _only_
+ * if one of the segments is a truly pure scalar.
+ *
+ * The result is a newly built temporary vector; the terms of p and q
+ * are only read.  The merge below always emits the term with the
+ * smaller type first, so it assumes both inputs are ordered by
+ * ascending type.  If `hint' is set, its type becomes EAH_SUMMED
+ * whenever two terms of the same type add up to a non-zero value.
+ */
+static expr *add_vectors(expr * p, expr * q)
+{
+ int preserve;
+
+ preserve = is_really_simple(p) || is_really_simple(q);
+
+ begintemp();
+
+ /* Merge while both vectors still have terms below the far-absolute range */
+ while (p->type && q->type &&
+ p->type < EXPR_SEGBASE + SEG_ABS &&
+ q->type < EXPR_SEGBASE + SEG_ABS) {
+ int lasttype;
+
+ if (p->type > q->type) {
+ addtotemp(q->type, q->value);
+ lasttype = q++->type;
+ } else if (p->type < q->type) {
+ addtotemp(p->type, p->value);
+ lasttype = p++->type;
+ } else { /* *p and *q have same type */
+ int64_t sum = p->value + q->value;
+ /* terms that cancel out are dropped from the result */
+ if (sum) {
+ addtotemp(p->type, sum);
+ if (hint)
+ hint->type = EAH_SUMMED;
+ }
+ lasttype = p->type;
+ p++, q++;
+ }
+ /* once an EXPR_UNKNOWN term has been handled, stop and return */
+ if (lasttype == EXPR_UNKNOWN) {
+ return finishtemp();
+ }
+ }
+ /*
+ * Copy whatever is left of either vector; far-absolute segment terms
+ * are copied only when `preserve' is set.
+ */
+ while (p->type && (preserve || p->type < EXPR_SEGBASE + SEG_ABS)) {
+ addtotemp(p->type, p->value);
+ p++;
+ }
+ while (q->type && (preserve || q->type < EXPR_SEGBASE + SEG_ABS)) {
+ addtotemp(q->type, q->value);
+ q++;
+ }
+
+ return finishtemp();
+}
+
+/*
+ * Multiply every term of a vector by a scalar, in place, and return
+ * the same vector.  The vector is cut off at the first far-absolute
+ * segment term, so that part is stripped if present.
+ *
+ * UNKNOWN terms need no special handling here; scaling them like any
+ * other term silently does the Right Thing.
+ *
+ * With `affect_hints' set, a MAKEBASE hint that names a register
+ * being multiplied is turned into NOTBASE.  This allows [eax*1+ebx]
+ * to hint EBX rather than EAX as the base register.
+ */
+static expr *scalar_mult(expr * vect, int64_t scalar, int affect_hints)
+{
+    expr *term;
+
+    for (term = vect;
+         term->type && term->type < EXPR_SEGBASE + SEG_ABS;
+         term++) {
+        term->value = scalar * (term->value);
+        if (affect_hints && hint &&
+            hint->type == EAH_MAKEBASE && term->type == hint->base)
+            hint->type = EAH_NOTBASE;
+    }
+    term->type = 0;             /* terminate the vector here */
+
+    return vect;
+}
+
+/*
+ * Build a temporary vector holding the single EXPR_SIMPLE term `scalar'.
+ */
+static expr *scalarvect(int64_t scalar)
+{
+    expr *vect;
+
+    begintemp();
+    addtotemp(EXPR_SIMPLE, scalar);
+    vect = finishtemp();
+
+    return vect;
+}
+
+/*
+ * Build a temporary vector holding a single EXPR_UNKNOWN term of value 1.
+ */
+static expr *unknown_expr(void)
+{
+    expr *vect;
+
+    begintemp();
+    addtotemp(EXPR_UNKNOWN, 1L);
+    vect = finishtemp();
+
+    return vect;
+}
+
+/*
+ * The SEG operator: work out the segment part of a relocatable value.
+ * On failure the error is reported here and NULL is returned.
+ */
+static expr *segment_part(expr * e)
+{
+    int32_t seg;
+    int32_t base;
+
+    if (is_unknown(e))
+        return unknown_expr();
+
+    /* Neither a non-relocatable value nor one without a segment will do */
+    if (!is_reloc(e) || (seg = reloc_seg(e)) == NO_SEG) {
+        nasm_error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
+        return NULL;
+    }
+
+    /* Absolute segment: the result is the number with SEG_ABS cleared */
+    if (seg & SEG_ABS)
+        return scalarvect(seg & ~SEG_ABS);
+
+    if (seg & 1) {
+        nasm_error(ERR_NONFATAL, "SEG applied to something which"
+                   " is already a segment base");
+        return NULL;
+    }
+
+    /* Ask the output format for the segment base of seg + 1 */
+    base = ofmt->segbase(seg + 1);
+
+    begintemp();
+    if (base == NO_SEG)
+        addtotemp(EXPR_UNKNOWN, 1L);
+    else
+        addtotemp(EXPR_SEGBASE + base, 1L);
+    return finishtemp();
+}
+
+/*
+ * Recursive-descent parser. Called with a single boolean operand,
+ * which is true if the evaluation is critical (i.e. unresolved
+ * symbols are an error condition). Must update the global `i' to
+ * reflect the token after the parsed string. May return NULL.
+ *
+ * evaluate() should report its own errors: on return it is assumed
+ * that if NULL has been returned, the error has already been
+ * reported.
+ */
+
+/*
+ * Grammar parsed is:
+ *
+ * expr : bexpr [ WRT expr6 ]
+ * bexpr : rexp0 or expr0 depending on relative-mode setting
+ * rexp0 : rexp1 [ {||} rexp1...]
+ * rexp1 : rexp2 [ {^^} rexp2...]
+ * rexp2 : rexp3 [ {&&} rexp3...]
+ * rexp3 : expr0 [ {=,==,<>,!=,<,>,<=,>=} expr0 ]
+ * expr0 : expr1 [ {|} expr1...]
+ * expr1 : expr2 [ {^} expr2...]
+ * expr2 : expr3 [ {&} expr3...]
+ * expr3 : expr4 [ {<<,>>} expr4...]
+ * expr4 : expr5 [ {+,-} expr5...]
+ * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
+ * expr6 : { ~,+,-,IFUNC,SEG } expr6
+ * | (bexpr)
+ * | symbol
+ * | $
+ * | number
+ */
+
+static expr *rexp0(int), *rexp1(int), *rexp2(int), *rexp3(int);
+
+static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
+static expr *expr4(int), *expr5(int), *expr6(int);
+
+static expr *(*bexpr) (int);
+
+/*
+ * rexp0: a sequence of rexp1 operands joined by the boolean OR
+ * operator (TOKEN_DBL_OR, `||').
+ *
+ * Returns NULL if an operand fails to parse.  A non-scalar operand is
+ * reported as an error but evaluation carries on.  The result is an
+ * unknown expression if either operand is just-unknown, otherwise the
+ * scalar 0 or 1.
+ */
+static expr *rexp0(int critical)
+{
+    expr *e, *f;
+
+    e = rexp1(critical);
+    if (!e)
+        return NULL;
+
+    while (i == TOKEN_DBL_OR) {
+        i = scan(scpriv, tokval);
+        f = rexp1(critical);
+        if (!f)
+            return NULL;
+        if (!(is_simple(e) || is_just_unknown(e)) ||
+            !(is_simple(f) || is_just_unknown(f))) {
+            /* name the operator actually being parsed here, `||' */
+            nasm_error(ERR_NONFATAL, "`||' operator may only be applied to"
+                       " scalar values");
+        }
+
+        if (is_just_unknown(e) || is_just_unknown(f))
+            e = unknown_expr();
+        else
+            e = scalarvect((int64_t)(reloc_value(e) || reloc_value(f)));
+    }
+    return e;
+}
+
+/*
+ * rexp1: a sequence of rexp2 operands joined by the boolean XOR
+ * operator (TOKEN_DBL_XOR, `^^').
+ *
+ * Returns NULL if an operand fails to parse.  A non-scalar operand is
+ * reported as an error but evaluation carries on.  The result is an
+ * unknown expression if either operand is just-unknown, otherwise the
+ * scalar 0 or 1.
+ */
+static expr *rexp1(int critical)
+{
+    expr *e, *f;
+
+    e = rexp2(critical);
+    if (!e)
+        return NULL;
+
+    while (i == TOKEN_DBL_XOR) {
+        i = scan(scpriv, tokval);
+        f = rexp2(critical);
+        if (!f)
+            return NULL;
+        if (!(is_simple(e) || is_just_unknown(e)) ||
+            !(is_simple(f) || is_just_unknown(f))) {
+            /* name the operator actually being parsed here, `^^' */
+            nasm_error(ERR_NONFATAL, "`^^' operator may only be applied to"
+                       " scalar values");
+        }
+
+        if (is_just_unknown(e) || is_just_unknown(f))
+            e = unknown_expr();
+        else
+            /* each side is reduced to 0/1 with `!' before the XOR */
+            e = scalarvect((int64_t)(!reloc_value(e) ^ !reloc_value(f)));
+    }
+    return e;
+}
+
+/*
+ * rexp2: a sequence of rexp3 operands joined by the boolean AND
+ * operator (TOKEN_DBL_AND, `&&').
+ *
+ * Returns NULL if an operand fails to parse.  A non-scalar operand is
+ * reported as an error but evaluation carries on.  The result is an
+ * unknown expression if either operand is just-unknown, otherwise the
+ * scalar 0 or 1.
+ */
+static expr *rexp2(int critical)
+{
+    expr *e, *f;
+
+    e = rexp3(critical);
+    if (!e)
+        return NULL;
+    while (i == TOKEN_DBL_AND) {
+        i = scan(scpriv, tokval);
+        f = rexp3(critical);
+        if (!f)
+            return NULL;
+        if (!(is_simple(e) || is_just_unknown(e)) ||
+            !(is_simple(f) || is_just_unknown(f))) {
+            /* name the operator actually being parsed here, `&&' */
+            nasm_error(ERR_NONFATAL, "`&&' operator may only be applied to"
+                       " scalar values");
+        }
+        if (is_just_unknown(e) || is_just_unknown(f))
+            e = unknown_expr();
+        else
+            e = scalarvect((int64_t)(reloc_value(e) && reloc_value(f)));
+    }
+    return e;
+}
+
+/*
+ * rexp3: expr0 operands joined by the relational operators
+ * (TOKEN_EQ, TOKEN_NE, TOKEN_LT, TOKEN_GT, TOKEN_LE, TOKEN_GE).
+ *
+ * Each comparison is decided from the difference of its two operands.
+ * The result is the scalar 0 or 1, or an unknown expression when the
+ * difference is unknown.  Returns NULL if an operand fails to parse.
+ */
+static expr *rexp3(int critical)
+{
+ expr *e, *f;
+ int64_t v;
+
+ e = expr0(critical);
+ if (!e)
+ return NULL;
+
+ while (i == TOKEN_EQ || i == TOKEN_LT || i == TOKEN_GT ||
+ i == TOKEN_NE || i == TOKEN_LE || i == TOKEN_GE) {
+ int j = i;
+ i = scan(scpriv, tokval);
+ f = expr0(critical);
+ if (!f)
+ return NULL;
+
+ /* e = e - f; note that scalar_mult() negates f in place */
+ e = add_vectors(e, scalar_mult(f, -1L, false));
+
+ switch (j) {
+ case TOKEN_EQ:
+ case TOKEN_NE:
+ if (is_unknown(e))
+ v = -1; /* means unknown */
+ else if (!is_really_simple(e) || reloc_value(e) != 0)
+ v = (j == TOKEN_NE); /* unequal, so return true if NE */
+ else
+ v = (j == TOKEN_EQ); /* equal, so return true if EQ */
+ break;
+ default:
+ /* ordering comparisons need the difference to be a pure scalar */
+ if (is_unknown(e))
+ v = -1; /* means unknown */
+ else if (!is_really_simple(e)) {
+ nasm_error(ERR_NONFATAL,
+ "`%s': operands differ by a non-scalar",
+ (j == TOKEN_LE ? "<=" : j == TOKEN_LT ? "<" : j ==
+ TOKEN_GE ? ">=" : ">"));
+ v = 0; /* must set it to _something_ */
+ } else {
+ /* the sign of the difference selects which operators hold */
+ int64_t vv = reloc_value(e);
+ if (vv == 0)
+ v = (j == TOKEN_LE || j == TOKEN_GE);
+ else if (vv > 0)
+ v = (j == TOKEN_GE || j == TOKEN_GT);
+ else /* vv < 0 */
+ v = (j == TOKEN_LE || j == TOKEN_LT);
+ }
+ break;
+ }
+
+ /* the result becomes the left operand of any further comparison */
+ if (v == -1)
+ e = unknown_expr();
+ else
+ e = scalarvect(v);
+ }
+ return e;
+}
+
+/*
+ * expr0: a sequence of expr1 operands joined by bitwise OR (`|').
+ *
+ * Returns NULL if an operand fails to parse.  Non-scalar operands are
+ * reported as an error without stopping evaluation.  The result is an
+ * unknown expression if either operand is just-unknown, otherwise the
+ * scalar OR of the two values.
+ */
+static expr *expr0(int critical)
+{
+    expr *lhs, *rhs;
+    int lhs_ok, rhs_ok;
+
+    lhs = expr1(critical);
+    if (lhs == NULL)
+        return NULL;
+
+    while (i == '|') {
+        i = scan(scpriv, tokval);
+        rhs = expr1(critical);
+        if (rhs == NULL)
+            return NULL;
+
+        lhs_ok = is_simple(lhs) || is_just_unknown(lhs);
+        rhs_ok = is_simple(rhs) || is_just_unknown(rhs);
+        if (!(lhs_ok && rhs_ok)) {
+            nasm_error(ERR_NONFATAL, "`|' operator may only be applied to"
+                       " scalar values");
+        }
+
+        if (!is_just_unknown(lhs) && !is_just_unknown(rhs))
+            lhs = scalarvect(reloc_value(lhs) | reloc_value(rhs));
+        else
+            lhs = unknown_expr();
+    }
+    return lhs;
+}
+
+/*
+ * expr1: a sequence of expr2 operands joined by bitwise XOR (`^').
+ *
+ * Returns NULL if an operand fails to parse.  Non-scalar operands are
+ * reported as an error without stopping evaluation.  The result is an
+ * unknown expression if either operand is just-unknown, otherwise the
+ * scalar XOR of the two values.
+ */
+static expr *expr1(int critical)
+{
+    expr *lhs, *rhs;
+    int lhs_ok, rhs_ok;
+
+    lhs = expr2(critical);
+    if (lhs == NULL)
+        return NULL;
+
+    while (i == '^') {
+        i = scan(scpriv, tokval);
+        rhs = expr2(critical);
+        if (rhs == NULL)
+            return NULL;
+
+        lhs_ok = is_simple(lhs) || is_just_unknown(lhs);
+        rhs_ok = is_simple(rhs) || is_just_unknown(rhs);
+        if (!(lhs_ok && rhs_ok)) {
+            nasm_error(ERR_NONFATAL, "`^' operator may only be applied to"
+                       " scalar values");
+        }
+
+        if (!is_just_unknown(lhs) && !is_just_unknown(rhs))
+            lhs = scalarvect(reloc_value(lhs) ^ reloc_value(rhs));
+        else
+            lhs = unknown_expr();
+    }
+    return lhs;
+}
+
+/*
+ * expr2: a sequence of expr3 operands joined by bitwise AND (`&').
+ *
+ * Returns NULL if an operand fails to parse.  Non-scalar operands are
+ * reported as an error without stopping evaluation.  The result is an
+ * unknown expression if either operand is just-unknown, otherwise the
+ * scalar AND of the two values.
+ */
+static expr *expr2(int critical)
+{
+    expr *lhs, *rhs;
+    int lhs_ok, rhs_ok;
+
+    lhs = expr3(critical);
+    if (lhs == NULL)
+        return NULL;
+
+    while (i == '&') {
+        i = scan(scpriv, tokval);
+        rhs = expr3(critical);
+        if (rhs == NULL)
+            return NULL;
+
+        lhs_ok = is_simple(lhs) || is_just_unknown(lhs);
+        rhs_ok = is_simple(rhs) || is_just_unknown(rhs);
+        if (!(lhs_ok && rhs_ok)) {
+            nasm_error(ERR_NONFATAL, "`&' operator may only be applied to"
+                       " scalar values");
+        }
+
+        if (!is_just_unknown(lhs) && !is_just_unknown(rhs))
+            lhs = scalarvect(reloc_value(lhs) & reloc_value(rhs));
+        else
+            lhs = unknown_expr();
+    }
+    return lhs;
+}
+
+/*
+ * expr3: a sequence of expr4 operands joined by the shift operators
+ * (TOKEN_SHL, TOKEN_SHR).
+ *
+ * Returns NULL if an operand fails to parse.  Non-scalar operands are
+ * reported as an error and the left operand is left as it was.  The
+ * result is an unknown expression if either operand is just-unknown.
+ *
+ * Both shifts work on the value as an unsigned 64-bit quantity, so
+ * TOKEN_SHR is a logical shift.  A shift count outside 0..63, which
+ * includes negative counts, yields 0.
+ */
+static expr *expr3(int critical)
+{
+    expr *e, *f;
+
+    e = expr4(critical);
+    if (!e)
+        return NULL;
+
+    while (i == TOKEN_SHL || i == TOKEN_SHR) {
+        int j = i;
+        i = scan(scpriv, tokval);
+        f = expr4(critical);
+        if (!f)
+            return NULL;
+        if (!(is_simple(e) || is_just_unknown(e)) ||
+            !(is_simple(f) || is_just_unknown(f))) {
+            nasm_error(ERR_NONFATAL, "shift operator may only be applied to"
+                       " scalar values");
+        } else if (is_just_unknown(e) || is_just_unknown(f)) {
+            e = unknown_expr();
+        } else {
+            /*
+             * In C, left-shifting a negative signed value and shifting
+             * by a count that is negative or >= the operand width are
+             * both undefined.  Shift as unsigned and treat an
+             * out-of-range count as shifting every bit out.
+             */
+            uint64_t val = (uint64_t)reloc_value(e);
+            uint64_t count = (uint64_t)reloc_value(f);
+
+            if (count >= 64)
+                e = scalarvect(0);
+            else if (j == TOKEN_SHL)
+                e = scalarvect((int64_t)(val << count));
+            else                /* TOKEN_SHR */
+                e = scalarvect((int64_t)(val >> count));
+        }
+    }
+    return e;
+}
+
+/*
+ * expr4: additive level of the expression grammar.
+ *
+ * Parses  expr5 { ('+' | '-') expr5 }.  Addition is a vector add of the
+ * two operands; subtraction adds the right operand scaled by -1.
+ * Returns NULL when a sub-expression fails.
+ */
+static expr *expr4(int critical)
+{
+    expr *sum, *term;
+
+    sum = expr5(critical);
+    if (!sum)
+        return NULL;
+
+    while (i == '+' || i == '-') {
+        const bool subtract = (i == '-');
+
+        i = scan(scpriv, tokval);
+        term = expr5(critical);
+        if (!term)
+            return NULL;
+
+        if (subtract)
+            term = scalar_mult(term, -1L, false);
+        sum = add_vectors(sum, term);
+    }
+    return sum;
+}
+
+/*
+ * expr5: multiplicative level of the expression grammar.
+ *
+ * Parses  expr6 { op expr6 }  where op is '*', '/', '%', TOKEN_SDIV or
+ * TOKEN_SMOD.
+ *
+ *  - '*' needs at least one simple (scalar) operand, or two lone
+ *    unknowns; anything else is an error.
+ *  - '/' and '%' work on the unsigned 64-bit values.
+ *  - TOKEN_SDIV and TOKEN_SMOD work on the signed 64-bit values.  A
+ *    divisor of -1 is handled separately, because INT64_MIN / -1 and
+ *    INT64_MIN % -1 overflow in C (and trap on common hosts): the
+ *    quotient wraps like two's-complement negation and the remainder
+ *    is 0.
+ *
+ * Division operators require scalar or lone-unknown operands and a
+ * non-zero divisor.  Returns NULL after reporting an error, or when a
+ * sub-expression fails.
+ */
+static expr *expr5(int critical)
+{
+    expr *e, *f;
+
+    e = expr6(critical);
+    if (!e)
+        return NULL;
+    while (i == '*' || i == '/' || i == '%' ||
+           i == TOKEN_SDIV || i == TOKEN_SMOD) {
+        int j = i;
+        i = scan(scpriv, tokval);
+        f = expr6(critical);
+        if (!f)
+            return NULL;
+        if (j != '*' && (!(is_simple(e) || is_just_unknown(e)) ||
+                         !(is_simple(f) || is_just_unknown(f)))) {
+            nasm_error(ERR_NONFATAL, "division operator may only be applied to"
+                       " scalar values");
+            return NULL;
+        }
+        if (j != '*' && !is_unknown(f) && reloc_value(f) == 0) {
+            nasm_error(ERR_NONFATAL, "division by zero");
+            return NULL;
+        }
+        switch (j) {
+        case '*':
+            if (is_simple(e))
+                e = scalar_mult(f, reloc_value(e), true);
+            else if (is_simple(f))
+                e = scalar_mult(e, reloc_value(f), true);
+            else if (is_just_unknown(e) && is_just_unknown(f))
+                e = unknown_expr();
+            else {
+                nasm_error(ERR_NONFATAL, "unable to multiply two "
+                           "non-scalar objects");
+                return NULL;
+            }
+            break;
+        case '/':
+            if (is_just_unknown(e) || is_just_unknown(f))
+                e = unknown_expr();
+            else
+                e = scalarvect(((uint64_t)reloc_value(e)) /
+                               ((uint64_t)reloc_value(f)));
+            break;
+        case '%':
+            if (is_just_unknown(e) || is_just_unknown(f))
+                e = unknown_expr();
+            else
+                e = scalarvect(((uint64_t)reloc_value(e)) %
+                               ((uint64_t)reloc_value(f)));
+            break;
+        case TOKEN_SDIV:
+            if (is_just_unknown(e) || is_just_unknown(f)) {
+                e = unknown_expr();
+            } else {
+                int64_t num = reloc_value(e);
+                int64_t den = reloc_value(f);
+
+                if (den == -1) {
+                    /* negate in unsigned arithmetic: no signed overflow */
+                    e = scalarvect((int64_t)(0 - (uint64_t)num));
+                } else {
+                    e = scalarvect(num / den);
+                }
+            }
+            break;
+        case TOKEN_SMOD:
+            if (is_just_unknown(e) || is_just_unknown(f)) {
+                e = unknown_expr();
+            } else {
+                int64_t num = reloc_value(e);
+                int64_t den = reloc_value(f);
+
+                /* x % -1 is always 0; avoid INT64_MIN % -1 */
+                e = scalarvect(den == -1 ? 0 : num % den);
+            }
+            break;
+        }
+    }
+    return e;
+}
+
+/*
+ * Evaluate a TOKEN_FLOATIZE function:  func '(' [+|-] float ')'.
+ *
+ * The floating-point constant is converted by float_const() into the
+ * byte image of the format selected by `type'; a slice of that image
+ * (formats[type].start, formats[type].len) is then assembled, treating
+ * byte 0 of the slice as least significant, into a scalar expression.
+ *
+ * The bytes are accumulated in an unsigned 64-bit variable so that
+ * 8-byte slices do not overflow a signed left shift.
+ *
+ * Returns NULL after reporting an error.
+ */
+static expr *eval_floatize(enum floatize type)
+{
+    uint8_t result[16], *p;     /* Up to 128 bits */
+    static const struct {
+        int bytes, start, len;
+    } formats[] = {
+        {  1, 0, 1 },           /* FLOAT_8 */
+        {  2, 0, 2 },           /* FLOAT_16 */
+        {  4, 0, 4 },           /* FLOAT_32 */
+        {  8, 0, 8 },           /* FLOAT_64 */
+        { 10, 0, 8 },           /* FLOAT_80M */
+        { 10, 8, 2 },           /* FLOAT_80E */
+        { 16, 0, 8 },           /* FLOAT_128L */
+        { 16, 8, 8 },           /* FLOAT_128H */
+    };
+    int sign = 1;
+    uint64_t val;
+    int j;
+
+    i = scan(scpriv, tokval);
+    if (i != '(') {
+        nasm_error(ERR_NONFATAL, "expecting `('");
+        return NULL;
+    }
+    i = scan(scpriv, tokval);
+    if (i == '-' || i == '+') {
+        sign = (i == '-') ? -1 : 1;
+        i = scan(scpriv, tokval);
+    }
+    if (i != TOKEN_FLOAT) {
+        nasm_error(ERR_NONFATAL, "expecting floating-point number");
+        return NULL;
+    }
+    if (!float_const(tokval->t_charptr, sign, result, formats[type].bytes))
+        return NULL;
+    i = scan(scpriv, tokval);
+    if (i != ')') {
+        nasm_error(ERR_NONFATAL, "expecting `)'");
+        return NULL;
+    }
+
+    /* Walk the slice from its last byte down to its first */
+    p = result+formats[type].start+formats[type].len;
+    val = 0;
+    for (j = formats[type].len; j; j--) {
+        p--;
+        val = (val << 8) + *p;
+    }
+
+    begintemp();
+    addtotemp(EXPR_SIMPLE, (int64_t)val);
+
+    i = scan(scpriv, tokval);
+    return finishtemp();
+}
+
+/*
+ * Evaluate a TOKEN_STRFUNC function applied to a string token.
+ *
+ * The argument may optionally be wrapped in parentheses.  The string is
+ * passed through string_transform() using `type', and the transformed
+ * bytes are read as a numeric constant with readstrnum().  A warning is
+ * issued when readstrnum() flags the constant as too long.
+ *
+ * Returns NULL after reporting an error.
+ */
+static expr *eval_strfunc(enum strfunc type)
+{
+    char *xformed;
+    size_t xformed_len;
+    int64_t number;
+    bool have_paren, too_long;
+
+    i = scan(scpriv, tokval);
+    have_paren = (i == '(');
+    if (have_paren)
+        i = scan(scpriv, tokval);
+
+    if (i != TOKEN_STR) {
+        nasm_error(ERR_NONFATAL, "expecting string");
+        return NULL;
+    }
+
+    xformed_len = string_transform(tokval->t_charptr, tokval->t_inttwo,
+                                   &xformed, type);
+    if (xformed_len == (size_t)-1) {
+        nasm_error(ERR_NONFATAL, "invalid string for transform");
+        return NULL;
+    }
+
+    /* Convert before scanning on, as the original code does */
+    number = readstrnum(xformed, xformed_len, &too_long);
+
+    if (have_paren) {
+        i = scan(scpriv, tokval);
+        if (i != ')') {
+            nasm_error(ERR_NONFATAL, "expecting `)'");
+            return NULL;
+        }
+    }
+
+    if (too_long)
+        nasm_error(ERR_WARNING|ERR_PASS1, "character constant too long");
+
+    begintemp();
+    addtotemp(EXPR_SIMPLE, number);
+
+    i = scan(scpriv, tokval);
+    return finishtemp();
+}
+
+/*
+ * Apply integer function `func' to `val', which is treated as an
+ * unsigned 64-bit quantity.
+ *
+ *  IFUNC_ILOG2E  ilog2, error if the argument is not a power of two
+ *  IFUNC_ILOG2W  ilog2, warning if the argument is not a power of two
+ *  IFUNC_ILOG2F  ilog2 with no power-of-two check
+ *  IFUNC_ILOG2C  0 for arguments below 2, else ilog2(arg - 1) + 1
+ *
+ * Any other selector triggers nasm_panic().
+ */
+static int64_t eval_ifunc(int64_t val, enum ifunc func)
+{
+    const uint64_t arg = (uint64_t)val;
+
+    if (func == IFUNC_ILOG2C)
+        return (arg < 2) ? 0 : ilog2_64(arg-1) + 1;
+
+    if (func == IFUNC_ILOG2E || func == IFUNC_ILOG2W) {
+        if (!is_power2(arg)) {
+            int severity = (func == IFUNC_ILOG2E) ? ERR_NONFATAL : ERR_WARNING;
+            nasm_error(severity, "ilog2 argument is not a power of two");
+        }
+        return ilog2_64(arg);
+    }
+
+    if (func == IFUNC_ILOG2F)
+        return ilog2_64(arg);
+
+    nasm_panic(0, "invalid IFUNC token %d", func);
+    return 0;
+}
+
+/*
+ * expr6: parse a primary term or a unary operator applied to one.
+ *
+ * Handles unary `-', `+', `~' and `!', integer functions (TOKEN_IFUNC),
+ * SEG, the floatize and string functions, parenthesised expressions
+ * (parsed through bexpr), and the leaf tokens: numbers, strings,
+ * registers, identifiers, `$', `$$' and decorators.
+ *
+ * `critical' controls what happens to an undefined symbol:
+ *   2 - error "symbol ... undefined", return NULL
+ *   1 - error "symbol ... not defined before use", return NULL
+ *   otherwise - the term becomes EXPR_UNKNOWN and OPFLAG_FORWARD is
+ *       set in *opflags (when opflags is non-NULL)
+ *
+ * On success the current token `i' has been advanced past the term.
+ * Returns NULL after reporting an error.
+ */
+static expr *expr6(int critical)
+{
+ int32_t type;
+ expr *e;
+ int32_t label_seg;
+ int64_t label_ofs;
+ int64_t tmpval;
+ bool rn_warn;
+ char *scope;
+
+ switch (i) {
+ case '-':
+ i = scan(scpriv, tokval);
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ return scalar_mult(e, -1L, false);
+
+ case '+':
+ i = scan(scpriv, tokval);
+ return expr6(critical);
+
+ case '~':
+ i = scan(scpriv, tokval);
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ if (is_just_unknown(e))
+ return unknown_expr();
+ else if (!is_simple(e)) {
+ nasm_error(ERR_NONFATAL, "`~' operator may only be applied to"
+ " scalar values");
+ return NULL;
+ }
+ return scalarvect(~reloc_value(e));
+
+ case '!':
+ i = scan(scpriv, tokval);
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ if (is_just_unknown(e))
+ return unknown_expr();
+ else if (!is_simple(e)) {
+ nasm_error(ERR_NONFATAL, "`!' operator may only be applied to"
+ " scalar values");
+ return NULL;
+ }
+ return scalarvect(!reloc_value(e));
+
+ case TOKEN_IFUNC:
+ {
+ /* Remember which function it is before scanning on */
+ enum ifunc func = tokval->t_integer;
+ i = scan(scpriv, tokval);
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ if (is_just_unknown(e))
+ return unknown_expr();
+ else if (!is_simple(e)) {
+ nasm_error(ERR_NONFATAL, "function may only be applied to"
+ " scalar values");
+ return NULL;
+ }
+ return scalarvect(eval_ifunc(reloc_value(e), func));
+ }
+
+ case TOKEN_SEG:
+ i = scan(scpriv, tokval);
+ e = expr6(critical);
+ if (!e)
+ return NULL;
+ e = segment_part(e);
+ if (!e)
+ return NULL;
+ if (is_unknown(e) && critical) {
+ nasm_error(ERR_NONFATAL, "unable to determine segment base");
+ return NULL;
+ }
+ return e;
+
+ case TOKEN_FLOATIZE:
+ return eval_floatize(tokval->t_integer);
+
+ case TOKEN_STRFUNC:
+ return eval_strfunc(tokval->t_integer);
+
+ case '(':
+ i = scan(scpriv, tokval);
+ e = bexpr(critical);
+ if (!e)
+ return NULL;
+ if (i != ')') {
+ nasm_error(ERR_NONFATAL, "expecting `)'");
+ return NULL;
+ }
+ i = scan(scpriv, tokval);
+ return e;
+
+ case TOKEN_NUM:
+ case TOKEN_STR:
+ case TOKEN_REG:
+ case TOKEN_ID:
+ case TOKEN_INSN: /* Opcodes that occur here are really labels */
+ case TOKEN_HERE:
+ case TOKEN_BASE:
+ case TOKEN_DECORATOR:
+ /* Leaf tokens: build a fresh temporary vector for the term */
+ begintemp();
+ switch (i) {
+ case TOKEN_NUM:
+ addtotemp(EXPR_SIMPLE, tokval->t_integer);
+ break;
+ case TOKEN_STR:
+ tmpval = readstrnum(tokval->t_charptr, tokval->t_inttwo, &rn_warn);
+ if (rn_warn)
+ nasm_error(ERR_WARNING|ERR_PASS1, "character constant too long");
+ addtotemp(EXPR_SIMPLE, tmpval);
+ break;
+ case TOKEN_REG:
+ /* A register is a term of its own type with coefficient 1 */
+ addtotemp(tokval->t_integer, 1L);
+ /* Only the first register seen is recorded as the base hint */
+ if (hint && hint->type == EAH_NOHINT)
+ hint->base = tokval->t_integer, hint->type = EAH_MAKEBASE;
+ break;
+ case TOKEN_ID:
+ case TOKEN_INSN:
+ case TOKEN_HERE:
+ case TOKEN_BASE:
+ /*
+ * If !location.known, this indicates that no
+ * symbol, Here or Base references are valid because we
+ * are in preprocess-only mode.
+ */
+ if (!location.known) {
+ nasm_error(ERR_NONFATAL,
+ "%s not supported in preprocess-only mode",
+ (i == TOKEN_HERE ? "`$'" :
+ i == TOKEN_BASE ? "`$$'" :
+ "symbol references"));
+ addtotemp(EXPR_UNKNOWN, 1L);
+ break;
+ }
+
+ type = EXPR_SIMPLE; /* might get overridden by UNKNOWN */
+ if (i == TOKEN_BASE) {
+ /* `$$': offset 0 of the current (or absolute) segment */
+ label_seg = in_abs_seg ? abs_seg : location.segment;
+ label_ofs = 0;
+ } else if (i == TOKEN_HERE) {
+ /* `$': the current offset in the current (or absolute) segment */
+ label_seg = in_abs_seg ? abs_seg : location.segment;
+ label_ofs = in_abs_seg ? abs_offset : location.offset;
+ } else {
+ if (!lookup_label(tokval->t_charptr, &label_seg, &label_ofs)) {
+ scope = local_scope(tokval->t_charptr);
+ if (critical == 2) {
+ nasm_error(ERR_NONFATAL, "symbol `%s%s' undefined",
+ scope,tokval->t_charptr);
+ return NULL;
+ } else if (critical == 1) {
+ nasm_error(ERR_NONFATAL,
+ "symbol `%s%s' not defined before use",
+ scope,tokval->t_charptr);
+ return NULL;
+ } else {
+ /* Forward reference: emit an unknown term, no segment */
+ if (opflags)
+ *opflags |= OPFLAG_FORWARD;
+ type = EXPR_UNKNOWN;
+ label_seg = NO_SEG;
+ label_ofs = 1;
+ }
+ }
+ if (opflags && is_extern(tokval->t_charptr))
+ *opflags |= OPFLAG_EXTERN;
+ }
+ addtotemp(type, label_ofs);
+ if (label_seg != NO_SEG)
+ addtotemp(EXPR_SEGBASE + label_seg, 1L);
+ break;
+ case TOKEN_DECORATOR:
+ addtotemp(EXPR_RDSAE, tokval->t_integer);
+ break;
+ }
+ i = scan(scpriv, tokval);
+ return finishtemp();
+
+ default:
+ nasm_error(ERR_NONFATAL, "expression syntax error");
+ return NULL;
+ }
+}
+
+/*
+ * Public entry point of the expression evaluator.
+ *
+ * sc, scprivate  scanner callback and its private data, used to fetch
+ *                every subsequent token
+ * tv             token buffer; if tv->t_type is TOKEN_INVALID a first
+ *                token is scanned, otherwise tv already holds it
+ * fwref          if non-NULL, receives OPFLAG_* bits set while parsing
+ *                (stored in the file-level `opflags')
+ * critical       strictness level handed down to the parsers; if the
+ *                CRITICAL bit is set it is stripped and rexp0 is used
+ *                as the top-level parser instead of expr0
+ * hints          if non-NULL, reset to EAH_NOHINT here and possibly
+ *                filled in while parsing
+ *
+ * An optional trailing `WRT term' is parsed and appended as an EXPR_WRT
+ * element.  Returns NULL after an error has been reported.
+ *
+ * The temporary vectors left over from the previous call are freed at
+ * the start of each call.
+ * NOTE(review): the returned vector presumably lives in that temporary
+ * storage and is therefore only valid until the next call - confirm
+ * against finishtemp().
+ */
+expr *evaluate(scanner sc, void *scprivate, struct tokenval *tv,
+ int *fwref, int critical, struct eval_hints *hints)
+{
+ expr *e;
+ expr *f = NULL;
+
+ hint = hints;
+ if (hint)
+ hint->type = EAH_NOHINT;
+
+ if (critical & CRITICAL) {
+ critical &= ~CRITICAL;
+ bexpr = rexp0;
+ } else
+ bexpr = expr0;
+
+ /* Publish the scanner state to the file-level variables */
+ scan = sc;
+ scpriv = scprivate;
+ tokval = tv;
+ opflags = fwref;
+
+ if (tokval->t_type == TOKEN_INVALID)
+ i = scan(scpriv, tokval);
+ else
+ i = tokval->t_type;
+
+ while (ntempexprs) /* initialize temporary storage */
+ nasm_free(tempexprs[--ntempexprs]);
+
+ e = bexpr(critical);
+ if (!e)
+ return NULL;
+
+ if (i == TOKEN_WRT) {
+ i = scan(scpriv, tokval); /* eat the WRT */
+ f = expr6(critical);
+ if (!f)
+ return NULL;
+ }
+ e = scalar_mult(e, 1L, false); /* strip far-absolute segment part */
+ if (f) {
+ expr *g;
+ if (is_just_unknown(f))
+ g = unknown_expr();
+ else {
+ int64_t value;
+ begintemp();
+ if (!is_reloc(f)) {
+ nasm_error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+ return NULL;
+ }
+ value = reloc_seg(f);
+ /* A plain number as WRT operand is taken as an absolute segment */
+ if (value == NO_SEG)
+ value = reloc_value(f) | SEG_ABS;
+ else if (!(value & SEG_ABS) && !(value % 2) && critical) {
+ nasm_error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+ return NULL;
+ }
+ addtotemp(EXPR_WRT, value);
+ g = finishtemp();
+ }
+ e = add_vectors(e, g);
+ }
+ return e;
+}
diff --git a/asm/eval.h b/asm/eval.h
new file mode 100644
index 00000000..7af17eb8
--- /dev/null
+++ b/asm/eval.h
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * eval.h header file for eval.c
+ */
+
+#ifndef NASM_EVAL_H
+#define NASM_EVAL_H
+
+/*
+ * The evaluator itself.
+ */
+expr *evaluate(scanner sc, void *scprivate, struct tokenval *tv,
+ int *fwref, int critical, struct eval_hints *hints);
+
+void eval_cleanup(void);
+
+#endif
diff --git a/asm/exprlib.c b/asm/exprlib.c
new file mode 100644
index 00000000..7eb3436c
--- /dev/null
+++ b/asm/exprlib.c
@@ -0,0 +1,186 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * exprlib.c
+ *
+ * Library routines to manipulate expression data types.
+ */
+
+#include "nasm.h"
+
+/*
+ * Report whether the vector is a plain scalar.  A far-absolute value
+ * (a scalar followed by a term whose type is at or above
+ * EXPR_SEGBASE + SEG_ABS) also counts.  Terms with a zero value are
+ * ignored; an all-zero vector is simple.
+ */
+int is_simple(expr * vect)
+{
+    const expr *term = vect;
+
+    /* find the first term that actually contributes */
+    for (; term->type && !term->value; term++)
+        ;
+    if (!term->type)
+        return 1;
+    if (term->type != EXPR_SIMPLE)
+        return 0;
+
+    /* step past the scalar to the next contributing term, if any */
+    for (term++; term->type && !term->value; term++)
+        ;
+    return !(term->type && term->type < EXPR_SEGBASE + SEG_ABS);
+}
+
+/*
+ * Report whether the vector is a plain scalar and nothing else: unlike
+ * is_simple(), a far-absolute value does _not_ qualify.  Terms with a
+ * zero value are ignored.
+ */
+int is_really_simple(expr * vect)
+{
+    const expr *term = vect;
+
+    for (; term->type && !term->value; term++)
+        ;
+    if (!term->type)
+        return 1;               /* nothing but zero terms */
+    if (term->type != EXPR_SIMPLE)
+        return 0;
+
+    /* anything contributing after the scalar disqualifies the vector */
+    for (term++; term->type && !term->value; term++)
+        ;
+    return term->type ? 0 : 1;
+}
+
+/*
+ * Report whether the vector is relocatable: an optional scalar, an
+ * optional WRT term and at most one segment-base term whose multiplier
+ * is 0 or 1.  Any register term makes the vector non-relocatable.
+ * Terms with a zero value are skipped throughout.
+ */
+int is_reloc(expr * vect)
+{
+    const expr *term = vect;
+
+    for (; term->type && !term->value; term++)
+        ;                       /* leading zero-valued terms */
+    if (!term->type)
+        return 1;               /* empty apart from zeros */
+    if (term->type < EXPR_SIMPLE)
+        return 0;               /* a register is present */
+
+    if (term->type == EXPR_SIMPLE) {
+        for (term++; term->type && !term->value; term++)
+            ;                   /* past the pure number */
+        if (!term->type)
+            return 1;
+    }
+
+    if (term->type == EXPR_WRT) {
+        for (term++; term->type && !term->value; term++)
+            ;                   /* past the WRT term */
+        if (!term->type)
+            return 1;
+    }
+
+    if (term->value != 0 && term->value != 1)
+        return 0;               /* segment base multiplier non-unity */
+
+    for (term++; term->type && !term->value; term++)
+        ;                       /* past exactly one seg-base term */
+
+    return !term->type;         /* false if anything else follows */
+}
+
+/*
+ * Report whether the vector has an EXPR_UNKNOWN term.  The scan stops
+ * at the terminator or at the first term whose type is not below
+ * EXPR_UNKNOWN.
+ */
+int is_unknown(expr * vect)
+{
+    const expr *term = vect;
+
+    for (;;) {
+        if (!term->type || term->type >= EXPR_UNKNOWN)
+            break;
+        term++;
+    }
+    return (term->type == EXPR_UNKNOWN);
+}
+
+/*
+ * Report whether the first term with a non-zero value is an
+ * EXPR_UNKNOWN term, i.e. the vector holds nothing but an `unknown'
+ * part.
+ */
+int is_just_unknown(expr * vect)
+{
+    const expr *term;
+
+    for (term = vect; term->type; term++) {
+        if (term->value)
+            break;
+    }
+    return (term->type == EXPR_UNKNOWN);
+}
+
+/*
+ * Return the scalar part of a relocatable vector (simple scalars are
+ * relocatable too).  The result is 0 when the first term with a
+ * non-zero value is not an EXPR_SIMPLE term, or when there is none.
+ */
+int64_t reloc_value(expr * vect)
+{
+    const expr *term;
+
+    for (term = vect; term->type; term++) {
+        if (term->value)
+            break;
+    }
+    if (!term->type || term->type != EXPR_SIMPLE)
+        return 0;
+    return term->value;
+}
+
+/*
+ * Return the segment number of a relocatable vector, or NO_SEG for a
+ * simple scalar.  WRT terms and zero-valued terms are skipped, as is a
+ * leading EXPR_SIMPLE term; the segment is derived from the type of the
+ * term that follows.
+ */
+int32_t reloc_seg(expr * vect)
+{
+    const expr *term = vect;
+
+    for (; term->type && (term->type == EXPR_WRT || !term->value); term++)
+        ;
+    if (term->type == EXPR_SIMPLE) {
+        /* step over the scalar, then skip WRT/zero terms again */
+        for (term++;
+             term->type && (term->type == EXPR_WRT || !term->value);
+             term++)
+            ;
+    }
+    return term->type ? term->type - EXPR_SEGBASE : NO_SEG;
+}
+
+/*
+ * Return the value of the vector's EXPR_WRT term (its WRT segment
+ * number), or NO_SEG when the vector has no WRT part.
+ */
+int32_t reloc_wrt(expr * vect)
+{
+    const expr *term = vect;
+
+    /* skip every term whose type sorts below EXPR_WRT */
+    for (; term->type && term->type < EXPR_WRT; term++)
+        ;
+    if (term->type != EXPR_WRT)
+        return NO_SEG;
+    return term->value;
+}
diff --git a/asm/float.c b/asm/float.c
new file mode 100644
index 00000000..6cd14561
--- /dev/null
+++ b/asm/float.c
@@ -0,0 +1,951 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * float.c floating-point constant support for the Netwide Assembler
+ */
+
+#include "compiler.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nasm.h"
+#include "float.h"
+
+/*
+ * -----------------
+ * local variables
+ * -----------------
+ */
+static bool daz = false; /* denormals as zero */
+static enum float_round rc = FLOAT_RC_NEAR; /* rounding control */
+
+/*
+ * -----------
+ * constants
+ * -----------
+ */
+
+/*
+ * "A limb is like a digit but bigger": mantissas are held as arrays of
+ * fp_limb; fp_2limb is wide enough for the product of two limbs.
+ */
+typedef uint32_t fp_limb;
+typedef uint64_t fp_2limb;
+
+/* Width of a limb, its top bit and an all-ones limb */
+#define LIMB_BITS 32
+#define LIMB_BYTES (LIMB_BITS/8)
+#define LIMB_TOP_BIT ((fp_limb)1 << (LIMB_BITS-1))
+#define LIMB_MASK ((fp_limb)(~0))
+/* LIMB_BYTE(x) replicates the byte x into every byte of a limb */
+#define LIMB_ALL_BYTES ((fp_limb)0x01010101)
+#define LIMB_BYTE(x) ((x)*LIMB_ALL_BYTES)
+
+/* 112 bits + 64 bits for accuracy + 16 bits for rounding */
+#define MANT_LIMBS 6
+
+/* 52 digits fit in 176 bits because 10^53 > 2^176 > 10^52 */
+#define MANT_DIGITS 52
+
+/* the format and the argument list depend on MANT_LIMBS */
+#define MANT_FMT "%08x_%08x_%08x_%08x_%08x_%08x"
+#define MANT_ARG SOME_ARG(mant, 0)
+
+/* Expands to six consecutive elements of array a, starting at index i */
+#define SOME_ARG(a,i) (a)[(i)+0], (a)[(i)+1], (a)[(i)+2], \
+ (a)[(i)+3], (a)[(i)+4], (a)[(i)+5]
+
+/*
+ * ---------------------------------------------------------------------------
+ * emit a printf()-like debug message... but only if DEBUG_FLOAT was defined
+ * ---------------------------------------------------------------------------
+ */
+
+#ifdef DEBUG_FLOAT
+#define dprintf(x) printf x
+#else
+#define dprintf(x) do { } while (0)
+#endif
+
+/*
+ * ---------------------------------------------------------------------------
+ * multiply
+ * ---------------------------------------------------------------------------
+ */
+static int float_multiply(fp_limb *to, fp_limb *from)
+{
+ fp_2limb temp[MANT_LIMBS * 2];
+ int i, j;
+
+ /*
+ * guaranteed that top bit of 'from' is set -- so we only have
+ * to worry about _one_ bit shift to the left
+ */
+ dprintf(("%s=" MANT_FMT "\n", "mul1", SOME_ARG(to, 0)));
+ dprintf(("%s=" MANT_FMT "\n", "mul2", SOME_ARG(from, 0)));
+
+ memset(temp, 0, sizeof temp);
+
+ for (i = 0; i < MANT_LIMBS; i++) {
+ for (j = 0; j < MANT_LIMBS; j++) {
+ fp_2limb n;
+ n = (fp_2limb) to[i] * (fp_2limb) from[j];
+ temp[i + j] += n >> LIMB_BITS;
+ temp[i + j + 1] += (fp_limb)n;
+ }
+ }
+
+ for (i = MANT_LIMBS * 2; --i;) {
+ temp[i - 1] += temp[i] >> LIMB_BITS;
+ temp[i] &= LIMB_MASK;
+ }
+
+ dprintf(("%s=" MANT_FMT "_" MANT_FMT "\n", "temp", SOME_ARG(temp, 0),
+ SOME_ARG(temp, MANT_LIMBS)));
+
+ if (temp[0] & LIMB_TOP_BIT) {
+ for (i = 0; i < MANT_LIMBS; i++) {
+ to[i] = temp[i] & LIMB_MASK;
+ }
+ dprintf(("%s=" MANT_FMT " (%i)\n", "prod", SOME_ARG(to, 0), 0));
+ return 0;
+ } else {
+ for (i = 0; i < MANT_LIMBS; i++) {
+ to[i] = (temp[i] << 1) + !!(temp[i + 1] & LIMB_TOP_BIT);
+ }
+ dprintf(("%s=" MANT_FMT " (%i)\n", "prod", SOME_ARG(to, 0), -1));
+ return -1;
+ }
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ *  read an exponent; returns INT32_MAX on error
+ *
+ *  Accepts an optional sign followed by decimal digits; underscores are
+ *  ignored.  The magnitude is clamped to `max' after every digit, so the
+ *  accumulator cannot wrap around however long the digit string is.
+ * ---------------------------------------------------------------------------
+ */
+static int32_t read_exponent(const char *string, int32_t max)
+{
+    const char *s = string;
+    int32_t magnitude = 0;
+    bool negative = false;
+
+    if (*s == '+' || *s == '-') {
+        negative = (*s == '-');
+        s++;
+    }
+
+    for (; *s; s++) {
+        const char c = *s;
+
+        if (c == '_')
+            continue;           /* digit separator */
+
+        if (c < '0' || c > '9') {
+            nasm_error(ERR_NONFATAL|ERR_PASS1,
+                       "invalid character in floating-point constant %s: '%c'",
+                       "exponent", c);
+            return INT32_MAX;
+        }
+
+        magnitude = (magnitude * 10) + (c - '0');
+        if (magnitude > max)
+            magnitude = max;    /* saturate rather than overflow */
+    }
+
+    return negative ? -magnitude : magnitude;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * convert
+ * ---------------------------------------------------------------------------
+ */
+/*
+ * Convert a decimal floating-point string (digits, at most one `.',
+ * optional `E'/`e' exponent, `_' separators) into a binary mantissa and
+ * a power-of-two exponent.
+ *
+ * On success `mant' (MANT_LIMBS limbs) and `*exponent' are filled in
+ * and true is returned.  False is returned after an error has been
+ * reported for a malformed constant.  At most MANT_DIGITS significant
+ * digits are used; further digits only trigger a warning.
+ */
+static bool ieee_flconvert(const char *string, fp_limb *mant,
+ int32_t * exponent)
+{
+ char digits[MANT_DIGITS];
+ char *p, *q, *r;
+ fp_limb mult[MANT_LIMBS], bit;
+ fp_limb *m;
+ int32_t tenpwr, twopwr;
+ int32_t extratwos;
+ bool started, seendot, warned;
+
+ warned = false;
+ p = digits;
+ tenpwr = 0;
+ started = seendot = false;
+
+ /* Collect the significand digits and track the decimal point position */
+ while (*string && *string != 'E' && *string != 'e') {
+ if (*string == '.') {
+ if (!seendot) {
+ seendot = true;
+ } else {
+ nasm_error(ERR_NONFATAL|ERR_PASS1,
+ "too many periods in floating-point constant");
+ return false;
+ }
+ } else if (*string >= '0' && *string <= '9') {
+ if (*string == '0' && !started) {
+ /* Leading zeros after the point only lower the exponent */
+ if (seendot) {
+ tenpwr--;
+ }
+ } else {
+ started = true;
+ if (p < digits + sizeof(digits)) {
+ *p++ = *string - '0';
+ } else {
+ if (!warned) {
+ nasm_error(ERR_WARNING|ERR_WARN_FL_TOOLONG|ERR_PASS1,
+ "floating-point constant significand contains "
+ "more than %i digits", MANT_DIGITS);
+ warned = true;
+ }
+ }
+ if (!seendot) {
+ tenpwr++;
+ }
+ }
+ } else if (*string == '_') {
+ /* do nothing */
+ } else {
+ nasm_error(ERR_NONFATAL|ERR_PASS1,
+ "invalid character in floating-point constant %s: '%c'",
+ "significand", *string);
+ return false;
+ }
+ string++;
+ }
+
+ if (*string) {
+ int32_t e;
+
+ string++; /* eat the E */
+ e = read_exponent(string, 5000);
+ if (e == INT32_MAX)
+ return false;
+ tenpwr += e;
+ }
+
+ /*
+ * At this point, the memory interval [digits,p) contains a
+ * series of decimal digits zzzzzzz, such that our number X
+ * satisfies X = 0.zzzzzzz * 10^tenpwr.
+ */
+ q = digits;
+ dprintf(("X = 0."));
+ while (q < p) {
+ dprintf(("%c", *q + '0'));
+ q++;
+ }
+ dprintf((" * 10^%i\n", tenpwr));
+
+ /*
+ * Now convert [digits,p) to our internal representation.
+ * Each pass doubles the decimal fraction in place; the carry out
+ * of the top digit is the next binary digit of the result.
+ */
+ bit = LIMB_TOP_BIT;
+ for (m = mant; m < mant + MANT_LIMBS; m++) {
+ *m = 0;
+ }
+ m = mant;
+ q = digits;
+ started = false;
+ twopwr = 0;
+ while (m < mant + MANT_LIMBS) {
+ fp_limb carry = 0;
+ /* Drop trailing zero digits; stop once nothing is left */
+ while (p > q && !p[-1]) {
+ p--;
+ }
+ if (p <= q) {
+ break;
+ }
+ for (r = p; r-- > q;) {
+ int32_t i;
+ i = 2 * *r + carry;
+ if (i >= 10) {
+ carry = 1;
+ i -= 10;
+ } else {
+ carry = 0;
+ }
+ *r = i;
+ }
+ if (carry) {
+ *m |= bit;
+ started = true;
+ }
+ if (started) {
+ if (bit == 1) {
+ bit = LIMB_TOP_BIT;
+ m++;
+ } else {
+ bit >>= 1;
+ }
+ } else {
+ /* Leading zero bits are not stored, only counted */
+ twopwr--;
+ }
+ }
+ twopwr += tenpwr;
+
+ /*
+ * At this point, the 'mant' array contains the first frac-
+ * tional places of a base-2^LIMB_BITS real number which when
+ * multiplied by 2^twopwr and 5^tenpwr gives X.
+ */
+ dprintf(("X = " MANT_FMT " * 2^%i * 5^%i\n", MANT_ARG, twopwr,
+ tenpwr));
+
+ /*
+ * Now multiply 'mant' by 5^tenpwr.
+ */
+ if (tenpwr < 0) { /* mult = 5^-1 = 0.2 */
+ for (m = mult; m < mult + MANT_LIMBS - 1; m++) {
+ *m = LIMB_BYTE(0xcc);
+ }
+ mult[MANT_LIMBS - 1] = LIMB_BYTE(0xcc)+1;
+ extratwos = -2;
+ tenpwr = -tenpwr;
+
+ /*
+ * If tenpwr was 1000...000b, then it becomes 1000...000b. See
+ * the "ANSI C" comment below for more details on that case.
+ *
+ * Because we already truncated tenpwr to +5000...-5000 inside
+ * the exponent parsing code, this shouldn't happen though.
+ */
+ } else if (tenpwr > 0) { /* mult = 5^+1 = 5.0 */
+ mult[0] = (fp_limb)5 << (LIMB_BITS-3); /* 0xA000... */
+ for (m = mult + 1; m < mult + MANT_LIMBS; m++) {
+ *m = 0;
+ }
+ extratwos = 3;
+ } else {
+ extratwos = 0;
+ }
+ /* Square-and-multiply over the bits of tenpwr */
+ while (tenpwr) {
+ dprintf(("loop=" MANT_FMT " * 2^%i * 5^%i (%i)\n", MANT_ARG,
+ twopwr, tenpwr, extratwos));
+ if (tenpwr & 1) {
+ dprintf(("mant*mult\n"));
+ twopwr += extratwos + float_multiply(mant, mult);
+ }
+ dprintf(("mult*mult\n"));
+ extratwos = extratwos * 2 + float_multiply(mult, mult);
+ tenpwr >>= 1;
+
+ /*
+ * In ANSI C, the result of right-shifting a signed integer is
+ * considered implementation-specific. To ensure that the loop
+ * terminates even if tenpwr was 1000...000b to begin with, we
+ * manually clear the MSB, in case a 1 was shifted in.
+ *
+ * Because we already truncated tenpwr to +5000...-5000 inside
+ * the exponent parsing code, this shouldn't matter; neverthe-
+ * less it is the right thing to do here.
+ */
+ tenpwr &= (uint32_t) - 1 >> 1;
+ }
+
+ /*
+ * At this point, the 'mant' array contains the first frac-
+ * tional places of a base-2^LIMB_BITS real number in [0.5,1)
+ * that when multiplied by 2^twopwr gives X. Or it contains zero
+ * of course. We are done.
+ */
+ *exponent = twopwr;
+ return true;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * operations of specific bits
+ * ---------------------------------------------------------------------------
+ */
+
+/*
+ * Set one bit of the mantissa.  Bits are numbered big-endian: bit 0 is
+ * the most significant bit of mant[0].
+ */
+static void set_bit(fp_limb *mant, int bit)
+{
+    const int limb = bit/LIMB_BITS;
+    const int pos = bit & (LIMB_BITS-1);        /* offset from the limb's MSB */
+
+    mant[limb] |= LIMB_TOP_BIT >> pos;
+}
+
+/*
+ * Return 1 if the given mantissa bit is set, else 0.  Uses the same
+ * big-endian bit numbering as set_bit().
+ */
+static int test_bit(const fp_limb *mant, int bit)
+{
+    const fp_limb limb = mant[bit/LIMB_BITS];
+    const int shift = ~bit & (LIMB_BITS-1);     /* distance from the limb's LSB */
+
+    return (limb >> shift) & 1;
+}
+
+/* Return true when every one of the MANT_LIMBS limbs is zero */
+static bool is_zero(const fp_limb *mant)
+{
+    const fp_limb *limb = mant;
+    const fp_limb * const end = mant + MANT_LIMBS;
+
+    while (limb < end) {
+        if (*limb++ != 0)
+            return false;
+    }
+    return true;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * round a mantissa off after i words
+ * ---------------------------------------------------------------------------
+ */
+
+/*
+ * Helper macros for ieee_round().  They use its locals directly:
+ * mant, i (limb index), bit (mask within mant[i]), j and m.
+ * ROUND_ABS_DOWN and ROUND_ABS_UP contain a `return' and therefore
+ * leave the enclosing function.
+ */
+
+/* m becomes non-zero if any bit from `bit' downwards is set */
+#define ROUND_COLLECT_BITS \
+ do { \
+ m = mant[i] & (2*bit-1); \
+ for (j = i+1; j < MANT_LIMBS; j++) \
+ m = m | mant[j]; \
+ } while (0)
+
+/* Clear everything below `bit' and return false (no carry out) */
+#define ROUND_ABS_DOWN \
+ do { \
+ mant[i] &= ~(bit-1); \
+ for (j = i+1; j < MANT_LIMBS; j++) \
+ mant[j] = 0; \
+ return false; \
+ } while (0)
+
+/*
+ * Clear everything below `bit', add `bit', ripple the carry into the
+ * more significant limbs, and return true if mant[0] ends up zero.
+ * NOTE(review): when `bit' itself is clear in mant[i], the addition
+ * only sets that bit and does not increment the bits above it - confirm
+ * this is what the directed-rounding path in ieee_round() intends.
+ */
+#define ROUND_ABS_UP \
+ do { \
+ mant[i] = (mant[i] & ~(bit-1)) + bit; \
+ for (j = i+1; j < MANT_LIMBS; j++) \
+ mant[j] = 0; \
+ while (i > 0 && !mant[i]) \
+ ++mant[--i]; \
+ return !mant[0]; \
+ } while (0)
+
+/*
+ * Round the mantissa at bit position `bits' (big-endian numbering,
+ * 0 = MSB of mant[0]) according to the file-level rounding control
+ * `rc'.  `minus' is the sign of the number and decides which way the
+ * directed modes FLOAT_RC_UP / FLOAT_RC_DOWN go.
+ *
+ * The return value comes from ROUND_ABS_UP or ROUND_ABS_DOWN: true only
+ * when rounding up left mant[0] equal to zero.
+ */
+static bool ieee_round(bool minus, fp_limb *mant, int bits)
+{
+ fp_limb m = 0;
+ int32_t j;
+ int i = bits / LIMB_BITS;
+ int p = bits % LIMB_BITS;
+ fp_limb bit = LIMB_TOP_BIT >> p;
+
+ if (rc == FLOAT_RC_NEAR) {
+ if (mant[i] & bit) {
+ /* Hide the bit at `bits' so m collects only the bits below it */
+ mant[i] &= ~bit;
+ ROUND_COLLECT_BITS;
+ mant[i] |= bit;
+ if (m) {
+ ROUND_ABS_UP;
+ } else {
+ /* Exact tie: the bit just above `bits' decides */
+ if (test_bit(mant, bits-1)) {
+ ROUND_ABS_UP;
+ } else {
+ ROUND_ABS_DOWN;
+ }
+ }
+ } else {
+ ROUND_ABS_DOWN;
+ }
+ } else if (rc == FLOAT_RC_ZERO ||
+ rc == (minus ? FLOAT_RC_UP : FLOAT_RC_DOWN)) {
+ ROUND_ABS_DOWN;
+ } else {
+ /* rc == (minus ? FLOAT_RC_DOWN : FLOAT_RC_UP) */
+ /* Round toward +/- infinity */
+ ROUND_COLLECT_BITS;
+ if (m) {
+ ROUND_ABS_UP;
+ } else {
+ ROUND_ABS_DOWN;
+ }
+ }
+ /* Not reached: every branch above returns from inside a ROUND_* macro */
+ return false;
+}
+
+/*
+ * Return the value of a digit character: 0-9 for decimal digits and
+ * 10-35 for the letters a-z in either case.  Every other character
+ * yields a value >= 16, so the result is never a valid hex digit.
+ *
+ * Characters that fold below 'a' (such as '@' and '`') are rejected
+ * explicitly; otherwise the subtraction would wrap around and map them
+ * to 9.
+ */
+static unsigned int hexval(char c)
+{
+    unsigned int v = (unsigned char) c;
+
+    if (v >= '0' && v <= '9')
+        return v - '0';
+
+    v |= 0x20;                  /* fold upper case onto lower case */
+    if (v < 'a')
+        return ~0U;             /* not a letter: never a valid digit */
+
+    return v - 'a' + 10;
+}
+
+/*
+ * Handle floating-point numbers with radix 2^bits and binary exponent.
+ *
+ * `string' holds the digits (at most one `.', `_' separators) with an
+ * optional `p'/`P' binary exponent.  On success the mantissa, shifted
+ * so that its leading 1 bit is the top bit of mant[0], is stored in
+ * `mant', the matching power of two in `*exponent', and true is
+ * returned.  A value with no non-zero digit produces an all-zero
+ * mantissa and exponent 0.  False is returned after an error has been
+ * reported.
+ *
+ * `ms' is the left shift that places the next digit in the current
+ * limb.  A digit straddles two limbs only when ms is negative.  When
+ * ms is exactly 0 the digit fits in the low bits of the current limb
+ * and must not be spilled: spilling would then shift by LIMB_BITS,
+ * which is undefined and in practice ORs the digit into the next limb.
+ */
+static bool ieee_flconvert_bin(const char *string, int bits,
+                               fp_limb *mant, int32_t *exponent)
+{
+    static const int log2tbl[16] =
+        { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+    fp_limb mult[MANT_LIMBS + 1], *mp;
+    int ms;
+    int32_t twopwr;
+    bool seendot, seendigit;
+    unsigned char c;
+    const int radix = 1 << bits;
+    fp_limb v;
+
+    twopwr = 0;
+    seendot = seendigit = false;
+    ms = 0;
+    mp = NULL;
+
+    memset(mult, 0, sizeof mult);
+
+    while ((c = *string++) != '\0') {
+        if (c == '.') {
+            if (!seendot)
+                seendot = true;
+            else {
+                nasm_error(ERR_NONFATAL|ERR_PASS1,
+                           "too many periods in floating-point constant");
+                return false;
+            }
+        } else if ((v = hexval(c)) < (unsigned int)radix) {
+            if (!seendigit && v) {
+                /* First non-zero digit: align its top set bit to the MSB */
+                int l = log2tbl[v];
+
+                seendigit = true;
+                mp = mult;
+                ms = (LIMB_BITS-1)-l;
+
+                twopwr = seendot ? twopwr-bits+l : l+1-bits;
+            }
+
+            if (seendigit) {
+                if (ms < 0) {
+                    /* High part of the digit completes the current limb */
+                    *mp |= (fp_limb)((fp_2limb)v >> -ms);
+                    mp++;
+                    if (mp > &mult[MANT_LIMBS])
+                        mp = &mult[MANT_LIMBS]; /* Guard slot */
+                    ms += LIMB_BITS;
+                }
+                *mp |= v << ms;
+                ms -= bits;
+
+                if (!seendot)
+                    twopwr += bits;
+            } else {
+                /* Leading zeros after the point only lower the exponent */
+                if (seendot)
+                    twopwr -= bits;
+            }
+        } else if (c == 'p' || c == 'P') {
+            int32_t e;
+            e = read_exponent(string, 20000);
+            if (e == INT32_MAX)
+                return false;
+            twopwr += e;
+            break;
+        } else if (c == '_') {
+            /* ignore */
+        } else {
+            nasm_error(ERR_NONFATAL|ERR_PASS1,
+                       "floating-point constant: `%c' is invalid character", c);
+            return false;
+        }
+    }
+
+    if (!seendigit) {
+        memset(mant, 0, MANT_LIMBS*sizeof(fp_limb)); /* Zero */
+        *exponent = 0;
+    } else {
+        memcpy(mant, mult, MANT_LIMBS*sizeof(fp_limb));
+        *exponent = twopwr;
+    }
+
+    return true;
+}
+
+/*
+ * Shift a mantissa to the right by i bits.
+ */
+static void ieee_shr(fp_limb *mant, int i)
+{
+ fp_limb n, m;
+ int j = 0;
+ int sr, sl, offs;
+
+ sr = i % LIMB_BITS; sl = LIMB_BITS-sr;
+ offs = i/LIMB_BITS;
+
+ if (sr == 0) {
+ if (offs)
+ for (j = MANT_LIMBS-1; j >= offs; j--)
+ mant[j] = mant[j-offs];
+ } else {
+ n = mant[MANT_LIMBS-1-offs] >> sr;
+ for (j = MANT_LIMBS-1; j > offs; j--) {
+ m = mant[j-offs-1];
+ mant[j] = (m << sl) | n;
+ n = m >> sr;
+ }
+ mant[j--] = n;
+ }
+ while (j >= 0)
+ mant[j--] = 0;
+}
+
+ /* Produce standard IEEE formats, with implicit or explicit integer
+ bit; this makes the following assumptions:
+
+ - the sign bit is the MSB, followed by the exponent,
+ followed by the integer bit if present.
+ - the sign bit plus exponent fit in 16 bits.
+ - the exponent bias is 2^(n-1)-1 for an n-bit exponent
+
+ One struct ieee_format describes one such layout; to_float() takes
+ a pointer to it and derives the field positions from these counts. */
+
+ struct ieee_format {
+ int bytes; /* Size of the encoded value in bytes */
+ int mantissa; /* Fractional bits in the mantissa */
+ int explicit; /* Explicit integer */
+ int exponent; /* Bits in the exponent */
+ };
+
+/*
+ * The 16- and 128-bit formats are expected to be in IEEE 754r.
+ * AMD SSE5 uses the 16-bit format.
+ *
+ * The 32- and 64-bit formats are the original IEEE 754 formats.
+ *
+ * The 80-bit format is x87-specific, but widely used.
+ *
+ * The 8-bit format appears to be the consensus 8-bit floating-point
+ * format. It is apparently used in graphics applications.
+ */
+static const struct ieee_format ieee_8 = { 1, 3, 0, 4 };
+static const struct ieee_format ieee_16 = { 2, 10, 0, 5 };
+static const struct ieee_format ieee_32 = { 4, 23, 0, 8 };
+static const struct ieee_format ieee_64 = { 8, 52, 0, 11 };
+static const struct ieee_format ieee_80 = { 10, 63, 1, 15 };
+static const struct ieee_format ieee_128 = { 16, 112, 0, 15 };
+
+ /* Types of values we can generate; to_float() classifies the parsed
+ constant as one of these and then builds the bit pattern for it */
+ enum floats {
+ FL_ZERO, /* mantissa has no leading one bit: encode zero */
+ FL_DENORMAL, /* exponent below the normal range; may underflow to zero */
+ FL_NORMAL, /* exponent within the normal range */
+ FL_INFINITY, /* __infinity__ token, or overflow */
+ FL_QNAN, /* __nan__/__qnan__ token; also used when parsing failed */
+ FL_SNAN /* __snan__ token */
+ };
+
+ /*
+  * Encode the decimal digits in [str, p] as an x87 packed BCD value.
+  *
+  * The digits are consumed from the last character (p) towards the first
+  * (str), two per output byte with the later digit in the low nibble;
+  * '_' separators are skipped.  Nine digit bytes are written to `result'
+  * (zero padded; digits beyond 18 are dropped with a warning), followed
+  * by a sign byte that is 0x80 when s < 0.  Only the 80-bit format is
+  * accepted.  Returns 1 on success, 0 after reporting an error.
+  */
+ static int to_packed_bcd(const char *str, const char *p,
+                          int s, uint8_t *result,
+                          const struct ieee_format *fmt)
+ {
+     int nbytes = 0;     /* digit pairs completed so far (may exceed 9) */
+     int low = -1;       /* pending low nibble, or -1 when none */
+
+     if (fmt != &ieee_80) {
+         nasm_error(ERR_NONFATAL|ERR_PASS1,
+                    "packed BCD requires an 80-bit format");
+         return 0;
+     }
+
+     for (; p >= str; p--) {
+         const char ch = *p;
+
+         if (ch == '_')
+             continue;           /* digit separator */
+
+         if (ch < '0' || ch > '9') {
+             nasm_error(ERR_NONFATAL|ERR_PASS1,
+                        "invalid character `%c' in packed BCD constant", ch);
+             return 0;
+         }
+
+         if (low < 0) {
+             /* First digit of a pair; the 19th digit triggers the warning */
+             if (nbytes == 9) {
+                 nasm_error(ERR_WARNING|ERR_PASS1,
+                            "packed BCD truncated to 18 digits");
+             }
+             low = ch - '0';
+             continue;
+         }
+
+         /* Second digit of a pair completes one output byte */
+         if (nbytes < 9)
+             *result++ = low + ((ch - '0') << 4);
+         nbytes++;
+         low = -1;
+     }
+
+     /* An odd digit count leaves a lone low nibble */
+     if (low >= 0) {
+         if (nbytes < 9)
+             *result++ = low;
+         nbytes++;
+     }
+
+     for (; nbytes < 9; nbytes++)
+         *result++ = 0;
+
+     *result = (s < 0) ? 0x80 : 0;
+
+     return 1; /* success */
+ }
+
+ /*
+  * Convert the textual constant `str' into the binary format described
+  * by `fmt' and store fmt->bytes bytes at `result', least significant
+  * byte first.  The sign bit is set when s < 0.
+  *
+  * Accepted forms: the special tokens __nan__/__qnan__/__snan__/
+  * __infinity__ (selected by the third character), packed BCD (trailing
+  * `p' or leading `0p'), radix-prefixed constants (0x/0h, $, 0o/0q,
+  * 0b/0y, 0d/0t) and plain decimal.
+  *
+  * Returns 1 when a value was written.  A constant that fails to parse
+  * has already been reported by the converter; a quiet NaN is emitted
+  * for it and 1 is still returned.  Returns 0 for an empty string or
+  * when to_packed_bcd() fails.
+  */
+ static int to_float(const char *str, int s, uint8_t *result,
+                     const struct ieee_format *fmt)
+ {
+     fp_limb mant[MANT_LIMBS];
+     int32_t exponent = 0;
+     const int32_t expmax = 1 << (fmt->exponent - 1);
+     /* Location of the integer ("one") bit once the mantissa is in place */
+     fp_limb one_mask = LIMB_TOP_BIT >>
+         ((fmt->exponent+fmt->explicit) % LIMB_BITS);
+     const int one_pos = (fmt->exponent+fmt->explicit)/LIMB_BITS;
+     int i;
+     int shift;
+     enum floats type;
+     bool ok;
+     const bool minus = s < 0;
+     const int bits = fmt->bytes * 8;
+     const char *strend;
+
+     if (!str[0]) {
+         nasm_panic(0,
+                    "internal error: empty string passed to float_const");
+         return 0;
+     }
+
+     strend = strchr(str, '\0');
+     if (strend[-1] == 'P' || strend[-1] == 'p')
+         return to_packed_bcd(str, strend-2, s, result, fmt);
+
+     if (str[0] == '_') {
+         /* Special tokens */
+
+         /* Do not read past the terminator of a one-character token */
+         switch (str[1] ? str[2] : '\0') {
+         case 'n':              /* __nan__ */
+         case 'N':
+         case 'q':              /* __qnan__ */
+         case 'Q':
+             type = FL_QNAN;
+             break;
+         case 's':              /* __snan__ */
+         case 'S':
+             type = FL_SNAN;
+             break;
+         case 'i':              /* __infinity__ */
+         case 'I':
+             type = FL_INFINITY;
+             break;
+         default:
+             nasm_error(ERR_NONFATAL|ERR_PASS1,
+                        "internal error: unknown FP constant token `%s'\n", str);
+             type = FL_QNAN;
+             break;
+         }
+     } else {
+         if (str[0] == '0') {
+             switch (str[1]) {
+             case 'x': case 'X':
+             case 'h': case 'H':
+                 ok = ieee_flconvert_bin(str+2, 4, mant, &exponent);
+                 break;
+             case 'o': case 'O':
+             case 'q': case 'Q':
+                 ok = ieee_flconvert_bin(str+2, 3, mant, &exponent);
+                 break;
+             case 'b': case 'B':
+             case 'y': case 'Y':
+                 ok = ieee_flconvert_bin(str+2, 1, mant, &exponent);
+                 break;
+             case 'd': case 'D':
+             case 't': case 'T':
+                 ok = ieee_flconvert(str+2, mant, &exponent);
+                 break;
+             case 'p': case 'P':
+                 return to_packed_bcd(str+2, strend-1, s, result, fmt);
+             default:
+                 /* Leading zero was just a zero? */
+                 ok = ieee_flconvert(str, mant, &exponent);
+                 break;
+             }
+         } else if (str[0] == '$') {
+             ok = ieee_flconvert_bin(str+1, 4, mant, &exponent);
+         } else {
+             ok = ieee_flconvert(str, mant, &exponent);
+         }
+
+         if (!ok) {
+             type = FL_QNAN;
+         } else if (mant[0] & LIMB_TOP_BIT) {
+             /*
+              * Non-zero.
+              */
+             exponent--;
+             if (exponent >= 2 - expmax && exponent <= expmax) {
+                 type = FL_NORMAL;
+             } else if (exponent > 0) {
+                 if (pass0 == 1)
+                     nasm_error(ERR_WARNING|ERR_WARN_FL_OVERFLOW|ERR_PASS1,
+                                "overflow in floating-point constant");
+                 type = FL_INFINITY;
+             } else {
+                 /* underflow or denormal; the denormal code handles
+                    actual underflow. */
+                 type = FL_DENORMAL;
+             }
+         } else {
+             /* Zero */
+             type = FL_ZERO;
+         }
+     }
+
+     switch (type) {
+     case FL_ZERO:
+     zero:
+         memset(mant, 0, sizeof mant);
+         break;
+
+     case FL_DENORMAL:
+     {
+         shift = -(exponent + expmax - 2 - fmt->exponent)
+             + fmt->explicit;
+         ieee_shr(mant, shift);
+         ieee_round(minus, mant, bits);
+         if (mant[one_pos] & one_mask) {
+             /* One's position is set, we rounded up into normal range */
+             exponent = 1;
+             if (!fmt->explicit)
+                 mant[one_pos] &= ~one_mask; /* remove explicit one */
+             mant[0] |= exponent << (LIMB_BITS-1 - fmt->exponent);
+         } else {
+             if (daz || is_zero(mant)) {
+                 /* Flush denormals to zero */
+                 nasm_error(ERR_WARNING|ERR_WARN_FL_UNDERFLOW|ERR_PASS1,
+                            "underflow in floating-point constant");
+                 goto zero;
+             } else {
+                 nasm_error(ERR_WARNING|ERR_WARN_FL_DENORM|ERR_PASS1,
+                            "denormal floating-point constant");
+             }
+         }
+         break;
+     }
+
+     case FL_NORMAL:
+         exponent += expmax - 1;         /* apply the exponent bias */
+         ieee_shr(mant, fmt->exponent+fmt->explicit);
+         ieee_round(minus, mant, bits);
+         /* did we scale up by one? */
+         if (test_bit(mant, fmt->exponent+fmt->explicit-1)) {
+             ieee_shr(mant, 1);
+             exponent++;
+             if (exponent >= (expmax << 1)-1) {
+                 nasm_error(ERR_WARNING|ERR_WARN_FL_OVERFLOW|ERR_PASS1,
+                            "overflow in floating-point constant");
+                 type = FL_INFINITY;
+                 goto overflow;
+             }
+         }
+
+         if (!fmt->explicit)
+             mant[one_pos] &= ~one_mask; /* remove explicit one */
+         mant[0] |= exponent << (LIMB_BITS-1 - fmt->exponent);
+         break;
+
+     case FL_INFINITY:
+     case FL_QNAN:
+     case FL_SNAN:
+     overflow:
+         /* All-ones exponent; the NaN kinds additionally set a mantissa bit */
+         memset(mant, 0, sizeof mant);
+         mant[0] = (((fp_limb)1 << fmt->exponent)-1)
+             << (LIMB_BITS-1 - fmt->exponent);
+         if (fmt->explicit)
+             mant[one_pos] |= one_mask;
+         if (type == FL_QNAN)
+             set_bit(mant, fmt->exponent+fmt->explicit+1);
+         else if (type == FL_SNAN)
+             set_bit(mant, fmt->exponent+fmt->explicit+fmt->mantissa);
+         break;
+     }
+
+     mant[0] |= minus ? LIMB_TOP_BIT : 0;
+
+     /* Emit the top fmt->bytes bytes of the mantissa, last byte first */
+     for (i = fmt->bytes - 1; i >= 0; i--)
+         *result++ = mant[i/LIMB_BYTES] >> (((LIMB_BYTES-1)-(i%LIMB_BYTES))*8);
+
+     return 1; /* success */
+ }
+
+ /*
+  * Convert the floating-point constant `number' (sign given by `sign')
+  * to the IEEE format that is `bytes' bytes wide and store it at
+  * `result'.  Returns the result of to_float(); an unsupported width
+  * raises a panic and yields 0.
+  */
+ int float_const(const char *number, int sign, uint8_t *result, int bytes)
+ {
+     const struct ieee_format *fmt;
+
+     switch (bytes) {
+     case 1:
+         fmt = &ieee_8;
+         break;
+     case 2:
+         fmt = &ieee_16;
+         break;
+     case 4:
+         fmt = &ieee_32;
+         break;
+     case 8:
+         fmt = &ieee_64;
+         break;
+     case 10:
+         fmt = &ieee_80;
+         break;
+     case 16:
+         fmt = &ieee_128;
+         break;
+     default:
+         nasm_panic(0, "strange value %d passed to float_const", bytes);
+         return 0;
+     }
+
+     return to_float(number, sign, result, fmt);
+ }
+
+ /*
+  * Set floating-point options.
+  *
+  * Recognized (case-insensitively): "daz"/"nodaz" switch flushing of
+  * denormals to zero, "near"/"down"/"up"/"zero" select the rounding
+  * mode, and "default" restores round-to-nearest with daz off.
+  * Returns 0 if the option was recognized, -1 otherwise.
+  */
+ int float_option(const char *option)
+ {
+     if (!nasm_stricmp(option, "daz")) {
+         daz = true;
+     } else if (!nasm_stricmp(option, "nodaz")) {
+         daz = false;
+     } else if (!nasm_stricmp(option, "near")) {
+         rc = FLOAT_RC_NEAR;
+     } else if (!nasm_stricmp(option, "down")) {
+         rc = FLOAT_RC_DOWN;
+     } else if (!nasm_stricmp(option, "up")) {
+         rc = FLOAT_RC_UP;
+     } else if (!nasm_stricmp(option, "zero")) {
+         rc = FLOAT_RC_ZERO;
+     } else if (!nasm_stricmp(option, "default")) {
+         rc = FLOAT_RC_NEAR;
+         daz = false;
+     } else {
+         return -1;              /* Unknown option */
+     }
+
+     return 0;
+ }
diff --git a/asm/float.h b/asm/float.h
new file mode 100644
index 00000000..b07e542a
--- /dev/null
+++ b/asm/float.h
@@ -0,0 +1,54 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * float.h header file for the floating-point constant module of
+ * the Netwide Assembler
+ */
+
+#ifndef NASM_FLOAT_H
+#define NASM_FLOAT_H
+
+#include "nasm.h"
+ /*
+  * Rounding modes for floating-point constants; float_option() selects
+  * one from the option names "near", "zero", "down" and "up".
+  */
+ enum float_round {
+ FLOAT_RC_NEAR, /* to nearest; also what the "default" option selects */
+ FLOAT_RC_ZERO, /* toward zero */
+ FLOAT_RC_DOWN, /* toward minus infinity */
+ FLOAT_RC_UP /* toward plus infinity */
+ };
+
+int float_const(const char *string, int sign, uint8_t *result, int bytes);
+int float_option(const char *option);
+
+#endif
diff --git a/asm/labels.c b/asm/labels.c
new file mode 100644
index 00000000..a8df4780
--- /dev/null
+++ b/asm/labels.c
@@ -0,0 +1,530 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * labels.c label handling for the Netwide Assembler
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "hashtbl.h"
+#include "labels.h"
+
+/*
+ * A local label is one that begins with exactly one period. Things
+ * that begin with _two_ periods are NASM-specific things.
+ *
+ * If TASM compatibility is enabled, a local label can also begin with
+ * @@, so @@local is a TASM compatible local label. Note that we only
+ * check for the first @ symbol, although TASM requires both.
+ */
+#define islocal(l) \
+ (tasm_compatible_mode ? \
+ (((l)[0] == '.' || (l)[0] == '@') && (l)[1] != '.') : \
+ ((l)[0] == '.' && (l)[1] != '.'))
+#define islocalchar(c) \
+ (tasm_compatible_mode ? \
+ ((c) == '.' || (c) == '@') : \
+ ((c) == '.'))
+
+#define LABEL_BLOCK 128 /* no. of labels/block */
+#define LBLK_SIZE (LABEL_BLOCK * sizeof(union label))
+
+#define END_LIST -3 /* don't clash with NO_SEG! */
+#define END_BLOCK -2
+#define BOGUS_VALUE -4
+
+#define PERMTS_SIZE 16384 /* size of text blocks */
+#if (PERMTS_SIZE < IDLEN_MAX)
+ #error "IPERMTS_SIZE must be greater than or equal to IDLEN_MAX"
+#endif
+
+/* values for label.defn.is_global */
+#define DEFINED_BIT 1
+#define GLOBAL_BIT 2
+#define EXTERN_BIT 4
+#define COMMON_BIT 8
+
+#define NOT_DEFINED_YET 0
+#define TYPE_MASK 3
+#define LOCAL_SYMBOL (DEFINED_BIT)
+#define GLOBAL_PLACEHOLDER (GLOBAL_BIT)
+#define GLOBAL_SYMBOL (DEFINED_BIT | GLOBAL_BIT)
+ /*
+  * One slot in a block of LABEL_BLOCK labels.  A slot is either a label
+  * definition (defn) or bookkeeping (admin); admin.movingon shares its
+  * storage with defn.segment and holds END_LIST, END_BLOCK or
+  * BOGUS_VALUE (see init_block() and find_label()).
+  */
+ union label { /* actual label structures */
+ struct {
+ int32_t segment;
+ int64_t offset;
+ char *label, *special; /* name (from perm_copy) and optional backend-specific text */
+ int is_global, is_norm; /* is_global: DEFINED/GLOBAL/EXTERN/COMMON bit mask */
+ } defn;
+ struct {
+ int32_t movingon; /* END_LIST, END_BLOCK or BOGUS_VALUE */
+ int64_t dummy;
+ union label *next; /* in the last slot of a block: the next block, or NULL */
+ } admin;
+ };
+ /*
+  * Label text is never freed individually: perm_copy() appends each
+  * string to the tail block of this list and chains a new block when
+  * the tail does not have enough room; cleanup_labels() frees the list.
+  */
+ struct permts { /* permanent text storage */
+ struct permts *next; /* for the linked list */
+ int size, usage; /* size and used space in ... */
+ char data[PERMTS_SIZE]; /* ... the data block itself */
+ };
+
+ extern int64_t global_offset_changed; /* defined in nasm.c */
+
+ static struct hash_table ltab; /* labels hash table */
+ static union label *ldata; /* all label data blocks */
+ static union label *lfree; /* labels free block */
+ static struct permts *perm_head; /* start of perm. text storage */
+ static struct permts *perm_tail; /* end of perm. text storage */
+
+ static void init_block(union label *blk);
+ static char *perm_copy(const char *string);
+
+ static char *prevlabel; /* label that local labels are currently scoped under; "" if none */
+
+ static bool initialized = false; /* set by init_labels(), cleared by cleanup_labels() */
+
+ char lprefix[PREFIX_MAX] = { 0 }; /* prepended to global symbol names passed to the backends */
+ char lpostfix[PREFIX_MAX] = { 0 }; /* appended to global symbol names passed to the backends */
+
+/*
+ * Emit a symdef to the output and the debug format backends.
+ */
+static void out_symdef(char *name, int32_t segment, int64_t offset,
+ int is_global, char *special)
+{
+ ofmt->symdef(name, segment, offset, is_global, special);
+
+ /*
+ * NASM special symbols are not passed to the debug format; none
+ * of the current backends want to see them.
+ */
+ if (!(name[0] == '.' && name[1] == '.' && name[2] != '@'))
+ dfmt->debug_deflabel(name, segment, offset, is_global, special);
+}
+
+/*
+ * Internal routine: finds the `union label' corresponding to the
+ * given label name. Creates a new one, if it isn't found, and if
+ * `create' is true.
+ */
+static union label *find_label(char *label, int create)
+{
+ char *prev;
+ int prevlen, len;
+ union label *lptr, **lpp;
+ char label_str[IDLEN_MAX];
+ struct hash_insert ip;
+
+ if (islocal(label)) {
+ prev = prevlabel;
+ prevlen = strlen(prev);
+ len = strlen(label);
+ if (prevlen + len >= IDLEN_MAX) {
+ nasm_error(ERR_NONFATAL, "identifier length exceed %i bytes",
+ IDLEN_MAX);
+ return NULL;
+ }
+ memcpy(label_str, prev, prevlen);
+ memcpy(label_str+prevlen, label, len+1);
+ label = label_str;
+ } else {
+ prev = "";
+ prevlen = 0;
+ }
+
+ lpp = (union label **) hash_find(&ltab, label, &ip);
+ lptr = lpp ? *lpp : NULL;
+
+ if (lptr || !create)
+ return lptr;
+
+ /* Create a new label... */
+ if (lfree->admin.movingon == END_BLOCK) {
+ /*
+ * must allocate a new block
+ */
+ lfree->admin.next = (union label *)nasm_malloc(LBLK_SIZE);
+ lfree = lfree->admin.next;
+ init_block(lfree);
+ }
+
+ lfree->admin.movingon = BOGUS_VALUE;
+ lfree->defn.label = perm_copy(label);
+ lfree->defn.special = NULL;
+ lfree->defn.is_global = NOT_DEFINED_YET;
+
+ hash_add(&ip, lfree->defn.label, lfree);
+ return lfree++;
+}
+
+ /*
+  * Fetch the segment and offset of a defined label.  Returns true and
+  * fills in *segment and *offset if the label exists and has been
+  * defined; returns false (leaving both untouched) otherwise, including
+  * when the label manager is not initialized.
+  */
+ bool lookup_label(char *label, int32_t *segment, int64_t *offset)
+ {
+     union label *entry;
+
+     if (!initialized)
+         return false;
+
+     entry = find_label(label, 0);
+     if (!entry || !(entry->defn.is_global & DEFINED_BIT))
+         return false;
+
+     *segment = entry->defn.segment;
+     *offset = entry->defn.offset;
+     return true;
+ }
+
+ /*
+  * Returns true if the label exists and carries the EXTERN bit; false
+  * otherwise, including when the label manager is not initialized.
+  */
+ bool is_extern(char *label)
+ {
+     union label *entry;
+
+     if (!initialized)
+         return false;
+
+     entry = find_label(label, 0);
+     if (!entry)
+         return false;
+
+     return (entry->defn.is_global & EXTERN_BIT) != 0;
+ }
+
+ /*
+  * Report a label to the output and debug backends via out_symdef().
+  *
+  * A label with the GLOBAL bit is emitted under the name
+  * lprefix + label + lpostfix.  Any other label is emitted under its own
+  * name, unless it is EXTERN-only, in which case nothing is emitted.
+  * `special' overrides the text remembered in the label when non-NULL.
+  */
+ static void emit_label_symdef(union label *lptr, int32_t segment,
+                               int64_t offset, char *special)
+ {
+     char *spec = special ? special : lptr->defn.special;
+
+     if (lptr->defn.is_global & GLOBAL_BIT) {
+         char *xsymbol;
+         int slen;
+
+         slen = strlen(lprefix);
+         slen += strlen(lptr->defn.label);
+         slen += strlen(lpostfix);
+         slen++;                 /* room for that null char */
+         xsymbol = nasm_malloc(slen);
+         snprintf(xsymbol, slen, "%s%s%s", lprefix, lptr->defn.label,
+                  lpostfix);
+
+         out_symdef(xsymbol, segment, offset, 1, spec);
+         /* xsymbol is deliberately not freed: outobj.c stores the pointer */
+     } else if ((lptr->defn.is_global & (GLOBAL_BIT | EXTERN_BIT)) != EXTERN_BIT) {
+         out_symdef(lptr->defn.label, segment, offset, 0, spec);
+     }
+ }
+
+ /*
+  * Update the segment and offset of a label on a later pass, creating
+  * the label if necessary.  global_offset_changed is incremented when
+  * the offset differs from the stored one.  The symbol is passed on to
+  * the backends only while pass0 == 1.
+  */
+ void redefine_label(char *label, int32_t segment, int64_t offset, char *special,
+                     bool is_norm, bool isextrn)
+ {
+     union label *lptr;
+
+     /* This routine possibly ought to check for phase errors. Most assemblers
+      * check for phase errors at this point. I don't know whether phase errors
+      * are even possible, nor whether they are checked somewhere else
+      */
+
+     (void)is_norm; /* Only used by the DEBUG trace below */
+     (void)isextrn; /* Only used by the DEBUG trace below */
+
+ #ifdef DEBUG
+ #if DEBUG < 3
+     if (!strncmp(label, "debugdump", 9))
+ #endif
+         nasm_error(ERR_DEBUG, "redefine_label (%s, %"PRIx32", %"PRIx64", %s, %d, %d)",
+                    label, segment, offset, special ? special : "(null)",
+                    is_norm, isextrn);
+ #endif
+
+     lptr = find_label(label, 1);
+     if (!lptr)
+         nasm_panic(0, "can't find label `%s' on pass two", label);
+
+     /* A normal, non-local label becomes the scope for local labels */
+     if (!islocal(label)) {
+         if (!islocalchar(*label) && lptr->defn.is_norm)
+             prevlabel = lptr->defn.label;
+     }
+
+     if (lptr->defn.offset != offset)
+         global_offset_changed++;
+
+     lptr->defn.offset = offset;
+     lptr->defn.segment = segment;
+
+     if (pass0 == 1)
+         emit_label_symdef(lptr, segment, offset, special);
+ }
+
+ /*
+  * Define a label for the first time.  Redefining an already defined
+  * symbol is an error, as is defining a local label before any
+  * non-local one.  The symbol is passed on to the backends when
+  * pass0 == 1, or for a label that is neither normal nor extern and
+  * whose segment number is positive and odd.
+  */
+ void define_label(char *label, int32_t segment, int64_t offset, char *special,
+                   bool is_norm, bool isextrn)
+ {
+     union label *lptr;
+
+ #ifdef DEBUG
+ #if DEBUG<3
+     if (!strncmp(label, "debugdump", 9))
+ #endif
+         nasm_error(ERR_DEBUG, "define_label (%s, %"PRIx32", %"PRIx64", %s, %d, %d)",
+                    label, segment, offset, special ? special : "(null)",
+                    is_norm, isextrn);
+ #endif
+     lptr = find_label(label, 1);
+     if (!lptr)
+         return;
+     if (lptr->defn.is_global & DEFINED_BIT) {
+         nasm_error(ERR_NONFATAL, "symbol `%s' redefined", label);
+         return;
+     }
+     lptr->defn.is_global |= DEFINED_BIT;
+     if (isextrn)
+         lptr->defn.is_global |= EXTERN_BIT;
+
+     if (!islocalchar(label[0]) && is_norm) {
+         /* not local, but not special either */
+         prevlabel = lptr->defn.label;
+     } else if (islocal(label) && !*prevlabel) {
+         nasm_error(ERR_NONFATAL, "attempt to define a local label before any"
+                    " non-local labels");
+     }
+
+     lptr->defn.segment = segment;
+     lptr->defn.offset = offset;
+     lptr->defn.is_norm = (!islocalchar(label[0]) && is_norm);
+
+     if (pass0 == 1 || (!is_norm && !isextrn && (segment > 0) && (segment & 1)))
+         emit_label_symdef(lptr, segment, offset, special);
+ }
+
+ /*
+  * Define a common variable of `size' bytes in `segment'.  A symbol that
+  * is already defined is an error, except that a symbol already marked
+  * common may be defined again after pass 1.  Local labels cannot be
+  * common.  The symbol is passed on to the backends unless pass0 == 0.
+  */
+ void define_common(char *label, int32_t segment, int32_t size, char *special)
+ {
+     union label *lptr = find_label(label, 1);
+
+     if (!lptr)
+         return;
+
+     if (lptr->defn.is_global & DEFINED_BIT) {
+         if (passn == 1 || !(lptr->defn.is_global & COMMON_BIT)) {
+             nasm_error(ERR_NONFATAL, "symbol `%s' redefined", label);
+             return;
+         }
+     }
+     lptr->defn.is_global |= DEFINED_BIT|COMMON_BIT;
+
+     if (islocalchar(label[0])) {
+         nasm_error(ERR_NONFATAL, "attempt to define a local label as a "
+                    "common variable");
+         return;
+     }
+     prevlabel = lptr->defn.label;
+
+     lptr->defn.segment = segment;
+     lptr->defn.offset = 0;
+
+     if (pass0 != 0) {
+         /* The size travels in the offset argument; 2 marks a common symbol */
+         out_symdef(lptr->defn.label, segment, size, 2,
+                    special ? special : lptr->defn.special);
+     }
+ }
+
+ /*
+  * Handle a GLOBAL declaration.  A symbol not seen before becomes a
+  * global placeholder that remembers `special'; a symbol that is already
+  * global is left alone; an already defined, non-extern symbol is
+  * promoted to global with a warning.  Local labels cannot be global.
+  */
+ void declare_as_global(char *label, char *special)
+ {
+     union label *lptr;
+     int kind;
+
+     if (islocal(label)) {
+         nasm_error(ERR_NONFATAL, "attempt to declare local symbol `%s' as"
+                    " global", label);
+         return;
+     }
+
+     lptr = find_label(label, 1);
+     if (!lptr)
+         return;
+
+     kind = lptr->defn.is_global & TYPE_MASK;
+     if (kind == NOT_DEFINED_YET) {
+         lptr->defn.is_global = GLOBAL_PLACEHOLDER;
+         lptr->defn.special = special ? perm_copy(special) : NULL;
+     } else if (kind == LOCAL_SYMBOL) {
+         if (!(lptr->defn.is_global & EXTERN_BIT)) {
+             nasm_error(ERR_WARNING, "symbol `%s': GLOBAL directive "
+                        "after symbol definition is an experimental feature", label);
+             lptr->defn.is_global = GLOBAL_SYMBOL;
+         }
+     }
+     /* GLOBAL_PLACEHOLDER and GLOBAL_SYMBOL: already done, silently ignore */
+ }
+
+ /*
+  * Set up the label manager: the hash table, the first block of label
+  * slots and the first block of permanent text storage.  Always
+  * returns 0.
+  */
+ int init_labels(void)
+ {
+     struct permts *text;
+
+     hash_init(&ltab, HASH_LARGE);
+
+     lfree = nasm_malloc(LBLK_SIZE);
+     ldata = lfree;
+     init_block(lfree);
+
+     text = nasm_malloc(sizeof(struct permts));
+     text->next = NULL;
+     text->size = PERMTS_SIZE;
+     text->usage = 0;
+     perm_head = text;
+     perm_tail = text;
+
+     prevlabel = "";
+
+     initialized = true;
+
+     return 0;
+ }
+
+ /*
+  * Release everything owned by the label manager: the hash table, all
+  * label blocks and all permanent text blocks.  Every pointer into the
+  * freed storage is reset, so calling this again is harmless and
+  * local_scope() cannot hand out a dangling name afterwards.
+  */
+ void cleanup_labels(void)
+ {
+     union label *blk, *next_blk;
+
+     initialized = false;
+
+     hash_free(&ltab);
+
+     /* The last slot of each block links to the next block */
+     for (blk = ldata; blk; blk = next_blk) {
+         next_blk = blk[LABEL_BLOCK-1].admin.next;
+         nasm_free(blk);
+     }
+     ldata = lfree = NULL;
+
+     while (perm_head) {
+         struct permts *dead = perm_head;
+
+         perm_head = dead->next;
+         nasm_free(dead);
+     }
+     perm_tail = NULL;
+
+     /* prevlabel pointed into the text storage that was just freed */
+     prevlabel = "";
+ }
+
+ /*
+  * Prepare a freshly allocated block of LABEL_BLOCK slots: every slot
+  * but the last is marked END_LIST; the last one is marked END_BLOCK
+  * and its link to the next block is cleared.
+  */
+ static void init_block(union label *blk)
+ {
+     union label *slot;
+     union label *last = &blk[LABEL_BLOCK - 1];
+
+     for (slot = blk; slot < last; slot++)
+         slot->admin.movingon = END_LIST;
+
+     last->admin.movingon = END_BLOCK;
+     last->admin.next = NULL;
+ }
+
+ /*
+  * Copy `string', including its terminator, into permanent text storage
+  * and return the copy.  A new text block is chained on when the tail
+  * block does not have enough room left.
+  */
+ static char *perm_copy(const char *string)
+ {
+     int len = strlen(string)+1;
+     char *copy;
+
+     nasm_assert(len <= PERMTS_SIZE);
+
+     if (len > perm_tail->size - perm_tail->usage) {
+         struct permts *blk = nasm_malloc(sizeof(struct permts));
+
+         blk->next = NULL;
+         blk->size = PERMTS_SIZE;
+         blk->usage = 0;
+         perm_tail->next = blk;
+         perm_tail = blk;
+     }
+
+     copy = &perm_tail->data[perm_tail->usage];
+     perm_tail->usage += len;
+     memcpy(copy, string, len);
+
+     return copy;
+ }
+
+ /*
+  * Returns the name that `label' is scoped under: the current non-local
+  * label if `label' is a local label, the empty string otherwise.
+  */
+ char *local_scope(char *label)
+ {
+     if (islocal(label))
+         return prevlabel;
+
+     return "";
+ }
+
+/*
+ * Notes regarding bug involving redefinition of external segments.
+ *
+ * Up to and including v0.97, the following code didn't work. From 0.97
+ * developers release 2 onwards, it will generate an error.
+ *
+ * EXTERN extlabel
+ * newlabel EQU extlabel + 1
+ *
+ * The results of allowing this code through are that two import records
+ * are generated, one for 'extlabel' and one for 'newlabel'.
+ *
+ * The reason for this is an inadequacy in the defined interface between
+ * the label manager and the output formats. The problem lies in how the
+ * output format driver tells that a label is an external label for which
+ * a label import record must be produced. Most (all except bin?) produce
+ * the record if the segment number of the label is not one of the internal
+ * segments that the output driver is producing.
+ *
+ * A simple fix to this would be to make the output formats keep track of
+ * which symbols they've produced import records for, and make them not
+ * produce import records for segments that are already defined.
+ *
+ * The best way, which is slightly harder but reduces duplication of code
+ * and should therefore make the entire system smaller and more stable is
+ * to change the interface between assembler, define_label(), and
+ * the output module. The changes that are needed are:
+ *
+ * The semantics of the 'isextern' flag passed to define_label() need
+ * examining. This information may or may not tell us what we need to
+ * know (ie should we be generating an import record at this point for this
+ * label). If these aren't the semantics, the semantics should be changed
+ * to this.
+ *
+ * The output module interface needs changing, so that the `isextern' flag
+ * is passed to the module, so that it can be easily tested for.
+ */
diff --git a/asm/listing.c b/asm/listing.c
new file mode 100644
index 00000000..084386dc
--- /dev/null
+++ b/asm/listing.c
@@ -0,0 +1,338 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * listing.c listing file generator for the Netwide Assembler
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "listing.h"
+
+ #define LIST_MAX_LEN 216 /* something sensible */
+ #define LIST_INDENT 40 /* listdata[] is sized as 2 * LIST_INDENT */
+ #define LIST_HEXBIT 18 /* max characters of hex data on one listing line */
+
+ typedef struct MacroInhibit MacroInhibit;
+
+ static struct MacroInhibit {
+ MacroInhibit *next; /* next (older) entry on the stack */
+ int level; /* listlevel at which this entry was pushed */
+ int inhibiting; /* true: LIST_MACRO lines are not listed (see list_line) */
+ } *mistack;
+
+ static const char xdigit[] = "0123456789ABCDEF";
+
+ /*
+  * Store the two upper-case hex digits of the low byte of `b' at (a)[0]
+  * and (a)[1]; no terminator is written.  Both arguments are evaluated
+  * twice.  The expansion is a plain expression (no trailing semicolon),
+  * so the macro can be used anywhere a statement or expression can.
+  */
+ #define HEX(a,b) (*(a)=xdigit[((b)>>4)&15],(a)[1]=xdigit[(b)&15])
+
+ static char listline[LIST_MAX_LEN]; /* source text of the pending line */
+ static bool listlinep; /* true while listline holds an unemitted line */
+
+ static char listerror[LIST_MAX_LEN]; /* pending error text, "" if none */
+
+ static char listdata[2 * LIST_INDENT]; /* we need less than that actually */
+ static int32_t listoffset; /* offset printed in front of listdata */
+
+ static int32_t listlineno; /* source line number of the pending line */
+
+ static int32_t listp; /* nonzero once list_init() has opened the listing file */
+
+ static int suppress; /* for INCBIN & TIMES special cases */
+
+ static int listlevel, listlevel_e; /* current nesting level, and the level of the pending line */
+
+ static FILE *listfp; /* listing file, NULL if none */
+
+ /*
+  * Write the pending listing line (line number, offset and hex data,
+  * nesting level and source text) to the listing file, followed by the
+  * pending error text if there is any, and clear the pending state.
+  * Does nothing when neither a source line nor data is pending.
+  */
+ static void list_emit(void)
+ {
+     int i;
+
+     if (!listlinep && !listdata[0])
+         return;
+
+     fprintf(listfp, "%6"PRId32" ", listlineno);
+
+     if (listdata[0])
+         fprintf(listfp, "%08"PRIX32" %-*s", listoffset, LIST_HEXBIT + 1,
+                 listdata);
+     else
+         fprintf(listfp, "%*s", LIST_HEXBIT + 10, "");
+
+     /*
+      * The padding must follow the number that is actually printed,
+      * which is the level recorded for this line (listlevel_e), not the
+      * current level.
+      */
+     if (listlevel_e)
+         fprintf(listfp, "%s<%d>", (listlevel_e < 10 ? " " : ""),
+                 listlevel_e);
+     else if (listlinep)
+         fprintf(listfp, " ");
+
+     if (listlinep)
+         fprintf(listfp, " %s", listline);
+
+     putc('\n', listfp);
+     listlinep = false;
+     listdata[0] = '\0';
+
+     if (listerror[0]) {
+         fprintf(listfp, "%6"PRId32" ", listlineno);
+         for (i = 0; i < LIST_HEXBIT; i++)
+             putc('*', listfp);
+
+         if (listlevel_e)
+             fprintf(listfp, " %s<%d>", (listlevel_e < 10 ? " " : ""),
+                     listlevel_e);
+         else
+             fprintf(listfp, " ");
+
+         fprintf(listfp, " %s\n", listerror);
+         listerror[0] = '\0';
+     }
+ }
+
+ /*
+  * Start a listing in the file `fname'.  With a NULL or empty name, or
+  * if the file cannot be opened (an error is reported), listing stays
+  * disabled: listp is cleared so the other routines do nothing.
+  */
+ static void list_init(const char *fname)
+ {
+     /* Disabled until the file has been opened successfully */
+     listp = false;
+     listfp = NULL;
+
+     if (!fname || fname[0] == '\0')
+         return;
+
+     listfp = nasm_open_write(fname, NF_TEXT);
+     if (!listfp) {
+         nasm_error(ERR_NONFATAL, "unable to open listing file `%s'",
+                    fname);
+         return;
+     }
+
+     /* Start with no pending line, data or error */
+     *listline = '\0';
+     listlinep = false;
+     listdata[0] = '\0';
+     listlineno = 0;
+     *listerror = '\0';
+     listp = true;
+     listlevel = 0;
+     listlevel_e = 0;
+     suppress = 0;
+     mistack = nasm_malloc(sizeof(MacroInhibit));
+     mistack->next = NULL;
+     mistack->level = 0;
+     mistack->inhibiting = true;
+ }
+
+ /*
+  * Finish the listing: free the macro-inhibit stack, flush the pending
+  * line and close the file.  Listing is marked disabled afterwards so
+  * that later calls into this module cannot touch the closed stream.
+  */
+ static void list_cleanup(void)
+ {
+     if (!listp)
+         return;
+
+     while (mistack) {
+         MacroInhibit *temp = mistack;
+         mistack = temp->next;
+         nasm_free(temp);
+     }
+
+     list_emit();
+     fclose(listfp);
+     listfp = NULL;
+     listp = false;
+ }
+
+ /*
+  * Append the text `str' to the hex-data column.  If it would not fit
+  * within LIST_HEXBIT characters, the current line is marked with a
+  * trailing "-" and emitted first.  `offset' becomes the offset of the
+  * line when `str' is the first item on it.
+  */
+ static void list_out(int32_t offset, char *str)
+ {
+     const size_t used = strlen(listdata);
+
+     if (used + strlen(str) > LIST_HEXBIT) {
+         strcat(listdata, "-");
+         list_emit();            /* leaves listdata empty */
+     }
+
+     if (listdata[0] == '\0')
+         listoffset = offset;
+
+     strcat(listdata, str);
+ }
+
+ /*
+  * List an address of `size' bytes (at most 8) as hex, least significant
+  * byte first, enclosed between brackets[0] and brackets[1].
+  */
+ static void list_address(int32_t offset, const char *brackets,
+                          int64_t addr, int size)
+ {
+     char text[20];
+     char *out = text;
+
+     nasm_assert(size <= 8);
+
+     *out++ = brackets[0];
+     for (; size; size--) {
+         HEX(out, addr);
+         out += 2;
+         addr >>= 8;
+     }
+     *out++ = brackets[1];
+     *out = '\0';
+
+     list_out(offset, text);
+ }
+
+ /*
+  * Add one piece of assembler output to the hex-data column.  Raw data
+  * is listed byte by byte, addresses in [] and relative addresses in (),
+  * and reserved space as <res N>.  Nothing is listed while listing is
+  * off, suppressed (INCBIN/TIMES) or disabled by the user.
+  */
+ static void list_output(int32_t offset, const void *data,
+                         enum out_type type, uint64_t size)
+ {
+     char q[20];
+
+     if (!listp || suppress || user_nolist)
+         return;
+
+     switch (type) {
+     case OUT_RAWDATA:
+     {
+         uint8_t const *bytes = data;
+         uint64_t k;
+
+         if (size == 0 && !listdata[0])
+             listoffset = offset;
+         for (k = 0; k < size; k++) {
+             HEX(q, bytes[k]);
+             q[2] = '\0';
+             list_out(offset++, q);
+         }
+         break;
+     }
+     case OUT_ADDRESS:
+         list_address(offset, "[]", *(int64_t *)data, abs((int)size));
+         break;
+     case OUT_REL1ADR:
+     case OUT_REL2ADR:
+     case OUT_REL4ADR:
+     case OUT_REL8ADR:
+     {
+         /* The width is implied by the type, not by `size' */
+         const int width = (type == OUT_REL1ADR) ? 1 :
+                           (type == OUT_REL2ADR) ? 2 :
+                           (type == OUT_REL4ADR) ? 4 : 8;
+
+         list_address(offset, "()", *(int64_t *)data, width);
+         break;
+     }
+     case OUT_RESERVE:
+         snprintf(q, sizeof(q), "<res %08"PRIX64">", size);
+         list_out(offset, q);
+         break;
+     }
+ }
+
+ /*
+  * Record a new source line.  The previously pending line is emitted
+  * first.  While the top of the macro-inhibit stack is inhibiting,
+  * LIST_MACRO lines are dropped; any other line type pops that entry.
+  */
+ static void list_line(int type, char *line)
+ {
+     if (!listp || user_nolist)
+         return;
+
+     if (mistack && mistack->inhibiting) {
+         MacroInhibit *top;
+
+         if (type == LIST_MACRO)
+             return;
+
+         /* pop the m i stack */
+         top = mistack;
+         mistack = top->next;
+         nasm_free(top);
+     }
+
+     list_emit();
+
+     listlineno = src_get_linnum();
+     listlevel_e = listlevel;
+     strncpy(listline, line, LIST_MAX_LEN - 1);
+     listline[LIST_MAX_LEN - 1] = '\0';
+     listlinep = true;
+ }
+
+ /*
+  * Enter a nested listing context.  INCBIN and TIMES do not nest: they
+  * set their suppress bit and list a <incbin> or <rept> marker.  Any
+  * other type raises the nesting level; an include seen while macro
+  * lines are inhibited pushes a non-inhibiting entry, and a no-list
+  * macro pushes an inhibiting one.
+  */
+ static void list_uplevel(int type)
+ {
+     bool push = false;
+     bool inhibit = false;
+
+     if (!listp)
+         return;
+
+     if (type == LIST_INCBIN) {
+         suppress |= 1;
+         list_out(listoffset, "<incbin>");
+         return;
+     }
+     if (type == LIST_TIMES) {
+         suppress |= 2;
+         list_out(listoffset, "<rept>");
+         return;
+     }
+
+     listlevel++;
+
+     if (mistack && mistack->inhibiting && type == LIST_INCLUDE) {
+         push = true;
+         inhibit = false;
+     } else if (type == LIST_MACRO_NOLIST) {
+         push = true;
+         inhibit = true;
+     }
+
+     if (push) {
+         MacroInhibit *entry = nasm_malloc(sizeof(MacroInhibit));
+
+         entry->next = mistack;
+         entry->level = listlevel;
+         entry->inhibiting = inhibit;
+         mistack = entry;
+     }
+ }
+
+ /*
+  * Leave a nested listing context.  INCBIN and TIMES clear their
+  * suppress bit; any other type lowers the nesting level and pops every
+  * macro-inhibit entry that was pushed at a deeper level.
+  */
+ static void list_downlevel(int type)
+ {
+     if (!listp)
+         return;
+
+     if (type == LIST_INCBIN) {
+         suppress &= ~1;
+         return;
+     }
+     if (type == LIST_TIMES) {
+         suppress &= ~2;
+         return;
+     }
+
+     listlevel--;
+     while (mistack && mistack->level > listlevel) {
+         MacroInhibit *top = mistack;
+
+         mistack = top->next;
+         nasm_free(top);
+     }
+ }
+
+ /*
+  * Remember an error message (prefix followed by text) so that it is
+  * listed after the line it belongs to.  A fatal or worse error is
+  * flushed at once.  Does nothing when there is no listing file.
+  */
+ static void list_error(int severity, const char *pfx, const char *msg)
+ {
+     const bool fatal = (severity & ERR_MASK) >= ERR_FATAL;
+
+     if (!listfp)
+         return;
+
+     snprintf(listerror, sizeof listerror, "%s%s", pfx, msg);
+
+     if (fatal)
+         list_emit();
+ }
+
+ /*
+  * The listing generator as seen by the rest of the assembler.  The
+  * initializer is positional, so the entries must stay in the member
+  * order of struct lfmt.  `lfmt' is the pointer that other modules
+  * call through.
+  */
+ static const struct lfmt nasm_list = {
+ list_init,
+ list_cleanup,
+ list_output,
+ list_line,
+ list_uplevel,
+ list_downlevel,
+ list_error
+ };
+
+ const struct lfmt *lfmt = &nasm_list;
diff --git a/asm/listing.h b/asm/listing.h
new file mode 100644
index 00000000..58b5eb8a
--- /dev/null
+++ b/asm/listing.h
@@ -0,0 +1,108 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * listing.h header file for listing.c
+ */
+
+#ifndef NASM_LISTING_H
+#define NASM_LISTING_H
+
+/*
+ * List-file generators should look like this:
+ */
+struct lfmt {
+ /*
+ * Called to initialize the listing file generator. Before this
+ * is called, the other routines will silently do nothing when
+ * called. The `const char *' parameter is the file name to write
+ * the listing to.
+ */
+ void (*init)(const char *fname);
+
+ /*
+ * Called to clear stuff up and close the listing file.
+ */
+ void (*cleanup)(void);
+
+ /*
+ * Called to output binary data. Parameters are: the offset;
+ * the data; the data type; the size that goes with the data.
+ * Data types are similar to the output-format interface, only
+ * OUT_ADDRESS will _always_ be displayed as if it's relocatable,
+ * so ensure that any non-relocatable address has been converted
+ * to OUT_RAWDATA by then. Note that OUT_RAWDATA,0 is a valid data
+ * type, and is a dummy call used to give the listing generator an
+ * offset to work with when doing things like uplevel(LIST_TIMES)
+ * or uplevel(LIST_INCBIN).
+ */
+ void (*output)(int32_t offset, const void *data, enum out_type type, uint64_t size);
+
+ /*
+ * Called to send a text line to the listing generator. The
+ * `int' parameter is LIST_READ or LIST_MACRO depending on
+ * whether the line came directly from an input file or is the
+ * result of a multi-line macro expansion. The `char *'
+ * parameter is the text of the line.
+ */
+ void (*line)(int type, char *line);
+
+ /*
+ * Called to change one of the various levelled mechanisms in
+ * the listing generator. LIST_INCLUDE and LIST_MACRO can be
+ * used to increase the nesting level of include files and
+ * macro expansions; LIST_TIMES and LIST_INCBIN switch on the
+ * two binary-output-suppression mechanisms for large-scale
+ * pseudo-instructions.
+ *
+ * LIST_MACRO_NOLIST is synonymous with LIST_MACRO except that
+ * it indicates the beginning of the expansion of a `nolist'
+ * macro, so anything under that level won't be expanded unless
+ * it includes another file.
+ */
+ void (*uplevel)(int type);
+
+ /*
+ * Reverse the effects of uplevel.
+ */
+ void (*downlevel)(int type);
+
+ /*
+ * Called on a warning or error. `severity' is the error
+ * severity code, `pfx' is a prefix string for the message and
+ * `msg' is the message text itself.
+ */
+ void (*error)(int severity, const char *pfx, const char *msg);
+};
+
+extern const struct lfmt *lfmt;
+extern bool user_nolist;
+
+#endif
diff --git a/asm/nasm.c b/asm/nasm.c
new file mode 100644
index 00000000..ca5584ea
--- /dev/null
+++ b/asm/nasm.c
@@ -0,0 +1,2142 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The Netwide Assembler main program module
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "saa.h"
+#include "raa.h"
+#include "float.h"
+#include "stdscan.h"
+#include "insns.h"
+#include "preproc.h"
+#include "parser.h"
+#include "eval.h"
+#include "assemble.h"
+#include "labels.h"
+#include "outform.h"
+#include "listing.h"
+#include "iflag.h"
+#include "ver.h"
+
+/*
+ * This is the maximum number of optimization passes to do. If we ever
+ * find a case where the optimizer doesn't naturally converge, we might
+ * have to drop this value so the assembler doesn't appear to just hang.
+ */
+#define MAX_OPTIMIZE (INT_MAX >> 1)
+
+struct forwrefinfo { /* info held on forward refs. */
+ int lineno;
+ int operand;
+};
+
+static int get_bits(char *value);
+static iflag_t get_cpu(char *cpu_str);
+static void parse_cmdline(int, char **);
+static void assemble_file(char *, StrList **);
+static bool is_suppressed_warning(int severity);
+static bool skip_this_pass(int severity);
+static void nasm_verror_gnu(int severity, const char *fmt, va_list args);
+static void nasm_verror_vc(int severity, const char *fmt, va_list args);
+static void nasm_verror_common(int severity, const char *fmt, va_list args);
+static void usage(void);
+
+static bool using_debug_info, opt_verbose_info;
+static const char *debug_format;
+
+bool tasm_compatible_mode = false;
+int pass0, passn;
+int globalrel = 0;
+int globalbnd = 0;
+
+static time_t official_compile_time;
+
+static char inname[FILENAME_MAX];
+static char outname[FILENAME_MAX];
+static char listname[FILENAME_MAX];
+static char errname[FILENAME_MAX];
+static int globallineno; /* for forward-reference tracking */
+/* static int pass = 0; */
+const struct ofmt *ofmt = &OF_DEFAULT;
+const struct ofmt_alias *ofmt_alias = NULL;
+const struct dfmt *dfmt;
+
+static FILE *error_file; /* Where to write error messages */
+
+FILE *ofile = NULL;
+int optimizing = MAX_OPTIMIZE; /* number of optimization passes to take */
+static int sb, cmd_sb = 16; /* by default */
+
+static iflag_t cpu;
+static iflag_t cmd_cpu;
+
+int64_t global_offset_changed; /* referenced in labels.c */
+int64_t prev_offset_changed;
+int32_t stall_count;
+
+struct location location;
+int in_abs_seg; /* Flag we are in ABSOLUTE seg */
+int32_t abs_seg; /* ABSOLUTE segment basis */
+int32_t abs_offset; /* ABSOLUTE offset */
+
+static struct RAA *offsets;
+
+static struct SAA *forwrefs; /* keep track of forward references */
+static const struct forwrefinfo *forwref;
+
+static const struct preproc_ops *preproc;
+
+#define OP_NORMAL (1u << 0)
+#define OP_PREPROCESS (1u << 1)
+#define OP_DEPEND (1u << 2)
+
+static unsigned int operating_mode;
+
+/* Dependency flags */
+static bool depend_emit_phony = false;
+static bool depend_missing_ok = false;
+static const char *depend_target = NULL;
+static const char *depend_file = NULL;
+
+/*
+ * Which of the suppressible warnings are suppressed. Entry zero
+ * isn't an actual warning, but it is used for -w+error/-Werror.
+ */
+
+static bool warning_on[ERR_WARN_MAX+1]; /* Current state */
+static bool warning_on_global[ERR_WARN_MAX+1]; /* Command-line state */
+
+/*
+ * Table of the suppressible warnings, indexed the same way as
+ * warning_on[] and warning_on_global[].
+ */
+static const struct warning {
+ const char *name; /* name matched against -w+NAME / -WNAME */
+ const char *help; /* description printed in the -h summary */
+ bool enabled; /* default state before the command line is parsed */
+} warnings[ERR_WARN_MAX+1] = {
+ {"error", "treat warnings as errors", false},
+ {"macro-params", "macro calls with wrong parameter count", true},
+ {"macro-selfref", "cyclic macro references", false},
+ {"macro-defaults", "macros with more default than optional parameters", true},
+ {"orphan-labels", "labels alone on lines without trailing `:'", true},
+ {"number-overflow", "numeric constant does not fit", true},
+ {"gnu-elf-extensions", "using 8- or 16-bit relocation in ELF32, a GNU extension", false},
+ {"float-overflow", "floating point overflow", true},
+ {"float-denorm", "floating point denormal", false},
+ {"float-underflow", "floating point underflow", false},
+ {"float-toolong", "too many digits in floating-point number", true},
+ {"user", "%warning directives", true},
+ {"lock", "lock prefix on unlockable instructions", true},
+ {"hle", "invalid hle prefixes", true},
+ {"bnd", "invalid bnd prefixes", true},
+ {"zext-reloc", "relocation zero-extended to match output format", true},
+ {"ptr", "non-NASM keyword used in other assemblers", true},
+};
+
+static bool want_usage;
+static bool terminate_after_phase;
+bool user_nolist = false;
+
+static char *quote_for_make(const char *str);
+
+/*
+ * Return the current assembly offset: the ABSOLUTE offset while in an
+ * ABSOLUTE segment, otherwise the offset recorded for the current
+ * segment.
+ */
+static int64_t get_curr_offs(void)
+{
+    if (in_abs_seg)
+        return abs_offset;
+
+    return raa_read(offsets, location.segment);
+}
+
+/*
+ * Store l_off as the current assembly offset: into abs_offset while in
+ * an ABSOLUTE segment, otherwise into the per-segment offset table.
+ */
+static void set_curr_offs(int64_t l_off)
+{
+    if (!in_abs_seg) {
+        offsets = raa_write(offsets, location.segment, l_off);
+        return;
+    }
+
+    abs_offset = l_off;
+}
+
+/*
+ * Write `line' followed by a newline to `outfile', or to standard
+ * output when `outfile' is NULL.
+ */
+static void nasm_fputs(const char *line, FILE * outfile)
+{
+    if (!outfile) {
+        puts(line);
+        return;
+    }
+
+    fputs(line, outfile);
+    putc('\n', outfile);
+}
+
+/*
+ * Convert a broken-down time to seconds since the Epoch using the
+ * formula from IEEE 1003.1:2004, section 4.14.  Only tm_year, tm_yday,
+ * tm_hour, tm_min and tm_sec are read.
+ */
+static int64_t make_posix_time(struct tm *tm)
+{
+    const int64_t year = tm->tm_year;
+    int64_t days, seconds;
+
+    /* Whole days from the Epoch to the start of the given day */
+    days = (year - 70) * 365;
+    days += (year - 69) / 4;
+    days -= (year - 1) / 100;
+    days += (year + 299) / 400;
+    days += tm->tm_yday;
+
+    seconds = days * 24 + tm->tm_hour;
+    seconds = seconds * 60 + tm->tm_min;
+    seconds = seconds * 60 + tm->tm_sec;
+
+    return seconds;
+}
+
+/*
+ * Pre-define the date and time macros derived from
+ * official_compile_time: __DATE__, __DATE_NUM__, __TIME__ and
+ * __TIME_NUM__ from local time, their __UTC_* counterparts from UTC,
+ * and __POSIX_TIME__ (preferring UTC, falling back to local time).
+ * A group is skipped when the corresponding conversion fails, and
+ * __POSIX_TIME__ is skipped when the computed value is zero.
+ */
+static void define_macros_early(void)
+{
+    static const char * const local_fmts[] = {
+        "__DATE__=\"%Y-%m-%d\"",
+        "__DATE_NUM__=%Y%m%d",
+        "__TIME__=\"%H:%M:%S\"",
+        "__TIME_NUM__=%H%M%S"
+    };
+    static const char * const utc_fmts[] = {
+        "__UTC_DATE__=\"%Y-%m-%d\"",
+        "__UTC_DATE_NUM__=%Y%m%d",
+        "__UTC_TIME__=\"%H:%M:%S\"",
+        "__UTC_TIME_NUM__=%H%M%S"
+    };
+    char temp[128];
+    struct tm lt, gm;
+    struct tm *lt_p, *gm_p;
+    int64_t posix_time = 0;
+    size_t i;
+
+    /* Copy each result out before the next conversion call is made */
+    lt_p = localtime(&official_compile_time);
+    if (lt_p) {
+        lt = *lt_p;
+        for (i = 0; i < sizeof local_fmts / sizeof local_fmts[0]; i++) {
+            strftime(temp, sizeof temp, local_fmts[i], &lt);
+            preproc->pre_define(temp);
+        }
+    }
+
+    gm_p = gmtime(&official_compile_time);
+    if (gm_p) {
+        gm = *gm_p;
+        for (i = 0; i < sizeof utc_fmts / sizeof utc_fmts[0]; i++) {
+            strftime(temp, sizeof temp, utc_fmts[i], &gm);
+            preproc->pre_define(temp);
+        }
+    }
+
+    if (gm_p)
+        posix_time = make_posix_time(&gm);
+    else if (lt_p)
+        posix_time = make_posix_time(&lt);
+
+    if (posix_time) {
+        snprintf(temp, sizeof temp, "__POSIX_TIME__=%"PRId64, posix_time);
+        preproc->pre_define(temp);
+    }
+}
+
+/*
+ * Pre-define the macros that depend on the command line; currently
+ * only __OUTPUT_FORMAT__.
+ */
+static void define_macros_late(void)
+{
+    char temp[128];
+    const char *format_name;
+
+    /*
+     * If the output format was selected through an alias, the short
+     * name of the alias itself must be used here, otherwise ABI
+     * backward compatibility gets broken.
+     */
+    if (ofmt_alias)
+        format_name = ofmt_alias->shortname;
+    else
+        format_name = ofmt->shortname;
+
+    snprintf(temp, sizeof(temp), "__OUTPUT_FORMAT__=%s", format_name);
+    preproc->pre_define(temp);
+}
+
+/*
+ * Write the Makefile dependency rule for depend_target to depend_file,
+ * or to stdout when no file (or "-") was given, then free the list.
+ *
+ * Every file name is passed through quote_for_make(), both in the
+ * prerequisite list and, when phony targets are requested, in the
+ * phony rules; the two spellings have to match for make to connect
+ * them.  The list nodes are released on every path, including when
+ * the dependency file cannot be opened.
+ */
+static void emit_dependencies(StrList *list)
+{
+    FILE *deps;
+    int linepos, len;
+    StrList *l, *nl;
+
+    if (depend_file && strcmp(depend_file, "-")) {
+        deps = nasm_open_write(depend_file, NF_TEXT);
+        if (!deps) {
+            nasm_error(ERR_NONFATAL|ERR_NOFILE|ERR_USAGE,
+                       "unable to write dependency file `%s'", depend_file);
+            list_for_each_safe(l, nl, list)
+                nasm_free(l);
+            return;
+        }
+    } else {
+        deps = stdout;
+    }
+
+    linepos = fprintf(deps, "%s:", depend_target);
+    list_for_each(l, list) {
+        char *file = quote_for_make(l->str);
+        len = strlen(file);
+        /* Wrap with a continuation once the line would pass column 62 */
+        if (linepos + len > 62 && linepos > 1) {
+            fprintf(deps, " \\\n ");
+            linepos = 1;
+        }
+        fprintf(deps, " %s", file);
+        linepos += len+1;
+        nasm_free(file);
+    }
+    fprintf(deps, "\n\n");
+
+    list_for_each_safe(l, nl, list) {
+        if (depend_emit_phony) {
+            char *file = quote_for_make(l->str);
+            fprintf(deps, "%s:\n\n", file);
+            nasm_free(file);
+        }
+        nasm_free(l);
+    }
+
+    if (deps != stdout)
+        fclose(deps);
+}
+
+/*
+ * Program entry point.
+ *
+ * Initializes the global state, parses the command line, selects the
+ * debug format, and then runs in one of three modes: dependency
+ * generation only (OP_DEPEND), preprocess only (OP_PREPROCESS) or
+ * normal assembly (OP_NORMAL).  Dependencies collected along the way
+ * are emitted at the end.  Returns nonzero if any phase reported an
+ * error.
+ *
+ * The default dependency target is derived from outname only just
+ * before the dependencies are emitted: when no -o option is given,
+ * outname is empty until ofmt->filename() has filled it in, which
+ * happens inside the OP_DEPEND and OP_NORMAL branches.
+ */
+int main(int argc, char **argv)
+{
+    StrList *depend_list = NULL, **depend_ptr;
+
+    time(&official_compile_time);
+
+    iflag_set(&cpu, IF_PLEVEL);
+    iflag_set(&cmd_cpu, IF_PLEVEL);
+
+    pass0 = 0;
+    want_usage = terminate_after_phase = false;
+    nasm_set_verror(nasm_verror_gnu);
+
+    error_file = stderr;
+
+    tolower_init();
+    src_init();
+
+    offsets = raa_init();
+    forwrefs = saa_init((int32_t)sizeof(struct forwrefinfo));
+
+    preproc = &nasmpp;
+    operating_mode = OP_NORMAL;
+
+    /* Define some macros dependent on the runtime, but not
+       on the command line. */
+    define_macros_early();
+
+    parse_cmdline(argc, argv);
+
+    if (terminate_after_phase) {
+        if (want_usage)
+            usage();
+        return 1;
+    }
+
+    if (!using_debug_info) {
+        /* No debug info, redirect to the null backend (empty stubs) */
+        dfmt = &null_debug_form;
+    } else if (!debug_format) {
+        /* Default debug format for this backend */
+        dfmt = ofmt->default_dfmt;
+    } else {
+        dfmt = dfmt_find(ofmt, debug_format);
+        if (!dfmt) {
+            nasm_fatal(ERR_NOFILE | ERR_USAGE,
+                       "unrecognized debug format `%s' for"
+                       " output format `%s'",
+                       debug_format, ofmt->shortname);
+        }
+    }
+
+    if (ofmt->stdmac)
+        preproc->extra_stdmac(ofmt->stdmac);
+
+    /* define some macros dependent of command-line */
+    define_macros_late();
+
+    depend_ptr = (depend_file || (operating_mode & OP_DEPEND)) ? &depend_list : NULL;
+
+    if (operating_mode & OP_DEPEND) {
+        char *line;
+
+        if (depend_missing_ok)
+            preproc->include_path(NULL);    /* "assume generated" */
+
+        preproc->reset(inname, 0, depend_ptr);
+        if (outname[0] == '\0')
+            ofmt->filename(inname, outname);
+        ofile = NULL;
+        while ((line = preproc->getline()))
+            nasm_free(line);
+        preproc->cleanup(0);
+    } else if (operating_mode & OP_PREPROCESS) {
+        char *line;
+        const char *file_name = NULL;
+        int32_t prior_linnum = 0;
+        int lineinc = 0;
+
+        if (*outname) {
+            ofile = nasm_open_write(outname, NF_TEXT);
+            if (!ofile)
+                nasm_fatal(ERR_NOFILE,
+                           "unable to open output file `%s'",
+                           outname);
+        } else
+            ofile = NULL;
+
+        location.known = false;
+
+        /* pass = 1; */
+        preproc->reset(inname, 3, depend_ptr);
+        memcpy(warning_on, warning_on_global,
+               (ERR_WARN_MAX+1) * sizeof(bool));
+
+        while ((line = preproc->getline())) {
+            /*
+             * We generate %line directives if needed for later programs
+             */
+            int32_t linnum = prior_linnum += lineinc;
+            int altline = src_get(&linnum, &file_name);
+            if (altline) {
+                if (altline == 1 && lineinc == 1)
+                    nasm_fputs("", ofile);
+                else {
+                    lineinc = (altline != -1 || lineinc != 1);
+                    fprintf(ofile ? ofile : stdout,
+                            "%%line %"PRId32"+%d %s\n", linnum, lineinc,
+                            file_name);
+                }
+                prior_linnum = linnum;
+            }
+            nasm_fputs(line, ofile);
+            nasm_free(line);
+        }
+        preproc->cleanup(0);
+        if (ofile) {
+            fclose(ofile);
+            /* Do not leave a partial output file behind after an error */
+            if (terminate_after_phase)
+                remove(outname);
+        }
+        ofile = NULL;
+    }
+
+    if (operating_mode & OP_NORMAL) {
+        /*
+         * We must call ofmt->filename _anyway_, even if the user
+         * has specified their own output file, because some
+         * formats (eg OBJ and COFF) use ofmt->filename to find out
+         * the name of the input file and then put that inside the
+         * file.
+         */
+        ofmt->filename(inname, outname);
+
+        ofile = nasm_open_write(outname, (ofmt->flags & OFMT_TEXT) ? NF_TEXT : NF_BINARY);
+        if (!ofile)
+            nasm_fatal(ERR_NOFILE,
+                       "unable to open output file `%s'", outname);
+
+        /*
+         * We must call init_labels() before ofmt->init() since
+         * some object formats will want to define labels in their
+         * init routines. (eg OS/2 defines the FLAT group)
+         */
+        init_labels();
+
+        ofmt->init();
+        dfmt->init();
+
+        assemble_file(inname, depend_ptr);
+
+        if (!terminate_after_phase) {
+            ofmt->cleanup();
+            cleanup_labels();
+            fflush(ofile);
+            if (ferror(ofile)) {
+                nasm_error(ERR_NONFATAL|ERR_NOFILE,
+                           "write error on output file `%s'", outname);
+                terminate_after_phase = true;
+            }
+        }
+
+        if (ofile) {
+            fclose(ofile);
+            if (terminate_after_phase)
+                remove(outname);
+            ofile = NULL;
+        }
+    }
+
+    if (depend_list && !terminate_after_phase) {
+        /*
+         * Default the target to the output file name.  This is done
+         * here rather than right after option parsing so that a name
+         * generated by ofmt->filename() is picked up as well.
+         */
+        if (!depend_target)
+            depend_target = quote_for_make(outname);
+        emit_dependencies(depend_list);
+    }
+
+    if (want_usage)
+        usage();
+
+    raa_free(offsets);
+    saa_free(forwrefs);
+    eval_cleanup();
+    stdscan_cleanup();
+    src_free();
+
+    return terminate_after_phase;
+}
+
+/*
+ * Fetch the parameter of a command-line option.
+ *
+ * `p' is the option itself and must have the form -X...; `q' is the
+ * following argument, or NULL.  If the parameter is attached to the
+ * option (-Xvalue) it is returned with leading spaces skipped and
+ * *advance is false.  Otherwise a non-empty `q' is returned and
+ * *advance is set to true to tell the caller that `q' was consumed.
+ * If neither is available an error is reported and NULL is returned.
+ */
+static char *get_param(char *p, char *q, bool *advance)
+{
+    char *value = NULL;
+
+    *advance = false;
+
+    if (p[2] != '\0') {
+        /* the parameter's in the option */
+        value = nasm_skip_spaces(p + 2);
+    } else if (q != NULL && q[0] != '\0') {
+        *advance = true;
+        value = q;
+    } else {
+        nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                   "option `-%c' requires an argument", p[1]);
+    }
+
+    return value;
+}
+
+/*
+ * Copy a file name into one of the FILENAME_MAX-sized name buffers.
+ * A name that does not fit, terminator included, is a fatal error.
+ */
+static void copy_filename(char *dst, const char *src)
+{
+    if (strlen(src) < (size_t)FILENAME_MAX) {
+        /* Fits with its terminator; the rest of dst is zero-filled */
+        strncpy(dst, src, FILENAME_MAX);
+    } else {
+        nasm_fatal(ERR_NOFILE, "file name too long");
+    }
+}
+
+/*
+ * Convert a string to Make-safe form.
+ *
+ * Returns a newly allocated string (to be released with nasm_free()),
+ * or NULL if `str' is NULL.  The quoting rules are:
+ *
+ *   - a space or tab gets a backslash in front of it, and any run of N
+ *     backslashes immediately before it is doubled (2N+1 in total);
+ *   - `$' is doubled;
+ *   - `#' gets a backslash in front of it;
+ *   - a run of N backslashes at the very end of the string is doubled.
+ *
+ * The first pass computes the exact output size, the second pass
+ * writes the output.  In the second pass the pending-backslash counter
+ * is counted down with a loop that stops at zero; the post-decrement
+ * form `while (nbs--)' must not be used, because it leaves the
+ * unsigned counter at its maximum value.
+ */
+static char *quote_for_make(const char *str)
+{
+    const char *p;
+    char *os, *q;
+
+    size_t n = 1;               /* Terminating zero */
+    size_t nbs = 0;             /* Length of the current backslash run */
+
+    if (!str)
+        return NULL;
+
+    for (p = str; *p; p++) {
+        switch (*p) {
+        case ' ':
+        case '\t':
+            /* Convert N backslashes + ws -> 2N+1 backslashes + ws */
+            n += nbs + 2;
+            nbs = 0;
+            break;
+        case '$':
+        case '#':
+            nbs = 0;
+            n += 2;
+            break;
+        case '\\':
+            nbs++;
+            n++;
+            break;
+        default:
+            nbs = 0;
+            n++;
+            break;
+        }
+    }
+
+    /* Convert N backslashes at the end of filename to 2N backslashes */
+    if (nbs)
+        n += nbs;
+
+    os = q = nasm_malloc(n);
+
+    nbs = 0;
+    for (p = str; *p; p++) {
+        switch (*p) {
+        case ' ':
+        case '\t':
+            /* Double the pending backslash run; leaves nbs at zero */
+            for (; nbs; nbs--)
+                *q++ = '\\';
+            *q++ = '\\';
+            *q++ = *p;
+            break;
+        case '$':
+            *q++ = *p;
+            *q++ = *p;
+            nbs = 0;
+            break;
+        case '#':
+            *q++ = '\\';
+            *q++ = *p;
+            nbs = 0;
+            break;
+        case '\\':
+            *q++ = *p;
+            nbs++;
+            break;
+        default:
+            *q++ = *p;
+            nbs = 0;
+            break;
+        }
+    }
+    /* Double a backslash run at the very end of the string */
+    for (; nbs; nbs--)
+        *q++ = '\\';
+
+    *q = '\0';
+
+    return os;
+}
+
+/* A long (`--name') option: its name and the value identifying it */
+struct textargs {
+ const char *label;
+ int value;
+};
+
+/*
+ * Identifiers of the long options. process_arg() switches on the index
+ * of the matching textopts[] entry, so the entries of textopts[] must
+ * stay in the same order as these enumerators.
+ */
+enum text_options {
+ OPT_PREFIX,
+ OPT_POSTFIX
+};
+/* Table of long options, terminated by an entry with a NULL label */
+static const struct textargs textopts[] = {
+ {"prefix", OPT_PREFIX},
+ {"postfix", OPT_POSTFIX},
+ {NULL, 0}
+};
+
+/* Print the version banner on standard output and exit successfully. */
+static void show_version(void)
+{
+    fprintf(stdout, "NASM version %s compiled on %s%s\n",
+            nasm_version, nasm_date, nasm_compile_options);
+    exit(0);
+}
+
+/* Set once `--' has been seen: everything after it is a file name */
+static bool stopoptions = false;
+
+/*
+ * Process one command-line argument.
+ *
+ * `p' is the argument to process and `q' is the argument following it
+ * (NULL if there is none), which is used when an option takes a
+ * separate parameter.  Returns true if `q' was consumed as the
+ * parameter of `p', so that the caller must skip it; false is returned
+ * whenever `q' was not consumed, including when a required parameter
+ * is missing.
+ *
+ * The caller's argument strings are not guaranteed to stay valid after
+ * this function returns (they may live in a reused line buffer), so
+ * every option value that is kept in a global pointer is duplicated
+ * first.
+ */
+static bool process_arg(char *p, char *q)
+{
+    char *param = NULL;
+    int i;
+    bool advance = false;
+    bool do_warn;
+
+    if (!p || !p[0])
+        return false;
+
+    if (p[0] == '-' && !stopoptions) {
+        /*
+         * strchr() also matches the terminating NUL of its first
+         * argument, so p[1] has to be tested explicitly; otherwise a
+         * bare `-' would be taken for an option with a value and p[2]
+         * would be read past the end of the string.
+         */
+        if (p[1] && strchr("oOfpPdDiIlFXuUZwW", p[1])) {
+            /* These parameters take values */
+            if (!(param = get_param(p, q, &advance)))
+                return advance;
+        }
+
+        switch (p[1]) {
+        case 's':
+            error_file = stdout;
+            break;
+
+        case 'o':       /* output file */
+            copy_filename(outname, param);
+            break;
+
+        case 'f':       /* output format */
+            ofmt = ofmt_find(param, &ofmt_alias);
+            if (!ofmt) {
+                nasm_fatal(ERR_NOFILE | ERR_USAGE,
+                           "unrecognised output format `%s' - "
+                           "use -hf for a list", param);
+            }
+            break;
+
+        case 'O':       /* Optimization level */
+        {
+            int opt;
+
+            if (!*param) {
+                /* Naked -O == -Ox */
+                optimizing = MAX_OPTIMIZE;
+            } else {
+                while (*param) {
+                    switch (*param) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        opt = strtoul(param, &param, 10);
+
+                        /* -O0 -> optimizing == -1, 0.98 behaviour */
+                        /* -O1 -> optimizing == 0, 0.98.09 behaviour */
+                        if (opt < 2)
+                            optimizing = opt - 1;
+                        else
+                            optimizing = opt;
+                        break;
+
+                    case 'v':
+                    case '+':
+                        param++;
+                        opt_verbose_info = true;
+                        break;
+
+                    case 'x':
+                        param++;
+                        optimizing = MAX_OPTIMIZE;
+                        break;
+
+                    default:
+                        nasm_fatal(0,
+                                   "unknown optimization option -O%c\n",
+                                   *param);
+                        break;
+                    }
+                }
+                if (optimizing > MAX_OPTIMIZE)
+                    optimizing = MAX_OPTIMIZE;
+            }
+            break;
+        }
+
+        case 'p':       /* pre-include */
+        case 'P':
+            preproc->pre_include(param);
+            break;
+
+        case 'd':       /* pre-define */
+        case 'D':
+            preproc->pre_define(param);
+            break;
+
+        case 'u':       /* un-define */
+        case 'U':
+            preproc->pre_undefine(param);
+            break;
+
+        case 'i':       /* include search path */
+        case 'I':
+            preproc->include_path(param);
+            break;
+
+        case 'l':       /* listing file */
+            copy_filename(listname, param);
+            break;
+
+        case 'Z':       /* error messages file */
+            copy_filename(errname, param);
+            break;
+
+        case 'F':       /* specify debug format */
+            using_debug_info = true;
+            /* kept beyond this call: take a private copy */
+            debug_format = nasm_strdup(param);
+            break;
+
+        case 'X':       /* specify error reporting format */
+            if (nasm_stricmp("vc", param) == 0)
+                nasm_set_verror(nasm_verror_vc);
+            else if (nasm_stricmp("gnu", param) == 0)
+                nasm_set_verror(nasm_verror_gnu);
+            else
+                nasm_fatal(ERR_NOFILE | ERR_USAGE,
+                           "unrecognized error reporting format `%s'",
+                           param);
+            break;
+
+        case 'g':
+            using_debug_info = true;
+            if (p[2])   /* kept beyond this call: take a private copy */
+                debug_format = nasm_strdup(nasm_skip_spaces(p + 2));
+            break;
+
+        case 'h':
+            printf
+                ("usage: nasm [-@ response file] [-o outfile] [-f format] "
+                 "[-l listfile]\n"
+                 " [options...] [--] filename\n"
+                 " or nasm -v (or --v) for version info\n\n"
+                 " -t assemble in SciTech TASM compatible mode\n");
+            printf
+                (" -E (or -e) preprocess only (writes output to stdout by default)\n"
+                 " -a don't preprocess (assemble only)\n"
+                 " -M generate Makefile dependencies on stdout\n"
+                 " -MG d:o, missing files assumed generated\n"
+                 " -MF <file> set Makefile dependency file\n"
+                 " -MD <file> assemble and generate dependencies\n"
+                 " -MT <file> dependency target name\n"
+                 " -MQ <file> dependency target name (quoted)\n"
+                 " -MP emit phony target\n\n"
+                 " -Z<file> redirect error messages to file\n"
+                 " -s redirect error messages to stdout\n\n"
+                 " -g generate debugging information\n\n"
+                 " -F format select a debugging format\n\n"
+                 " -gformat same as -g -F format\n\n"
+                 " -o outfile write output to an outfile\n\n"
+                 " -f format select an output format\n\n"
+                 " -l listfile write listing to a listfile\n\n"
+                 " -I<path> adds a pathname to the include file path\n");
+            printf
+                (" -O<digit> optimize branch offsets\n"
+                 " -O0: No optimization\n"
+                 " -O1: Minimal optimization\n"
+                 " -Ox: Multipass optimization (default)\n\n"
+                 " -P<file> pre-includes a file\n"
+                 " -D<macro>[=<value>] pre-defines a macro\n"
+                 " -U<macro> undefines a macro\n"
+                 " -X<format> specifies error reporting format (gnu or vc)\n"
+                 " -w+foo enables warning foo (equiv. -Wfoo)\n"
+                 " -w-foo disable warning foo (equiv. -Wno-foo)\n\n"
+                 " -h show invocation summary and exit\n\n"
+                 "--prefix,--postfix\n"
+                 " this options prepend or append the given argument to all\n"
+                 " extern and global variables\n"
+                 "Warnings:\n");
+            for (i = 0; i <= ERR_WARN_MAX; i++)
+                printf(" %-23s %s (default %s)\n",
+                       warnings[i].name, warnings[i].help,
+                       warnings[i].enabled ? "on" : "off");
+            printf
+                ("\nresponse files should contain command line parameters"
+                 ", one per line.\n");
+            if (p[2] == 'f') {
+                printf("\nvalid output formats for -f are"
+                       " (`*' denotes default):\n");
+                ofmt_list(ofmt, stdout);
+            } else {
+                printf("\nFor a list of valid output formats, use -hf.\n");
+                printf("For a list of debug formats, use -f <form> -y.\n");
+            }
+            exit(0);    /* never need usage message here */
+            break;
+
+        case 'y':
+            printf("\nvalid debug formats for '%s' output format are"
+                   " ('*' denotes default):\n", ofmt->shortname);
+            dfmt_list(ofmt, stdout);
+            exit(0);
+            break;
+
+        case 't':
+            tasm_compatible_mode = true;
+            break;
+
+        case 'v':
+            show_version();
+            break;
+
+        case 'e':       /* preprocess only */
+        case 'E':
+            operating_mode = OP_PREPROCESS;
+            break;
+
+        case 'a':       /* assemble only - don't preprocess */
+            preproc = &preproc_nop;
+            break;
+
+        case 'W':
+            if (param[0] == 'n' && param[1] == 'o' && param[2] == '-') {
+                do_warn = false;
+                param += 3;
+            } else {
+                do_warn = true;
+            }
+            goto set_warning;
+
+        case 'w':
+            if (param[0] != '+' && param[0] != '-') {
+                nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                           "invalid option to `-w'");
+                break;
+            }
+            do_warn = (param[0] == '+');
+            param++;
+
+set_warning:
+            for (i = 0; i <= ERR_WARN_MAX; i++) {
+                if (!nasm_stricmp(param, warnings[i].name))
+                    break;
+            }
+            if (i <= ERR_WARN_MAX) {
+                warning_on_global[i] = do_warn;
+            } else if (!nasm_stricmp(param, "all")) {
+                for (i = 1; i <= ERR_WARN_MAX; i++)
+                    warning_on_global[i] = do_warn;
+            } else if (!nasm_stricmp(param, "none")) {
+                for (i = 1; i <= ERR_WARN_MAX; i++)
+                    warning_on_global[i] = !do_warn;
+            } else {
+                /* Ignore invalid warning names; forward compatibility */
+            }
+            break;
+
+        case 'M':
+            switch (p[2]) {
+            case 0:
+                operating_mode = OP_DEPEND;
+                break;
+            case 'G':
+                operating_mode = OP_DEPEND;
+                depend_missing_ok = true;
+                break;
+            case 'P':
+                depend_emit_phony = true;
+                break;
+            case 'D':
+            case 'F':
+            case 'T':
+            case 'Q':
+                /*
+                 * These four take their parameter from the next
+                 * argument.  Only claim it (advance) when it exists,
+                 * so the caller never steps past the last argument.
+                 */
+                if (!q || !q[0]) {
+                    nasm_error(ERR_NONFATAL|ERR_NOFILE|ERR_USAGE,
+                               "option `-M%c' requires a parameter", p[2]);
+                    break;
+                }
+                advance = true;
+                if (p[2] == 'D') {
+                    operating_mode = OP_NORMAL;
+                    depend_file = nasm_strdup(q);
+                } else if (p[2] == 'F') {
+                    depend_file = nasm_strdup(q);
+                } else if (p[2] == 'T') {
+                    depend_target = nasm_strdup(q);
+                } else {
+                    depend_target = quote_for_make(q);
+                }
+                break;
+            default:
+                nasm_error(ERR_NONFATAL|ERR_NOFILE|ERR_USAGE,
+                           "unknown dependency option `-M%c'", p[2]);
+                break;
+            }
+            break;
+
+        case '-':
+        {
+            int s;
+
+            if (p[2] == 0) {    /* -- => stop processing options */
+                stopoptions = true;
+                break;
+            }
+
+            if (!nasm_stricmp(p, "--v"))
+                show_version();
+
+            /* s ends up as the index of the NULL entry if nothing matches */
+            for (s = 0; textopts[s].label; s++) {
+                if (!nasm_stricmp(p + 2, textopts[s].label)) {
+                    break;
+                }
+            }
+
+            switch (s) {
+
+            case OPT_PREFIX:
+            case OPT_POSTFIX:
+            {
+                if (!q) {
+                    nasm_error(ERR_NONFATAL | ERR_NOFILE |
+                               ERR_USAGE,
+                               "option `--%s' requires an argument",
+                               p + 2);
+                    break;
+                } else {
+                    advance = true;
+                    param = q;
+                }
+
+                switch (s) {
+                case OPT_PREFIX:
+                    strlcpy(lprefix, param, PREFIX_MAX);
+                    break;
+                case OPT_POSTFIX:
+                    strlcpy(lpostfix, param, POSTFIX_MAX);
+                    break;
+                default:
+                    nasm_panic(ERR_NOFILE,
+                               "internal error");
+                    break;
+                }
+                break;
+            }
+
+            default:
+            {
+                nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                           "unrecognised option `--%s'", p + 2);
+                break;
+            }
+            }
+            break;
+        }
+
+        default:
+            if (!ofmt->setinfo(GI_SWITCH, &p))
+                nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                           "unrecognised option `-%c'", p[1]);
+            break;
+        }
+    } else {
+        if (*inname) {
+            nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                       "more than one input file specified");
+        } else {
+            copy_filename(inname, p);
+        }
+    }
+
+    return advance;
+}
+
+#define ARG_BUF_DELTA 128
+
+/*
+ * Process a `-@' response file: one command-line argument per line.
+ *
+ * Each line is read in full (the line buffer grows as needed),
+ * stripped of its line terminator and of surrounding white space, and
+ * passed to process_arg() together with the previous line, so that an
+ * option on one line can take its parameter from the next.  A line
+ * consumed as a parameter is blanked so that it is not processed a
+ * second time as an argument of its own.
+ */
+static void process_respfile(FILE * rfile)
+{
+    char *buffer, *p, *q, *prevarg;
+    int bufsize, prevargsize;
+    size_t len;
+
+    bufsize = prevargsize = ARG_BUF_DELTA;
+    buffer = nasm_malloc(ARG_BUF_DELTA);
+    prevarg = nasm_malloc(ARG_BUF_DELTA);
+    prevarg[0] = '\0';
+
+    while (1) {                 /* Loop to handle all lines in file */
+        p = buffer;
+        while (1) {             /* Loop to handle long lines */
+            q = fgets(p, bufsize - (p - buffer), rfile);
+            if (!q)
+                break;
+            p += strlen(p);
+            if (p > buffer && p[-1] == '\n')
+                break;
+            if (p - buffer > bufsize - 10) {
+                int offset;
+                offset = p - buffer;
+                bufsize += ARG_BUF_DELTA;
+                buffer = nasm_realloc(buffer, bufsize);
+                p = buffer + offset;
+            }
+        }
+
+        if (!q && p == buffer) {
+            /* End of file: the last line still has to be processed */
+            if (prevarg[0])
+                process_arg(prevarg, NULL);
+            nasm_free(buffer);
+            nasm_free(prevarg);
+            return;
+        }
+
+        /*
+         * Play safe: remove CRs, LFs and any spurious ^Zs, if any of
+         * them are present at the end of the line.
+         */
+        *(p = &buffer[strcspn(buffer, "\r\n\032")]) = '\0';
+
+        while (p > buffer && nasm_isspace(p[-1]))
+            *--p = '\0';
+
+        p = nasm_skip_spaces(buffer);
+
+        if (process_arg(prevarg, p))
+            *p = '\0';
+
+        /*
+         * Remember this line as the previous argument.  The line can
+         * be longer than prevarg by any number of ARG_BUF_DELTA steps,
+         * so keep growing until it fits with room to spare.
+         */
+        len = strlen(p);
+        if (len + 10 > (size_t)prevargsize) {
+            while (len + 10 > (size_t)prevargsize)
+                prevargsize += ARG_BUF_DELTA;
+            prevarg = nasm_realloc(prevarg, prevargsize);
+        }
+        memcpy(prevarg, p, len + 1);
+    }
+}
+
+/*
+ * Process arguments held in a single string rather than in the argv
+ * array.  Used by the environment variable and response file
+ * processing.
+ *
+ * Arguments are separated by spaces, unless the string starts with a
+ * character other than `-', in which case that character is the
+ * separator.  The string is modified in place: separators are
+ * overwritten with NULs.
+ */
+static void process_args(char *args)
+{
+    char *cursor = args;
+    char *pending = NULL;       /* argument not yet used as an option */
+    char sep = ' ';
+
+    if (*cursor != '\0' && *cursor != '-')
+        sep = *cursor++;
+
+    while (*cursor != '\0') {
+        char *token = cursor;
+
+        /* Find the end of this token and cut the string there */
+        while (*cursor != '\0' && *cursor != sep)
+            cursor++;
+        while (*cursor == sep)
+            *cursor++ = '\0';
+
+        /* A token consumed as a parameter must not be processed again */
+        if (process_arg(pending, token))
+            pending = NULL;
+        else
+            pending = token;
+    }
+
+    if (pending)
+        process_arg(pending, NULL);
+}
+
+/*
+ * Process an `@file' response file.  Each line may hold several
+ * arguments and is handled like the NASMENV environment variable.
+ *
+ * The line terminator that fgets() leaves in the buffer is removed
+ * first; otherwise it would become part of the last argument on the
+ * line.  Exits the program if the file cannot be opened.
+ *
+ * NOTE(review): lines longer than the 2048-byte buffer are still
+ * split into separate chunks.
+ */
+static void process_response_file(const char *file)
+{
+    char str[2048];
+    FILE *f = nasm_open_read(file, NF_TEXT);
+
+    if (!f) {
+        perror(file);
+        exit(-1);
+    }
+    while (fgets(str, sizeof str, f)) {
+        /* Cut at the first CR, LF or ^Z, as for -@ response files */
+        str[strcspn(str, "\r\n\032")] = '\0';
+        process_args(str);
+    }
+    fclose(f);
+}
+
+/*
+ * Parse the command line.
+ *
+ * Resets the file names and the command-line warning state, processes
+ * the NASMENV environment variable, then every argument in argv:
+ * `@file' and `-@ file' response files as well as ordinary options and
+ * the input file name.  Finishes with some sanity checks on the file
+ * names and opens the error file if one was requested.
+ */
+static void parse_cmdline(int argc, char **argv)
+{
+    FILE *rfile;
+    char *envreal, *envcopy = NULL, *p;
+    int i;
+
+    *inname = *outname = *listname = *errname = '\0';
+
+    for (i = 0; i <= ERR_WARN_MAX; i++)
+        warning_on_global[i] = warnings[i].enabled;
+
+    /*
+     * First, process the NASMENV environment variable.
+     */
+    envreal = getenv("NASMENV");
+    if (envreal) {
+        envcopy = nasm_strdup(envreal);
+        process_args(envcopy);
+        nasm_free(envcopy);
+    }
+
+    /*
+     * Now process the actual command line.
+     */
+    while (--argc > 0) {
+        bool advance;
+        argv++;
+        if (argv[0][0] == '@') {
+            /*
+             * We have a response file, so process this as a set of
+             * arguments like the environment variable. This allows us
+             * to have multiple arguments on a single line, which is
+             * different to the -@resp file processing below for regular
+             * NASM.
+             *
+             * Go straight to the next argument: the response file may
+             * have been the last one, in which case there is nothing
+             * left in argv to look at.
+             */
+            process_response_file(argv[0]+1);
+            continue;
+        }
+        if (!stopoptions && argv[0][0] == '-' && argv[0][1] == '@') {
+            p = get_param(argv[0], argc > 1 ? argv[1] : NULL, &advance);
+            if (p) {
+                rfile = nasm_open_read(p, NF_TEXT);
+                if (rfile) {
+                    process_respfile(rfile);
+                    fclose(rfile);
+                } else
+                    nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                               "unable to open response file `%s'", p);
+            }
+        } else
+            advance = process_arg(argv[0], argc > 1 ? argv[1] : NULL);
+
+        /* Skip a consumed parameter, but never step past the last one */
+        if (advance && argc > 1) {
+            argv++;
+            argc--;
+        }
+    }
+
+    /*
+     * Look for basic command line typos. This definitely doesn't
+     * catch all errors, but it might help cases of fumbled fingers.
+     */
+    if (!*inname)
+        nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+                   "no input file specified");
+    else if (!strcmp(inname, errname) ||
+             !strcmp(inname, outname) ||
+             !strcmp(inname, listname) ||
+             (depend_file && !strcmp(inname, depend_file)))
+        nasm_fatal(ERR_NOFILE | ERR_USAGE,
+                   "file `%s' is both input and output file",
+                   inname);
+
+    if (*errname) {
+        error_file = nasm_open_write(errname, NF_TEXT);
+        if (!error_file) {
+            error_file = stderr;        /* Revert to default! */
+            nasm_fatal(ERR_NOFILE | ERR_USAGE,
+                       "cannot open file `%s' for error messages",
+                       errname);
+        }
+    }
+}
+
+static enum directives getkw(char **directive, char **value);
+
+/*
+ * Drive the multi-pass assembly of the source file `fname'.
+ *
+ * Each iteration of the outer loop is one pass over the preprocessed
+ * source. Lines that getkw() recognizes as bracketed directives are
+ * handled inline by the big switch below; every other line goes through
+ * parse_line() and is then either sized with insn_size() (while
+ * pass1 == 1) or emitted with assemble() (otherwise). EQU never emits
+ * anything and only defines a label.
+ *
+ * pass0 is advanced at the bottom of the loop once a pass after the
+ * first completes without global_offset_changed being set (or after the
+ * pass in which pass0 == 2). The loop ends when pass0 exceeds 2, when
+ * terminate_after_phase is set, or when the stall/pass limits are hit.
+ *
+ * depend_ptr is forwarded to preproc->reset() only on the pass where
+ * pass1 == 2; NULL is passed on all other passes.
+ */
+static void assemble_file(char *fname, StrList **depend_ptr)
+{
+ char *directive, *value, *p, *q, *special, *line;
+ insn output_ins;
+ int i, validid;
+ bool rn_error;
+ int32_t seg;
+ int64_t offs;
+ struct tokenval tokval;
+ expr *e;
+ int pass_max;
+
+ if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386)
+ nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+
+ pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */
+ for (passn = 1; pass0 <= 2; passn++) {
+ int pass1, pass2;
+ ldfunc def_label;
+
+ pass1 = pass0 == 2 ? 2 : 1; /* 1, 1, 1, ..., 1, 2 */
+ pass2 = passn > 1 ? 2 : 1; /* 1, 2, 2, ..., 2, 2 */
+ /* pass0 0, 0, 0, ..., 1, 2 */
+
+ /* labels are defined on the first pass and redefined on later ones */
+ def_label = passn > 1 ? redefine_label : define_label;
+
+ globalbits = sb = cmd_sb; /* set 'bits' to command line default */
+ cpu = cmd_cpu;
+ if (pass0 == 2) {
+ lfmt->init(listname);
+ }
+ in_abs_seg = false;
+ global_offset_changed = 0; /* set by redefine_label */
+ location.segment = ofmt->section(NULL, pass2, &sb);
+ globalbits = sb;
+ if (passn > 1) {
+ saa_rewind(forwrefs);
+ forwref = saa_rstruct(forwrefs);
+ raa_free(offsets);
+ offsets = raa_init();
+ }
+ preproc->reset(fname, pass1, pass1 == 2 ? depend_ptr : NULL);
+ /* per-pass warning state starts from the command-line settings */
+ memcpy(warning_on, warning_on_global, (ERR_WARN_MAX+1) * sizeof(bool));
+
+ globallineno = 0;
+ if (passn == 1)
+ location.known = true;
+ location.offset = offs = get_curr_offs();
+
+ while ((line = preproc->getline())) {
+ enum directives d;
+ globallineno++;
+
+ /*
+ * Here we parse our directives; this is not handled by the
+ * 'real' parser. This really should be a separate function.
+ */
+ directive = line;
+ d = getkw(&directive, &value);
+ if (d) {
+ int err = 0;
+
+ switch (d) {
+ case D_SEGMENT: /* [SEGMENT n] */
+ case D_SECTION:
+ seg = ofmt->section(value, pass2, &sb);
+ if (seg == NO_SEG) {
+ nasm_error(pass1 == 1 ? ERR_NONFATAL : ERR_PANIC,
+ "segment name `%s' not recognized",
+ value);
+ } else {
+ in_abs_seg = false;
+ location.segment = seg;
+ }
+ break;
+ case D_SECTALIGN: /* [SECTALIGN n] */
+ if (*value) {
+ stdscan_reset();
+ stdscan_set(value);
+ tokval.t_type = TOKEN_INVALID;
+ e = evaluate(stdscan, NULL, &tokval, NULL, pass2, NULL);
+ if (e) {
+ unsigned int align = (unsigned int)e->value;
+ if ((uint64_t)e->value > 0x7fffffff) {
+ /*
+ * FIXME: Please make some sane message here
+ * ofmt should have some 'check' method which
+ * would report segment alignment bounds.
+ */
+ nasm_fatal(0,
+ "incorrect segment alignment `%s'", value);
+ } else if (!is_power2(align)) {
+ nasm_error(ERR_NONFATAL,
+ "segment alignment `%s' is not power of two",
+ value);
+ }
+
+ /* callee should be able to handle all details */
+ if (location.segment != NO_SEG)
+ ofmt->sectalign(location.segment, align);
+ }
+ }
+ break;
+ case D_EXTERN: /* [EXTERN label:special] */
+ if (*value == '$')
+ value++; /* skip initial $ if present */
+ if (pass0 == 2) {
+ q = value;
+ while (*q && *q != ':')
+ q++;
+ if (*q == ':') {
+ *q++ = '\0';
+ ofmt->symdef(value, 0L, 0L, 3, q);
+ }
+ } else if (passn == 1) {
+ q = value;
+ validid = true;
+ if (!isidstart(*q))
+ validid = false;
+ while (*q && *q != ':') {
+ if (!isidchar(*q))
+ validid = false;
+ q++;
+ }
+ if (!validid) {
+ nasm_error(ERR_NONFATAL,
+ "identifier expected after EXTERN");
+ break;
+ }
+ if (*q == ':') {
+ *q++ = '\0';
+ special = q;
+ } else
+ special = NULL;
+ if (!is_extern(value)) { /* allow re-EXTERN to be ignored */
+ int temp = pass0;
+ pass0 = 1; /* fake pass 1 in labels.c */
+ declare_as_global(value, special);
+ define_label(value, seg_alloc(), 0L, NULL,
+ false, true);
+ pass0 = temp;
+ }
+ } /* else pass0 == 1 */
+ break;
+ case D_BITS: /* [BITS bits] */
+ globalbits = sb = get_bits(value);
+ break;
+ case D_GLOBAL: /* [GLOBAL symbol:special] */
+ if (*value == '$')
+ value++; /* skip initial $ if present */
+ if (pass0 == 2) { /* pass 2 */
+ q = value;
+ while (*q && *q != ':')
+ q++;
+ if (*q == ':') {
+ *q++ = '\0';
+ ofmt->symdef(value, 0L, 0L, 3, q);
+ }
+ } else if (pass2 == 1) { /* pass == 1 */
+ q = value;
+ validid = true;
+ if (!isidstart(*q))
+ validid = false;
+ while (*q && *q != ':') {
+ if (!isidchar(*q))
+ validid = false;
+ q++;
+ }
+ if (!validid) {
+ nasm_error(ERR_NONFATAL,
+ "identifier expected after GLOBAL");
+ break;
+ }
+ if (*q == ':') {
+ *q++ = '\0';
+ special = q;
+ } else
+ special = NULL;
+ declare_as_global(value, special);
+ } /* pass == 1 */
+ break;
+ case D_COMMON: /* [COMMON symbol size:special] */
+ {
+ int64_t size;
+
+ if (*value == '$')
+ value++; /* skip initial $ if present */
+ p = value;
+ validid = true;
+ if (!isidstart(*p))
+ validid = false;
+ while (*p && !nasm_isspace(*p)) {
+ if (!isidchar(*p))
+ validid = false;
+ p++;
+ }
+ if (!validid) {
+ nasm_error(ERR_NONFATAL,
+ "identifier expected after COMMON");
+ break;
+ }
+ if (*p) {
+ p = nasm_zap_spaces_fwd(p);
+ q = p;
+ while (*q && *q != ':')
+ q++;
+ if (*q == ':') {
+ *q++ = '\0';
+ special = q;
+ } else {
+ special = NULL;
+ }
+ size = readnum(p, &rn_error);
+ if (rn_error) {
+ nasm_error(ERR_NONFATAL,
+ "invalid size specified"
+ " in COMMON declaration");
+ break;
+ }
+ } else {
+ nasm_error(ERR_NONFATAL,
+ "no size specified in"
+ " COMMON declaration");
+ break;
+ }
+
+ if (pass0 < 2) {
+ define_common(value, seg_alloc(), size, special);
+ } else if (pass0 == 2) {
+ if (special)
+ ofmt->symdef(value, 0L, 0L, 3, special);
+ }
+ break;
+ }
+ case D_ABSOLUTE: /* [ABSOLUTE address] */
+ stdscan_reset();
+ stdscan_set(value);
+ tokval.t_type = TOKEN_INVALID;
+ e = evaluate(stdscan, NULL, &tokval, NULL, pass2, NULL);
+ if (e) {
+ if (!is_reloc(e))
+ nasm_error(pass0 ==
+ 1 ? ERR_NONFATAL : ERR_PANIC,
+ "cannot use non-relocatable expression as "
+ "ABSOLUTE address");
+ else {
+ abs_seg = reloc_seg(e);
+ abs_offset = reloc_value(e);
+ }
+ } else if (passn == 1)
+ abs_offset = 0x100; /* don't go near zero in case of / */
+ else
+ nasm_panic(0, "invalid ABSOLUTE address "
+ "in pass two");
+ in_abs_seg = true;
+ location.segment = NO_SEG;
+ break;
+ case D_DEBUG: /* [DEBUG] */
+ {
+ char debugid[128];
+ bool badid, overlong;
+
+ p = value;
+ q = debugid;
+ badid = overlong = false;
+ if (!isidstart(*p)) {
+ badid = true;
+ } else {
+ while (*p && !nasm_isspace(*p)) {
+ if (q >= debugid + sizeof debugid - 1) {
+ overlong = true;
+ break;
+ }
+ if (!isidchar(*p))
+ badid = true;
+ *q++ = *p++;
+ }
+ *q = 0;
+ }
+ if (badid) {
+ nasm_error(passn == 1 ? ERR_NONFATAL : ERR_PANIC,
+ "identifier expected after DEBUG");
+ break;
+ }
+ if (overlong) {
+ nasm_error(passn == 1 ? ERR_NONFATAL : ERR_PANIC,
+ "DEBUG identifier too long");
+ break;
+ }
+ p = nasm_skip_spaces(p);
+ if (pass0 == 2)
+ dfmt->debug_directive(debugid, p);
+ break;
+ }
+ case D_WARNING: /* [WARNING {+|-|*}warn-name] */
+ value = nasm_skip_spaces(value);
+ /* validid is reused here as the action: 0 = off, 1 = on, 2 = reset */
+ switch(*value) {
+ case '-': validid = 0; value++; break;
+ case '+': validid = 1; value++; break;
+ case '*': validid = 2; value++; break;
+ default: validid = 1; break;
+ }
+
+ for (i = 1; i <= ERR_WARN_MAX; i++)
+ if (!nasm_stricmp(value, warnings[i].name))
+ break;
+ if (i <= ERR_WARN_MAX) {
+ switch(validid) {
+ case 0:
+ warning_on[i] = false;
+ break;
+ case 1:
+ warning_on[i] = true;
+ break;
+ case 2:
+ warning_on[i] = warning_on_global[i];
+ break;
+ }
+ }
+ break;
+ case D_CPU: /* [CPU] */
+ cpu = get_cpu(value);
+ break;
+ case D_LIST: /* [LIST {+|-}] */
+ value = nasm_skip_spaces(value);
+ if (*value == '+') {
+ user_nolist = 0;
+ } else {
+ if (*value == '-') {
+ user_nolist = 1;
+ } else {
+ err = 1;
+ }
+ }
+ break;
+ case D_DEFAULT: /* [DEFAULT] */
+ stdscan_reset();
+ stdscan_set(value);
+ tokval.t_type = TOKEN_INVALID;
+ if (stdscan(NULL, &tokval) != TOKEN_INVALID) {
+ switch ((int)tokval.t_integer) {
+ case S_REL:
+ globalrel = 1;
+ break;
+ case S_ABS:
+ globalrel = 0;
+ break;
+ case P_BND:
+ globalbnd = 1;
+ break;
+ case P_NOBND:
+ globalbnd = 0;
+ break;
+ default:
+ err = 1;
+ break;
+ }
+ } else {
+ err = 1;
+ }
+ break;
+ case D_FLOAT:
+ if (float_option(value)) {
+ nasm_error(pass1 == 1 ? ERR_NONFATAL : ERR_PANIC,
+ "unknown 'float' directive: %s",
+ value);
+ }
+ break;
+ default:
+ /* give the output format a chance to handle the directive */
+ if (ofmt->directive(d, value, pass2))
+ break;
+ /* else fall through */
+ case D_unknown:
+ nasm_error(pass1 == 1 ? ERR_NONFATAL : ERR_PANIC,
+ "unrecognised directive [%s]",
+ directive);
+ break;
+ }
+ if (err) {
+ nasm_error(ERR_NONFATAL,
+ "invalid parameter to [%s] directive",
+ directive);
+ }
+ } else { /* it isn't a directive */
+ parse_line(pass1, line, &output_ins, def_label);
+
+ if (optimizing > 0) {
+ if (forwref != NULL && globallineno == forwref->lineno) {
+ output_ins.forw_ref = true;
+ do {
+ output_ins.oprs[forwref->operand].opflags |= OPFLAG_FORWARD;
+ forwref = saa_rstruct(forwrefs);
+ } while (forwref != NULL
+ && forwref->lineno == globallineno);
+ } else
+ output_ins.forw_ref = false;
+
+ if (output_ins.forw_ref) {
+ if (passn == 1) {
+ for (i = 0; i < output_ins.operands; i++) {
+ if (output_ins.oprs[i].opflags & OPFLAG_FORWARD) {
+ struct forwrefinfo *fwinf = (struct forwrefinfo *)saa_wstruct(forwrefs);
+ fwinf->lineno = globallineno;
+ fwinf->operand = i;
+ }
+ }
+ }
+ }
+ }
+
+ /* forw_ref */
+ if (output_ins.opcode == I_EQU) {
+ if (pass1 == 1) {
+ /*
+ * Special `..' EQUs get processed in pass two,
+ * except `..@' macro-processor EQUs which are done
+ * in the normal place.
+ */
+ if (!output_ins.label)
+ nasm_error(ERR_NONFATAL,
+ "EQU not preceded by label");
+
+ else if (output_ins.label[0] != '.' ||
+ output_ins.label[1] != '.' ||
+ output_ins.label[2] == '@') {
+ if (output_ins.operands == 1 &&
+ (output_ins.oprs[0].type & IMMEDIATE) &&
+ output_ins.oprs[0].wrt == NO_SEG) {
+ bool isext = !!(output_ins.oprs[0].opflags & OPFLAG_EXTERN);
+ def_label(output_ins.label,
+ output_ins.oprs[0].segment,
+ output_ins.oprs[0].offset, NULL,
+ false, isext);
+ } else if (output_ins.operands == 2
+ && (output_ins.oprs[0].type & IMMEDIATE)
+ && (output_ins.oprs[0].type & COLON)
+ && output_ins.oprs[0].segment == NO_SEG
+ && output_ins.oprs[0].wrt == NO_SEG
+ && (output_ins.oprs[1].type & IMMEDIATE)
+ && output_ins.oprs[1].segment == NO_SEG
+ && output_ins.oprs[1].wrt == NO_SEG) {
+ def_label(output_ins.label,
+ output_ins.oprs[0].offset | SEG_ABS,
+ output_ins.oprs[1].offset,
+ NULL, false, false);
+ } else
+ nasm_error(ERR_NONFATAL,
+ "bad syntax for EQU");
+ }
+ } else {
+ /*
+ * Special `..' EQUs get processed here, except
+ * `..@' macro processor EQUs which are done above.
+ */
+ /* NOTE(review): label is dereferenced without a NULL check here; presumably the pass-one error above prevents reaching this pass - confirm */
+ if (output_ins.label[0] == '.' &&
+ output_ins.label[1] == '.' &&
+ output_ins.label[2] != '@') {
+ if (output_ins.operands == 1 &&
+ (output_ins.oprs[0].type & IMMEDIATE)) {
+ define_label(output_ins.label,
+ output_ins.oprs[0].segment,
+ output_ins.oprs[0].offset,
+ NULL, false, false);
+ } else if (output_ins.operands == 2
+ && (output_ins.oprs[0].type & IMMEDIATE)
+ && (output_ins.oprs[0].type & COLON)
+ && output_ins.oprs[0].segment == NO_SEG
+ && (output_ins.oprs[1].type & IMMEDIATE)
+ && output_ins.oprs[1].segment == NO_SEG) {
+ define_label(output_ins.label,
+ output_ins.oprs[0].offset | SEG_ABS,
+ output_ins.oprs[1].offset,
+ NULL, false, false);
+ } else
+ nasm_error(ERR_NONFATAL,
+ "bad syntax for EQU");
+ }
+ }
+ } else { /* instruction isn't an EQU */
+
+ if (pass1 == 1) {
+
+ int64_t l = insn_size(location.segment, offs, sb, cpu,
+ &output_ins);
+
+ /* if (using_debug_info) && output_ins.opcode != -1) */
+ if (using_debug_info)
+ { /* fbk 03/25/01 */
+ /* this is done here so we can do debug type info */
+ int32_t typeinfo =
+ TYS_ELEMENTS(output_ins.operands);
+ switch (output_ins.opcode) {
+ case I_RESB:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_BYTE;
+ break;
+ case I_RESW:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_WORD;
+ break;
+ case I_RESD:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_DWORD;
+ break;
+ case I_RESQ:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_QWORD;
+ break;
+ case I_REST:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_TBYTE;
+ break;
+ case I_RESO:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_OWORD;
+ break;
+ case I_RESY:
+ typeinfo =
+ TYS_ELEMENTS(output_ins.oprs[0].offset) | TY_YWORD;
+ break;
+ case I_DB:
+ typeinfo |= TY_BYTE;
+ break;
+ case I_DW:
+ typeinfo |= TY_WORD;
+ break;
+ case I_DD:
+ if (output_ins.eops_float)
+ typeinfo |= TY_FLOAT;
+ else
+ typeinfo |= TY_DWORD;
+ break;
+ case I_DQ:
+ typeinfo |= TY_QWORD;
+ break;
+ case I_DT:
+ typeinfo |= TY_TBYTE;
+ break;
+ case I_DO:
+ typeinfo |= TY_OWORD;
+ break;
+ case I_DY:
+ typeinfo |= TY_YWORD;
+ break;
+ default:
+ typeinfo = TY_LABEL;
+
+ }
+
+ dfmt->debug_typevalue(typeinfo);
+ }
+ if (l != -1) {
+ offs += l;
+ set_curr_offs(offs);
+ }
+ /*
+ * else l == -1 => invalid instruction, which will be
+ * flagged as an error on pass 2
+ */
+
+ } else {
+ offs += assemble(location.segment, offs, sb, cpu,
+ &output_ins);
+ set_curr_offs(offs);
+
+ }
+ } /* not an EQU */
+ cleanup_insn(&output_ins);
+ }
+ nasm_free(line);
+ location.offset = offs = get_curr_offs();
+ } /* end while (line = preproc->getline... */
+
+ if (pass0 == 2 && global_offset_changed && !terminate_after_phase)
+ nasm_error(ERR_NONFATAL,
+ "phase error detected at end of assembly.");
+
+ if (pass1 == 1)
+ preproc->cleanup(1);
+
+ /*
+ * Advance pass0 once a pass after the first saw no offset change
+ * (or after the pass with pass0 == 2). Otherwise reset the stall
+ * counter only while global_offset_changed keeps decreasing.
+ */
+ if ((passn > 1 && !global_offset_changed) || pass0 == 2) {
+ pass0++;
+ } else if (global_offset_changed &&
+ global_offset_changed < prev_offset_changed) {
+ prev_offset_changed = global_offset_changed;
+ stall_count = 0;
+ } else {
+ stall_count++;
+ }
+
+ if (terminate_after_phase)
+ break;
+
+ if ((stall_count > 997) || (passn >= pass_max)) {
+ /* We get here if the labels don't converge
+ * Example: FOO equ FOO + 1
+ */
+ nasm_error(ERR_NONFATAL,
+ "Can't find valid values for all labels "
+ "after %d passes, giving up.", passn);
+ nasm_error(ERR_NONFATAL,
+ "Possible causes: recursive EQUs, macro abuse.");
+ break;
+ }
+ }
+
+ preproc->cleanup(0);
+ lfmt->cleanup();
+ if (!terminate_after_phase && opt_verbose_info) {
+ /* -On and -Ov switches */
+ fprintf(stdout, "info: assembly required 1+%d+1 passes\n", passn-3);
+ }
+}
+
+/*
+ * Split a bracketed directive line of the form `[name value]' in place.
+ *
+ * On entry *directive points at the line.  If the line (after leading
+ * spaces) does not start with `[', has no `]', or has a `;' before the
+ * `]', D_none is returned.  Otherwise the buffer is cut up with NUL
+ * bytes, *directive is set to the directive name, *value to the text
+ * following it, and the result of find_directive() on the name is
+ * returned.
+ */
+static enum directives getkw(char **directive, char **value)
+{
+    char *start, *close, *semi, *name, *name_end;
+
+    start = nasm_skip_spaces(*directive);
+
+    /* it should be enclosed in [ ] */
+    if (*start != '[')
+        return D_none;
+    close = strchr(start, ']');
+    if (close == NULL)
+        return D_none;
+
+    /* strip off the comments */
+    semi = strchr(start, ';');
+    if (semi != NULL) {
+        if (semi < close)       /* ouch! somewhere inside the brackets */
+            return D_none;
+        *semi = '\0';
+    }
+
+    /* drop the closing bracket and any spaces just before it */
+    *close = '\0';
+    nasm_zap_spaces_rev(close - 1);
+
+    /* directive name */
+    name = nasm_skip_spaces(start + 1);
+    name_end = nasm_skip_word(name);
+    if (name_end == NULL)
+        return D_none;          /* sigh... no value there */
+    *name_end = '\0';
+    *directive = name;
+
+    /* and finally the value */
+    *value = nasm_skip_spaces(name_end + 1);
+
+    return find_directive(*directive);
+}
+
+/**
+ * GNU style error reporting
+ * Writes the location prefix to error_file in the form used by GNU
+ * tools and then hands over to nasm_verror_common() for the rest of
+ * the message.  A complete line looks like:
+ *      file.asm:50: error: blah blah blah
+ * When no current file is available (or ERR_NOFILE is set) the prefix
+ * is "nasm: " instead.  Nothing at all is done for a suppressed
+ * warning, and the prefix is omitted when skip_this_pass() says so.
+ *
+ * @param severity the severity of the warning or error
+ * @param fmt the printf style format string
+ * @param ap the arguments for fmt
+ */
+static void nasm_verror_gnu(int severity, const char *fmt, va_list ap)
+{
+    const char *srcname = NULL;
+    int32_t srcline = 0;
+
+    if (is_suppressed_warning(severity))
+        return;
+
+    if ((severity & ERR_NOFILE) == 0)
+        src_get(&srcline, &srcname);
+
+    if (!skip_this_pass(severity)) {
+        if (srcname == NULL)
+            fputs("nasm: ", error_file);
+        else
+            fprintf(error_file, "%s:%"PRId32": ", srcname, srcline);
+    }
+
+    nasm_verror_common(severity, fmt, ap);
+}
+
+/**
+ * MS style error reporting
+ * Writes the location prefix to error_file in the form used by Visual C
+ * and some other Microsoft tools and then hands over to
+ * nasm_verror_common() for the rest of the message.  A complete line
+ * looks like:
+ *      file.asm(50) : error: blah blah blah
+ * When no current file is available (or ERR_NOFILE is set) the prefix
+ * is "nasm: " instead.  Nothing at all is done for a suppressed
+ * warning, and the prefix is omitted when skip_this_pass() says so.
+ *
+ * @param severity the severity of the warning or error
+ * @param fmt the printf style format string
+ * @param ap the arguments for fmt
+ */
+static void nasm_verror_vc(int severity, const char *fmt, va_list ap)
+{
+    const char *srcname = NULL;
+    int32_t srcline = 0;
+
+    if (is_suppressed_warning(severity))
+        return;
+
+    if ((severity & ERR_NOFILE) == 0)
+        src_get(&srcline, &srcname);
+
+    if (!skip_this_pass(severity)) {
+        if (srcname == NULL)
+            fputs("nasm: ", error_file);
+        else
+            fprintf(error_file, "%s(%"PRId32") : ", srcname, srcline);
+    }
+
+    nasm_verror_common(severity, fmt, ap);
+}
+
+/**
+ * check for suppressed warning
+ * A message is suppressed only when it is a warning, carries a warning
+ * class (ERR_WARN_MASK bits), and that class is currently switched off
+ * in warning_on[].
+ *
+ * @param severity the severity of the warning or error
+ * @return true if we should abort error/warning printing
+ */
+static bool is_suppressed_warning(int severity)
+{
+    /* Anything that is not a warning is never suppressed */
+    if ((severity & ERR_MASK) != ERR_WARNING)
+        return false;
+
+    /* A warning without a class cannot be switched off */
+    if (!(severity & ERR_WARN_MASK))
+        return false;
+
+    /* Suppressed exactly when its class is disabled */
+    return !warning_on[WARN_IDX(severity)];
+}
+
+/*
+ * Decide whether a pass-specific message should be left out on the
+ * current pass.  Only severities up to and including ERR_WARNING can be
+ * skipped.
+ */
+static bool skip_this_pass(int severity)
+{
+    /* Anything more severe than a warning is always shown */
+    if ((severity & ERR_MASK) > ERR_WARNING)
+        return false;
+
+    /* passn is 1 on the very first pass only */
+    if ((severity & ERR_PASS1) && passn != 1)
+        return true;
+
+    /* pass0 is 2 on the code-generation (final) pass only */
+    if ((severity & ERR_PASS2) && pass0 != 2)
+        return true;
+
+    return false;
+}
+
+/**
+ * common error reporting
+ * Shared back end of the error reporting front ends.  It formats the
+ * message, prints the severity label and the message to error_file
+ * (unless skip_this_pass() applies), forwards it to the listing module
+ * and the preprocessor, and then acts on the severity.  It does not
+ * return when the severity is "fatal" (exit code 1) or "panic" (exit
+ * code 3); in both cases an open output file is closed and removed
+ * first.
+ *
+ * @param severity the severity of the warning or error
+ * @param fmt the printf style format string
+ * @param args the arguments for fmt
+ */
+static void nasm_verror_common(int severity, const char *fmt, va_list args)
+{
+    char text[1024];
+    const char *label;
+
+    switch (severity & (ERR_MASK|ERR_NO_SEVERITY)) {
+    case ERR_WARNING:
+        label = "warning: ";
+        break;
+    case ERR_NONFATAL:
+        label = "error: ";
+        break;
+    case ERR_FATAL:
+        label = "fatal: ";
+        break;
+    case ERR_PANIC:
+        label = "panic: ";
+        break;
+    case ERR_DEBUG:
+        label = "debug: ";
+        break;
+    default:
+        label = "";
+        break;
+    }
+
+    /* The last 64 bytes are kept free for the warning-class suffix */
+    vsnprintf(text, sizeof text - 64, fmt, args);
+    if ((severity & (ERR_WARN_MASK|ERR_PP_LISTMACRO)) == ERR_WARN_MASK) {
+        char *tail = text + strlen(text);
+        snprintf(tail, 64, " [-w+%s]", warnings[WARN_IDX(severity)].name);
+    }
+
+    if (!skip_this_pass(severity))
+        fprintf(error_file, "%s%s\n", label, text);
+
+    /* Are we recursing from error_list_macros? */
+    if (severity & ERR_PP_LISTMACRO)
+        return;
+
+    /*
+     * Don't suppress this with skip_this_pass(), or we don't get
+     * pass1 or preprocessor warnings in the list file
+     */
+    if ((severity & ERR_MASK) >= ERR_WARNING)
+        lfmt->error(severity, label, text);
+
+    if (severity & ERR_USAGE)
+        want_usage = true;
+
+    preproc->error_list_macros(severity);
+
+    switch (severity & ERR_MASK) {
+    case ERR_DEBUG:
+        /* no further action, by definition */
+        break;
+
+    case ERR_WARNING:
+        /* Treat warnings as errors */
+        if (warning_on[WARN_IDX(ERR_WARN_TERM)])
+            terminate_after_phase = true;
+        break;
+
+    case ERR_NONFATAL:
+        terminate_after_phase = true;
+        break;
+
+    case ERR_FATAL:
+        if (ofile) {
+            fclose(ofile);
+            remove(outname);
+            ofile = NULL;
+        }
+        if (want_usage)
+            usage();
+        exit(1);                /* instantly die */
+        break;                  /* placate silly compilers */
+
+    case ERR_PANIC:
+        fflush(NULL);
+        /* abort(); */          /* halt, catch fire, and dump core */
+        if (ofile) {
+            fclose(ofile);
+            remove(outname);
+            ofile = NULL;
+        }
+        exit(3);
+        break;
+    }
+}
+
+/* Print the one-line pointer to `nasm -h' on error_file. */
+static void usage(void)
+{
+    static const char hint[] = "type `nasm -h' for help\n";
+
+    fputs(hint, error_file);
+}
+
+/*
+ * Translate a CPU name, as given to the CPU directive, into an
+ * instruction flag set with the matching CPU level bit set.
+ *
+ * Numeric names are compared with strcmp(), all other names with
+ * nasm_stricmp().  For an unknown name IF_PLEVEL is set and an error
+ * is reported (non-fatal while pass0 < 2, fatal otherwise).
+ */
+static iflag_t get_cpu(char *value)
+{
+    static const struct {
+        const char *name;
+        bool exact;             /* true: strcmp(), false: nasm_stricmp() */
+        unsigned int level;
+    } cpu_names[] = {
+        { "8086",       true,  IF_8086 },
+        { "186",        true,  IF_186 },
+        { "286",        true,  IF_286 },
+        { "386",        true,  IF_386 },
+        { "486",        true,  IF_486 },
+        { "586",        true,  IF_PENT },
+        { "pentium",    false, IF_PENT },
+        { "686",        true,  IF_P6 },
+        { "ppro",       false, IF_P6 },
+        { "pentiumpro", false, IF_P6 },
+        { "p2",         false, IF_P6 },
+        { "p3",         false, IF_KATMAI },
+        { "katmai",     false, IF_KATMAI },
+        { "p4",         false, IF_WILLAMETTE }, /* is this right? -- jrc */
+        { "willamette", false, IF_WILLAMETTE },
+        { "prescott",   false, IF_PRESCOTT },
+        { "x64",        false, IF_X86_64 },
+        { "x86-64",     false, IF_X86_64 },
+        { "ia64",       false, IF_IA64 },
+        { "ia-64",      false, IF_IA64 },
+        { "itanium",    false, IF_IA64 },
+        { "itanic",     false, IF_IA64 },
+        { "merced",     false, IF_IA64 }
+    };
+    iflag_t r;
+    size_t n;
+
+    iflag_clear_all(&r);
+
+    for (n = 0; n < sizeof cpu_names / sizeof cpu_names[0]; n++) {
+        int differs;
+
+        if (cpu_names[n].exact)
+            differs = strcmp(value, cpu_names[n].name);
+        else
+            differs = nasm_stricmp(value, cpu_names[n].name);
+
+        if (!differs) {
+            iflag_set(&r, cpu_names[n].level);
+            return r;
+        }
+    }
+
+    iflag_set(&r, IF_PLEVEL);
+    nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
+               "unknown 'cpu' type");
+    return r;
+}
+
+/*
+ * Convert the argument of a BITS directive into a segment size.
+ *
+ * Returns 16, 32 or 64.  Whenever the request cannot be honoured (size
+ * not one of the three, or the selected cpu is too low for 32/64 bits)
+ * an error is reported and 16 is returned.
+ */
+static int get_bits(char *value)
+{
+    int bits = atoi(value);
+
+    switch (bits) {
+    case 16:
+        /* set for a 16-bit segment */
+        break;
+
+    case 32:
+        if (iflag_ffs(&cpu) < IF_386) {
+            nasm_error(ERR_NONFATAL,
+                       "cannot specify 32-bit segment on processor below a 386");
+            bits = 16;
+        }
+        break;
+
+    case 64:
+        if (iflag_ffs(&cpu) < IF_X86_64) {
+            nasm_error(ERR_NONFATAL,
+                       "cannot specify 64-bit segment on processor below an x86-64");
+            bits = 16;
+        }
+        break;
+
+    default:
+        nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
+                   "`%s' is not a valid segment size; must be 16, 32 or 64",
+                   value);
+        bits = 16;
+        break;
+    }
+
+    return bits;
+}
diff --git a/asm/parser.c b/asm/parser.c
new file mode 100644
index 00000000..6210cc69
--- /dev/null
+++ b/asm/parser.c
@@ -0,0 +1,1168 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2013 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * parser.c source line parser for the Netwide Assembler
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "insns.h"
+#include "nasmlib.h"
+#include "stdscan.h"
+#include "eval.h"
+#include "parser.h"
+#include "float.h"
+#include "tables.h"
+
+extern int in_abs_seg; /* ABSOLUTE segment flag */
+extern int32_t abs_seg; /* ABSOLUTE segment */
+extern int32_t abs_offset; /* ABSOLUTE segment offset */
+
+static int is_comma_next(void);
+
+static int i;
+static struct tokenval tokval;
+
+/*
+ * Map a prefix token (or segment register used as a prefix) to the
+ * PPS_* slot it occupies in an instruction's prefix array.  Any other
+ * value is reported through nasm_panic(); -1 is the nominal result in
+ * that case.
+ */
+static int prefix_slot(int prefix)
+{
+    int slot;
+
+    switch (prefix) {
+    /* segment overrides */
+    case R_CS:
+    case R_DS:
+    case R_SS:
+    case R_ES:
+    case R_FS:
+    case R_GS:
+        slot = PPS_SEG;
+        break;
+
+    case P_WAIT:
+        slot = PPS_WAIT;
+        break;
+
+    case P_LOCK:
+        slot = PPS_LOCK;
+        break;
+
+    /* everything that shares the REP slot */
+    case P_REP:
+    case P_REPE:
+    case P_REPZ:
+    case P_REPNE:
+    case P_REPNZ:
+    case P_XACQUIRE:
+    case P_XRELEASE:
+    case P_BND:
+    case P_NOBND:
+        slot = PPS_REP;
+        break;
+
+    /* operand size */
+    case P_O16:
+    case P_O32:
+    case P_O64:
+    case P_OSP:
+        slot = PPS_OSIZE;
+        break;
+
+    /* address size */
+    case P_A16:
+    case P_A32:
+    case P_A64:
+    case P_ASP:
+        slot = PPS_ASIZE;
+        break;
+
+    /* encoding selection */
+    case P_EVEX:
+    case P_VEX3:
+    case P_VEX2:
+        slot = PPS_VEX;
+        break;
+
+    default:
+        nasm_panic(0, "Invalid value %d passed to prefix_slot()", prefix);
+        slot = -1;
+        break;
+    }
+
+    return slot;
+}
+
+/*
+ * Apply the size keyword held in the current token (tokval.t_integer)
+ * to a memory operand.
+ *
+ * In TASM compatibility mode the keyword sets the operand size bits in
+ * op->type.  In standard NASM mode it sets the displacement size and
+ * effective address flags of `op', or the address size prefix of
+ * `result' for A16/A32/A64.
+ */
+static void process_size_override(insn *result, operand *op)
+{
+    const int keyword = (int)tokval.t_integer;
+
+    if (tasm_compatible_mode) {
+        /*
+         * For TASM compatibility a size override inside the
+         * brackets changes the size of the operand, not the
+         * address type of the operand as it does in standard
+         * NASM syntax. Hence:
+         *
+         *  mov     eax,[DWORD val]
+         *
+         * is valid syntax in TASM compatibility mode. Note that
+         * you lose the ability to override the default address
+         * type for the instruction, but we never use anything
+         * but 32-bit flat model addressing in our code.
+         */
+        switch (keyword) {
+        case S_BYTE:
+            op->type |= BITS8;
+            break;
+        case S_WORD:
+            op->type |= BITS16;
+            break;
+        case S_DWORD:
+        case S_LONG:
+            op->type |= BITS32;
+            break;
+        case S_QWORD:
+            op->type |= BITS64;
+            break;
+        case S_TWORD:
+            op->type |= BITS80;
+            break;
+        case S_OWORD:
+            op->type |= BITS128;
+            break;
+        default:
+            nasm_error(ERR_NONFATAL,
+                       "invalid operand size specification");
+            break;
+        }
+        return;
+    }
+
+    /* Standard NASM compatible syntax */
+    switch (keyword) {
+    /* flags only */
+    case S_NOSPLIT:
+        op->eaflags |= EAF_TIMESTWO;
+        break;
+    case S_REL:
+        op->eaflags |= EAF_REL;
+        break;
+    case S_ABS:
+        op->eaflags |= EAF_ABS;
+        break;
+
+    /* displacement sizes */
+    case S_BYTE:
+        op->disp_size = 8;
+        op->eaflags |= EAF_BYTEOFFS;
+        break;
+    case S_WORD:
+        op->disp_size = 16;
+        op->eaflags |= EAF_WORDOFFS;
+        break;
+    case S_DWORD:
+    case S_LONG:
+        op->disp_size = 32;
+        op->eaflags |= EAF_WORDOFFS;
+        break;
+    case S_QWORD:
+        op->disp_size = 64;
+        op->eaflags |= EAF_WORDOFFS;
+        break;
+
+    /* address size prefixes */
+    case P_A16:
+    case P_A32:
+    case P_A64:
+        if (result->prefixes[PPS_ASIZE] &&
+            result->prefixes[PPS_ASIZE] != tokval.t_integer)
+            nasm_error(ERR_NONFATAL,
+                       "conflicting address size specifications");
+        else
+            result->prefixes[PPS_ASIZE] = tokval.t_integer;
+        break;
+
+    default:
+        nasm_error(ERR_NONFATAL, "invalid size specification in"
+                   " effective address");
+        break;
+    }
+}
+
+/*
+ * Parse the run of decorators that may follow a register operand,
+ * starting with the token currently held in tokval.
+ * Opmask and zeroing decorators may come in either order,
+ * e.g. zmm1 {k2}{z} or zmm2 {z}{k3}, and must be the last thing in the
+ * operand: scanning stops at a comma or at end of line.
+ *
+ * Returns true when an unexpected token was met and the caller has to
+ * recover, false otherwise.
+ */
+static bool parse_braces(decoflags_t *decoflags)
+{
+    int tok;
+
+    for (tok = tokval.t_type; ; tok = stdscan(NULL, &tokval)) {
+        if (tok == TOKEN_OPMASK) {
+            if (*decoflags & OPMASK_MASK) {
+                nasm_error(ERR_NONFATAL, "opmask k%"PRIu64" is already set",
+                           *decoflags & OPMASK_MASK);
+                *decoflags &= ~OPMASK_MASK;
+            }
+            *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
+        } else if (tok == TOKEN_DECORATOR) {
+            switch (tokval.t_integer) {
+            case BRC_Z:
+                /*
+                 * according to AVX512 spec, only zeroing/merging decorator
+                 * is supported with opmask
+                 */
+                *decoflags |= GEN_Z(0);
+                break;
+            default:
+                nasm_error(ERR_NONFATAL, "{%s} is not an expected decorator",
+                           tokval.t_charptr);
+                break;
+            }
+        } else if (tok == ',' || tok == TOKEN_EOS) {
+            /* clean end of the decorator list */
+            return false;
+        } else {
+            nasm_error(ERR_NONFATAL, "only a series of valid decorators"
+                       " expected");
+            return true;
+        }
+    }
+}
+
+/*
+ * Decompose the evaluated expression vector `e' of a memory reference
+ * into base register, index register, scale and offset, and store them
+ * in `op' together with the WRT and segment parts.
+ *
+ * Returns 0 on success.  On a malformed expression an error is
+ * reported and -1 is returned; basereg/indexreg/scale/offset of `op'
+ * are left untouched in that case.
+ */
+static int parse_mref(operand *op, const expr *e)
+{
+    int base = -1, index = -1, scale = 0;
+    int64_t disp = 0;
+
+    /* first register term, if any */
+    if (e->type && e->type <= EXPR_REG_END) {
+        bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+
+        if (is_gpr && e->value == 1) {
+            base = e->type;             /* it can be the base register */
+        } else {
+            index = e->type;            /* no, it has to be the index */
+            scale = e->value;
+        }
+        e++;
+    }
+
+    /* second register term, if any */
+    if (e->type && e->type <= EXPR_REG_END) {
+        bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+
+        if (base != -1) {
+            /* the first was the base, so this one is the index */
+            index = e->type;
+            scale = e->value;
+        } else if (is_gpr && e->value == 1) {
+            base = e->type;
+        } else {
+            /* both want to be index */
+            nasm_error(ERR_NONFATAL,
+                       "invalid effective address: two index registers");
+            return -1;
+        }
+        e++;
+    }
+
+    if (e->type == 0) {
+        /* no offset at all */
+        disp = 0;
+        op->wrt = NO_SEG;
+        op->segment = NO_SEG;
+    } else if (e->type <= EXPR_REG_END) {
+        /* a third register term is an error */
+        nasm_error(ERR_NONFATAL,
+                   "beroset-p-603-invalid effective address");
+        return -1;
+    } else if (e->type == EXPR_UNKNOWN) {
+        op->opflags |= OPFLAG_UNKNOWN;
+        disp = 0;                       /* doesn't matter what */
+        op->wrt = NO_SEG;               /* nor this */
+        op->segment = NO_SEG;           /* or this */
+        while (e->type)
+            e++;                        /* go to the end of the line */
+    } else {
+        if (e->type == EXPR_SIMPLE) {
+            disp = e->value;
+            e++;
+        }
+
+        if (e->type == EXPR_WRT) {
+            op->wrt = e->value;
+            e++;
+        } else {
+            op->wrt = NO_SEG;
+        }
+
+        /*
+         * Look for a segment base type.
+         */
+        if (e->type && e->type < EXPR_SEGBASE) {
+            nasm_error(ERR_NONFATAL,
+                       "beroset-p-630-invalid effective address");
+            return -1;
+        }
+        while (e->type && e->value == 0)
+            e++;
+        if (e->type && e->value != 1) {
+            nasm_error(ERR_NONFATAL,
+                       "beroset-p-637-invalid effective address");
+            return -1;
+        }
+
+        if (e->type) {
+            op->segment = e->type - EXPR_SEGBASE;
+            e++;
+        } else {
+            op->segment = NO_SEG;
+        }
+
+        while (e->type && e->value == 0)
+            e++;
+        if (e->type) {
+            nasm_error(ERR_NONFATAL,
+                       "beroset-p-650-invalid effective address");
+            return -1;
+        }
+    }
+
+    /* there'd better be nothing left! */
+    if (e->type != 0) {
+        nasm_error(ERR_NONFATAL,
+                   "beroset-p-663-invalid effective address");
+        return -1;
+    }
+
+    op->basereg = base;
+    op->indexreg = index;
+    op->scale = scale;
+    op->offset = disp;
+    return 0;
+}
+
+/*
+ * Fill in the type bits of a memory operand from its already parsed
+ * base register, index register and scale.
+ */
+static void mref_set_optype(operand *op)
+{
+    const bool no_base = (op->basereg == -1);
+    const bool no_index = (op->indexreg == -1 || op->scale == 0);
+
+    /* It is memory, but it can match any r/m operand */
+    op->type |= MEMORY_ANY;
+
+    /* Pure displacement: either IP-relative or a plain memory offset */
+    if (no_base && no_index) {
+        int is_rel = 0;
+
+        if (globalbits == 64 && !(op->eaflags & EAF_ABS)) {
+            if (globalrel && !(op->eaflags & EAF_FSGS))
+                is_rel = 1;
+            else if (op->eaflags & EAF_REL)
+                is_rel = 1;
+        }
+
+        if (is_rel)
+            op->type |= IP_REL;
+        else
+            op->type |= MEM_OFFS;
+    }
+
+    /* A vector index register selects the matching vector memory class */
+    if (op->indexreg != -1) {
+        opflags_t iclass = nasm_reg_flags[op->indexreg];
+
+        if (is_class(XMMREG,iclass))
+            op->type |= XMEM;
+        else if (is_class(YMMREG,iclass))
+            op->type |= YMEM;
+        else if (is_class(ZMMREG,iclass))
+            op->type |= ZMEM;
+    }
+}
+
+/*
+ * Parse one line of assembly source into an instruction description.
+ *
+ *   pass    - compared against 2 to decide whether operand expressions
+ *             are evaluated with the `critical' flag set
+ *   buffer  - text of the line; it is handed to the scanner
+ *   result  - caller-supplied insn which is filled in
+ *   ldef    - callback invoked to define a label found at the start of
+ *             the line (not invoked when the label is followed by EQU)
+ *
+ * Always returns `result'.  When the line is blank, consists of a
+ * label only, or cannot be parsed, result->opcode is set to I_none.
+ *
+ * `i' and `tokval' are not declared in this function; presumably they
+ * are file-scope scanner state shared with the helpers -- TODO confirm.
+ */
+insn *parse_line(int pass, char *buffer, insn *result, ldfunc ldef)
+{
+    bool insn_is_label = false;
+    struct eval_hints hints;
+    int opnum;
+    int critical;
+    bool first;
+    bool recover;
+
+restart_parse:
+    first = true;
+    result->forw_ref = false;
+
+    stdscan_reset();
+    stdscan_set(buffer);
+    i = stdscan(NULL, &tokval);
+
+    result->label = NULL;       /* Assume no label */
+    result->eops = NULL;        /* must do this, whatever happens */
+    result->operands = 0;       /* must initialize this */
+    result->evex_rm = 0;        /* Ensure EVEX rounding mode is reset */
+    result->evex_brerop = -1;   /* Reset EVEX broadcasting/ER op position */
+
+    /* Ignore blank lines */
+    if (i == TOKEN_EOS)
+        goto fail;
+
+    if (i != TOKEN_ID &&
+        i != TOKEN_INSN &&
+        i != TOKEN_PREFIX &&
+        (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
+        nasm_error(ERR_NONFATAL,
+                   "label or instruction expected at start of line");
+        goto fail;
+    }
+
+    /*
+     * insn_is_label is only set when a previous attempt found a colon
+     * directly after an instruction name: the name is then re-parsed
+     * as a label.
+     */
+    if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
+        /* there's a label here */
+        first = false;
+        result->label = tokval.t_charptr;
+        i = stdscan(NULL, &tokval);
+        if (i == ':') {         /* skip over the optional colon */
+            i = stdscan(NULL, &tokval);
+        } else if (i == 0) {
+            nasm_error(ERR_WARNING | ERR_WARN_OL | ERR_PASS1,
+                       "label alone on a line without a colon might be in error");
+        }
+        if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
+            /*
+             * FIXME: location.segment could be NO_SEG, in which case
+             * it is possible we should be passing 'abs_seg'. Look into this.
+             * Work out whether that is *really* what we should be doing.
+             * Generally fix things. I think this is right as it is, but
+             * am still not certain.
+             */
+            ldef(result->label, in_abs_seg ? abs_seg : location.segment,
+                 location.offset, NULL, true, false);
+        }
+    }
+
+    /* Just a label here */
+    if (i == TOKEN_EOS)
+        goto fail;
+
+    nasm_build_assert(P_none != 0);
+    memset(result->prefixes, P_none, sizeof(result->prefixes));
+    result->times = 1L;
+
+    /* Collect prefixes; a segment register here acts as a prefix too */
+    while (i == TOKEN_PREFIX ||
+           (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
+        first = false;
+
+        /*
+         * Handle special case: the TIMES prefix.
+         */
+        if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
+            expr *value;
+
+            i = stdscan(NULL, &tokval);
+            value = evaluate(stdscan, NULL, &tokval, NULL, pass0, NULL);
+            i = tokval.t_type;
+            if (!value)         /* Error in evaluator */
+                goto fail;
+            if (!is_simple(value)) {
+                nasm_error(ERR_NONFATAL,
+                           "non-constant argument supplied to TIMES");
+                result->times = 1L;
+            } else {
+                result->times = value->value;
+                if (value->value < 0 && pass0 == 2) {
+                    nasm_error(ERR_NONFATAL, "TIMES value %"PRId64" is negative",
+                               value->value);
+                    result->times = 0;
+                }
+            }
+        } else {
+            int slot = prefix_slot(tokval.t_integer);
+            if (result->prefixes[slot]) {
+                if (result->prefixes[slot] == tokval.t_integer)
+                    nasm_error(ERR_WARNING | ERR_PASS1,
+                               "instruction has redundant prefixes");
+                else
+                    nasm_error(ERR_NONFATAL,
+                               "instruction has conflicting prefixes");
+            }
+            result->prefixes[slot] = tokval.t_integer;
+            i = stdscan(NULL, &tokval);
+        }
+    }
+
+    if (i != TOKEN_INSN) {
+        int j;
+        enum prefixes pfx;
+
+        /* Find the first prefix slot that is actually in use, if any */
+        for (j = 0; j < MAXPREFIX; j++) {
+            if ((pfx = result->prefixes[j]) != P_none)
+                break;
+        }
+
+        if (i == 0 && pfx != P_none) {
+            /*
+             * Instruction prefixes are present, but no actual
+             * instruction. This is allowed: at this point we
+             * invent a notional instruction of RESB 0.
+             */
+            result->opcode = I_RESB;
+            result->operands = 1;
+            result->oprs[0].type = IMMEDIATE;
+            result->oprs[0].offset = 0L;
+            result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
+            return result;
+        } else {
+            nasm_error(ERR_NONFATAL, "parser: instruction expected");
+            goto fail;
+        }
+    }
+
+    result->opcode = tokval.t_integer;
+    result->condition = tokval.t_inttwo;
+
+    /*
+     * INCBIN cannot be satisfied with incorrectly
+     * evaluated operands, since the correct values _must_ be known
+     * on the first pass. Hence, even in pass one, we set the
+     * `critical' flag on calling evaluate(), so that it will bomb
+     * out on undefined symbols.
+     */
+    if (result->opcode == I_INCBIN) {
+        critical = (pass0 < 2 ? 1 : 2);
+
+    } else
+        critical = (pass == 2 ? 2 : 0);
+
+    if (result->opcode == I_DB || result->opcode == I_DW ||
+        result->opcode == I_DD || result->opcode == I_DQ ||
+        result->opcode == I_DT || result->opcode == I_DO ||
+        result->opcode == I_DY || result->opcode == I_DZ ||
+        result->opcode == I_INCBIN) {
+        extop *eop, **tail = &result->eops, **fixptr;
+        int oper_num = 0;
+        int32_t sign;
+
+        result->eops_float = false;
+
+        /*
+         * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
+         */
+        while (1) {
+            i = stdscan(NULL, &tokval);
+            if (i == TOKEN_EOS)
+                break;
+            else if (first && i == ':') {
+                /* "db:" and friends: the mnemonic was really a label */
+                insn_is_label = true;
+                goto restart_parse;
+            }
+            first = false;
+            fixptr = tail;      /* link to patch if eop is reallocated */
+            eop = *tail = nasm_malloc(sizeof(extop));
+            tail = &eop->next;
+            eop->next = NULL;
+            eop->type = EOT_NOTHING;
+            oper_num++;
+            sign = +1;
+
+            /*
+             * is_comma_next() here is to distinguish this from
+             * a string used as part of an expression...
+             */
+            if (i == TOKEN_STR && is_comma_next()) {
+                eop->type = EOT_DB_STRING;
+                eop->stringval = tokval.t_charptr;
+                eop->stringlen = tokval.t_inttwo;
+                i = stdscan(NULL, &tokval);     /* eat the comma */
+            } else if (i == TOKEN_STRFUNC) {
+                bool parens = false;
+                const char *funcname = tokval.t_charptr;
+                enum strfunc func = tokval.t_integer;
+                i = stdscan(NULL, &tokval);
+                if (i == '(') {
+                    parens = true;
+                    i = stdscan(NULL, &tokval);
+                }
+                if (i != TOKEN_STR) {
+                    nasm_error(ERR_NONFATAL,
+                               "%s must be followed by a string constant",
+                               funcname);
+                    eop->type = EOT_NOTHING;
+                } else {
+                    eop->type = EOT_DB_STRING_FREE;
+                    eop->stringlen =
+                        string_transform(tokval.t_charptr, tokval.t_inttwo,
+                                         &eop->stringval, func);
+                    if (eop->stringlen == (size_t)-1) {
+                        nasm_error(ERR_NONFATAL, "invalid string for transform");
+                        eop->type = EOT_NOTHING;
+                    }
+                }
+                if (parens && i && i != ')') {
+                    i = stdscan(NULL, &tokval);
+                    if (i != ')') {
+                        nasm_error(ERR_NONFATAL, "unterminated %s function",
+                                   funcname);
+                    }
+                }
+                if (i && i != ',')
+                    i = stdscan(NULL, &tokval);
+            } else if (i == '-' || i == '+') {
+                /*
+                 * A sign may introduce a floating-point constant; peek
+                 * at the next token and rewind if it is not one.
+                 */
+                char *save = stdscan_get();
+                int token = i;
+                sign = (i == '-') ? -1 : 1;
+                i = stdscan(NULL, &tokval);
+                if (i != TOKEN_FLOAT) {
+                    stdscan_set(save);
+                    i = tokval.t_type = token;
+                    goto is_expression;
+                } else {
+                    goto is_float;
+                }
+            } else if (i == TOKEN_FLOAT) {
+is_float:
+                eop->type = EOT_DB_STRING;
+                result->eops_float = true;
+
+                eop->stringlen = idata_bytes(result->opcode);
+                if (eop->stringlen > 16) {
+                    nasm_error(ERR_NONFATAL, "floating-point constant"
+                               " encountered in DY or DZ instruction");
+                    eop->stringlen = 0;
+                } else if (eop->stringlen < 1) {
+                    nasm_error(ERR_NONFATAL, "floating-point constant"
+                               " encountered in unknown instruction");
+                    /*
+                     * fix suggested by Pedro Gimeno... original line was:
+                     * eop->type = EOT_NOTHING;
+                     */
+                    eop->stringlen = 0;
+                }
+
+                /*
+                 * The converted constant is stored directly behind the
+                 * extop, so grow the allocation and re-link it.
+                 */
+                eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
+                tail = &eop->next;
+                *fixptr = eop;
+                eop->stringval = (char *)eop + sizeof(extop);
+                if (!eop->stringlen ||
+                    !float_const(tokval.t_charptr, sign,
+                                 (uint8_t *)eop->stringval, eop->stringlen))
+                    eop->type = EOT_NOTHING;
+                i = stdscan(NULL, &tokval);     /* eat the comma */
+            } else {
+                /* anything else, assume it is an expression */
+                expr *value;
+
+is_expression:
+                value = evaluate(stdscan, NULL, &tokval, NULL,
+                                 critical, NULL);
+                i = tokval.t_type;
+                if (!value)     /* Error in evaluator */
+                    goto fail;
+                if (is_unknown(value)) {
+                    eop->type = EOT_DB_NUMBER;
+                    eop->offset = 0;    /* doesn't matter what we put */
+                    eop->segment = eop->wrt = NO_SEG;   /* likewise */
+                } else if (is_reloc(value)) {
+                    eop->type = EOT_DB_NUMBER;
+                    eop->offset = reloc_value(value);
+                    eop->segment = reloc_seg(value);
+                    eop->wrt = reloc_wrt(value);
+                } else {
+                    nasm_error(ERR_NONFATAL,
+                               "operand %d: expression is not simple"
+                               " or relocatable", oper_num);
+                }
+            }
+
+            /*
+             * We're about to call stdscan(), which will eat the
+             * comma that we're currently sitting on between
+             * arguments. However, we'd better check first that it
+             * _is_ a comma.
+             */
+            if (i == TOKEN_EOS) /* also could be EOL */
+                break;
+            if (i != ',') {
+                nasm_error(ERR_NONFATAL, "comma expected after operand %d",
+                           oper_num);
+                goto fail;
+            }
+        }
+
+        if (result->opcode == I_INCBIN) {
+            /*
+             * Correct syntax for INCBIN is that there should be
+             * one string operand, followed by one or two numeric
+             * operands.
+             */
+            if (!result->eops || result->eops->type != EOT_DB_STRING)
+                nasm_error(ERR_NONFATAL, "`incbin' expects a file name");
+            else if (result->eops->next &&
+                     result->eops->next->type != EOT_DB_NUMBER)
+                nasm_error(ERR_NONFATAL, "`incbin': second parameter is"
+                           " non-numeric");
+            else if (result->eops->next && result->eops->next->next &&
+                     result->eops->next->next->type != EOT_DB_NUMBER)
+                nasm_error(ERR_NONFATAL, "`incbin': third parameter is"
+                           " non-numeric");
+            else if (result->eops->next && result->eops->next->next &&
+                     result->eops->next->next->next)
+                nasm_error(ERR_NONFATAL,
+                           "`incbin': more than three parameters");
+            else
+                return result;
+            /*
+             * If we reach here, one of the above errors happened.
+             * Throw the instruction away.
+             */
+            goto fail;
+        } else /* DB ... */ if (oper_num == 0)
+            nasm_error(ERR_WARNING | ERR_PASS1,
+                       "no operand for data declaration");
+        else
+            result->operands = oper_num;
+
+        return result;
+    }
+
+    /*
+     * Now we begin to parse the operands. There may be up to
+     * MAX_OPERANDS of these, separated by commas, and terminated by
+     * a zero token.
+     */
+
+    for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
+        operand *op = &result->oprs[opnum];
+        expr *value;            /* used most of the time */
+        bool mref;              /* is this going to be a memory ref? */
+        bool bracket;           /* is it a [] mref, or a & mref? */
+        bool mib;               /* compound (mib) mref? */
+        int setsize = 0;
+        decoflags_t brace_flags = 0;    /* flags for decorators in braces */
+
+        op->disp_size = 0;      /* have to zero this whatever */
+        op->eaflags = 0;        /* and this */
+        op->opflags = 0;
+        op->decoflags = 0;
+
+        i = stdscan(NULL, &tokval);
+        if (i == TOKEN_EOS)
+            break;              /* end of operands: get out of here */
+        else if (first && i == ':') {
+            /* the "instruction" was really a label: start over */
+            insn_is_label = true;
+            goto restart_parse;
+        }
+        first = false;
+        op->type = 0;           /* so far, no override */
+        while (i == TOKEN_SPECIAL) {    /* size specifiers */
+            switch ((int)tokval.t_integer) {
+            case S_BYTE:
+                if (!setsize)   /* we want to use only the first */
+                    op->type |= BITS8;
+                setsize = 1;
+                break;
+            case S_WORD:
+                if (!setsize)
+                    op->type |= BITS16;
+                setsize = 1;
+                break;
+            case S_DWORD:
+            case S_LONG:
+                if (!setsize)
+                    op->type |= BITS32;
+                setsize = 1;
+                break;
+            case S_QWORD:
+                if (!setsize)
+                    op->type |= BITS64;
+                setsize = 1;
+                break;
+            case S_TWORD:
+                if (!setsize)
+                    op->type |= BITS80;
+                setsize = 1;
+                break;
+            case S_OWORD:
+                if (!setsize)
+                    op->type |= BITS128;
+                setsize = 1;
+                break;
+            case S_YWORD:
+                if (!setsize)
+                    op->type |= BITS256;
+                setsize = 1;
+                break;
+            case S_ZWORD:
+                if (!setsize)
+                    op->type |= BITS512;
+                setsize = 1;
+                break;
+            case S_TO:
+                op->type |= TO;
+                break;
+            case S_STRICT:
+                op->type |= STRICT;
+                break;
+            case S_FAR:
+                op->type |= FAR;
+                break;
+            case S_NEAR:
+                op->type |= NEAR;
+                break;
+            case S_SHORT:
+                op->type |= SHORT;
+                break;
+            default:
+                nasm_error(ERR_NONFATAL, "invalid operand size specification");
+            }
+            i = stdscan(NULL, &tokval);
+        }
+
+        if (i == '[' || i == '&') {     /* memory reference */
+            mref = true;
+            bracket = (i == '[');
+            i = stdscan(NULL, &tokval); /* skip the opening [ or & */
+            while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
+                process_size_override(result, op);
+                i = stdscan(NULL, &tokval);
+            }
+            /* when a comma follows an opening bracket - [ , eax*4] */
+            if (i == ',') {
+                /* treat as if there is a zero displacement virtually */
+                tokval.t_type = TOKEN_NUM;
+                tokval.t_integer = 0;
+                stdscan_set(stdscan_get() - 1);     /* rewind the comma */
+            }
+        } else {                /* immediate operand, or register */
+            mref = false;
+            bracket = false;    /* placate optimisers */
+        }
+
+        if ((op->type & FAR) && !mref &&
+            result->opcode != I_JMP && result->opcode != I_CALL) {
+            nasm_error(ERR_NONFATAL, "invalid use of FAR operand specifier");
+        }
+
+        value = evaluate(stdscan, NULL, &tokval,
+                         &op->opflags, critical, &hints);
+        i = tokval.t_type;
+        if (op->opflags & OPFLAG_FORWARD) {
+            result->forw_ref = true;
+        }
+        if (!value)             /* Error in evaluator */
+            goto fail;
+        if (i == ':' && mref) { /* it was seg:offset */
+            /*
+             * Process the segment override.
+             */
+            if (value[1].type != 0 ||
+                value->value != 1 ||
+                !IS_SREG(value->type))
+                nasm_error(ERR_NONFATAL, "invalid segment override");
+            else if (result->prefixes[PPS_SEG])
+                nasm_error(ERR_NONFATAL,
+                           "instruction has conflicting segment overrides");
+            else {
+                result->prefixes[PPS_SEG] = value->type;
+                if (IS_FSGS(value->type))
+                    op->eaflags |= EAF_FSGS;
+            }
+
+            i = stdscan(NULL, &tokval); /* then skip the colon */
+            while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
+                process_size_override(result, op);
+                i = stdscan(NULL, &tokval);
+            }
+            value = evaluate(stdscan, NULL, &tokval,
+                             &op->opflags, critical, &hints);
+            i = tokval.t_type;
+            if (op->opflags & OPFLAG_FORWARD) {
+                result->forw_ref = true;
+            }
+            /* and get the offset */
+            if (!value)         /* Error in evaluator */
+                goto fail;
+        }
+
+        mib = false;
+        if (mref && bracket && i == ',') {
+            /* [seg:base+offset,index*scale] syntax (mib) */
+
+            operand o1, o2;     /* Partial operands */
+
+            if (parse_mref(&o1, value))
+                goto fail;
+
+            i = stdscan(NULL, &tokval); /* Eat comma */
+            value = evaluate(stdscan, NULL, &tokval, &op->opflags,
+                             critical, &hints);
+            i = tokval.t_type;
+            if (!value)
+                goto fail;
+
+            if (parse_mref(&o2, value))
+                goto fail;
+
+            /* A lone register after the comma is an index with scale 1 */
+            if (o2.basereg != -1 && o2.indexreg == -1) {
+                o2.indexreg = o2.basereg;
+                o2.scale = 1;
+                o2.basereg = -1;
+            }
+
+            if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
+                o2.segment != NO_SEG || o2.wrt != NO_SEG) {
+                nasm_error(ERR_NONFATAL, "invalid mib expression");
+                goto fail;
+            }
+
+            op->basereg = o1.basereg;
+            op->indexreg = o2.indexreg;
+            op->scale = o2.scale;
+            op->offset = o1.offset;
+            op->segment = o1.segment;
+            op->wrt = o1.wrt;
+
+            if (op->basereg != -1) {
+                op->hintbase = op->basereg;
+                op->hinttype = EAH_MAKEBASE;
+            } else if (op->indexreg != -1) {
+                op->hintbase = op->indexreg;
+                op->hinttype = EAH_NOTBASE;
+            } else {
+                op->hintbase = -1;
+                op->hinttype = EAH_NOHINT;
+            }
+
+            mib = true;
+        }
+
+        recover = false;
+        if (mref && bracket) {  /* find ] at the end */
+            if (i != ']') {
+                nasm_error(ERR_NONFATAL, "parser: expecting ]");
+                recover = true;
+            } else {            /* we got the required ] */
+                i = stdscan(NULL, &tokval);
+                if ((i == TOKEN_DECORATOR) || (i == TOKEN_OPMASK)) {
+                    /*
+                     * according to AVX512 spec, broadcast or opmask decorator
+                     * is expected for memory reference operands
+                     */
+                    if (tokval.t_flag & TFLAG_BRDCAST) {
+                        brace_flags |= GEN_BRDCAST(0) |
+                                       VAL_BRNUM(tokval.t_integer - BRC_1TO2);
+                        i = stdscan(NULL, &tokval);
+                    } else if (i == TOKEN_OPMASK) {
+                        brace_flags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
+                        i = stdscan(NULL, &tokval);
+                    } else {
+                        nasm_error(ERR_NONFATAL, "broadcast or opmask "
+                                   "decorator expected inside braces");
+                        recover = true;
+                    }
+                }
+
+                if (i != 0 && i != ',') {
+                    nasm_error(ERR_NONFATAL, "comma or end of line expected");
+                    recover = true;
+                }
+            }
+        } else {                /* immediate operand */
+            if (i != 0 && i != ',' && i != ':' &&
+                i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
+                nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of "
+                           "line expected after operand");
+                recover = true;
+            } else if (i == ':') {
+                op->type |= COLON;
+            } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
+                /* parse opmask (and zeroing) after an operand */
+                recover = parse_braces(&brace_flags);
+            }
+        }
+        if (recover) {
+            do {                /* error recovery */
+                i = stdscan(NULL, &tokval);
+            } while (i != 0 && i != ',');
+        }
+
+        /*
+         * now convert the exprs returned from evaluate()
+         * into operand descriptions...
+         */
+        op->decoflags |= brace_flags;
+
+        if (mref) {             /* it's a memory reference */
+            /* A mib reference was fully parsed already */
+            if (!mib) {
+                if (parse_mref(op, value))
+                    goto fail;
+                op->hintbase = hints.base;
+                op->hinttype = hints.type;
+            }
+            mref_set_optype(op);
+        } else {                /* it's not a memory reference */
+            if (is_just_unknown(value)) {   /* it's immediate but unknown */
+                op->type |= IMMEDIATE;
+                op->opflags |= OPFLAG_UNKNOWN;
+                op->offset = 0;         /* don't care */
+                op->segment = NO_SEG;   /* don't care again */
+                op->wrt = NO_SEG;       /* still don't care */
+
+                if(optimizing >= 0 && !(op->type & STRICT)) {
+                    /* Be optimistic */
+                    op->type |=
+                        UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
+                }
+            } else if (is_reloc(value)) {   /* it's immediate */
+                op->type |= IMMEDIATE;
+                op->offset = reloc_value(value);
+                op->segment = reloc_seg(value);
+                op->wrt = reloc_wrt(value);
+
+                if (is_simple(value)) {
+                    uint64_t n = reloc_value(value);
+                    if (n == 1)
+                        op->type |= UNITY;
+                    if (optimizing >= 0 &&
+                        !(op->type & STRICT)) {
+                        /* unsigned wrap-around turns these into range checks */
+                        if ((uint32_t) (n + 128) <= 255)
+                            op->type |= SBYTEDWORD;
+                        if ((uint16_t) (n + 128) <= 255)
+                            op->type |= SBYTEWORD;
+                        if (n <= 0xFFFFFFFF)
+                            op->type |= UDWORD;
+                        if (n + 0x80000000 <= 0xFFFFFFFF)
+                            op->type |= SDWORD;
+                    }
+                }
+            } else if(value->type == EXPR_RDSAE) {
+                /*
+                 * it's not an operand but a rounding or SAE decorator.
+                 * put the decorator information in the (opflag_t) type field
+                 * of previous operand.
+                 */
+                if (opnum == 0) {
+                    /*
+                     * There is no previous operand to attach the
+                     * decorator to; stepping back would address
+                     * result->oprs[-1].
+                     */
+                    nasm_error(ERR_NONFATAL, "invalid decorator");
+                    goto fail;
+                }
+                opnum--; op--;
+                switch (value->value) {
+                case BRC_RN:
+                case BRC_RU:
+                case BRC_RD:
+                case BRC_RZ:
+                case BRC_SAE:
+                    op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
+                    result->evex_rm = value->value;
+                    break;
+                default:
+                    nasm_error(ERR_NONFATAL, "invalid decorator");
+                    break;
+                }
+            } else {            /* it's a register */
+                opflags_t rs;
+
+                if (value->type >= EXPR_SIMPLE || value->value != 1) {
+                    nasm_error(ERR_NONFATAL, "invalid operand type");
+                    goto fail;
+                }
+
+                /*
+                 * check that its only 1 register, not an expression...
+                 */
+                for (i = 1; value[i].type; i++)
+                    if (value[i].value) {
+                        nasm_error(ERR_NONFATAL, "invalid operand type");
+                        goto fail;
+                    }
+
+                /* clear overrides, except TO which applies to FPU regs */
+                if (op->type & ~TO) {
+                    /*
+                     * we want to produce a warning iff the specified size
+                     * is different from the register size
+                     */
+                    rs = op->type & SIZE_MASK;
+                } else
+                    rs = 0;
+
+                op->type &= TO;
+                op->type |= REGISTER;
+                op->type |= nasm_reg_flags[value->type];
+                op->decoflags |= brace_flags;
+                op->basereg = value->type;
+
+                if (rs && (op->type & SIZE_MASK) != rs)
+                    nasm_error(ERR_WARNING | ERR_PASS1,
+                               "register size specification ignored");
+            }
+        }
+
+        /* remember the position of operand having broadcasting/ER mode */
+        if (op->decoflags & (BRDCAST_MASK | ER | SAE))
+            result->evex_brerop = opnum;
+    }
+
+    result->operands = opnum;   /* set operand count */
+
+    /* clear remaining operands */
+    while (opnum < MAX_OPERANDS)
+        result->oprs[opnum++].type = 0;
+
+    /*
+     * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB.
+     */
+    switch (result->opcode) {
+    case I_RESW:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 2;
+        break;
+    case I_RESD:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 4;
+        break;
+    case I_RESQ:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 8;
+        break;
+    case I_REST:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 10;
+        break;
+    case I_RESO:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 16;
+        break;
+    case I_RESY:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 32;
+        break;
+    case I_RESZ:
+        result->opcode = I_RESB;
+        result->oprs[0].offset *= 64;
+        break;
+    default:
+        break;
+    }
+
+    return result;
+
+fail:
+    /* Common exit for "nothing to assemble": mark the insn as empty */
+    result->opcode = I_none;
+    return result;
+}
+
+/*
+ * Peek at the next token without consuming it.  Returns nonzero when
+ * that token is a comma, a semicolon or the zero (end) token.
+ */
+static int is_comma_next(void)
+{
+    struct tokenval peeked;
+    char *saved_pos = stdscan_get();        /* remember the scan position */
+    int tok = stdscan(NULL, &peeked);
+
+    stdscan_set(saved_pos);                 /* undo the look-ahead */
+
+    switch (tok) {
+    case 0:
+    case ',':
+    case ';':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Release the extended-operand list hanging off an instruction and
+ * leave i->eops set to NULL.  String data is freed only for entries
+ * of type EOT_DB_STRING_FREE.
+ */
+void cleanup_insn(insn * i)
+{
+    extop *cur, *following;
+
+    for (cur = i->eops; cur; cur = following) {
+        following = cur->next;      /* fetch the link before freeing */
+        if (cur->type == EOT_DB_STRING_FREE)
+            nasm_free(cur->stringval);
+        nasm_free(cur);
+    }
+    i->eops = NULL;
+}
diff --git a/asm/parser.h b/asm/parser.h
new file mode 100644
index 00000000..e148c7fa
--- /dev/null
+++ b/asm/parser.h
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * parser.h header file for the parser module of the Netwide
+ * Assembler
+ */
+
+#ifndef NASM_PARSER_H
+#define NASM_PARSER_H
+/* Public entry points of the parser module. */
+insn *parse_line(int pass, char *buffer, insn *result, ldfunc ldef); /* parse one source line into *result; returns result */
+void cleanup_insn(insn *instruction); /* free the extended-operand list attached to an instruction */
+
+#endif /* NASM_PARSER_H */
diff --git a/asm/phash.pl b/asm/phash.pl
new file mode 100755
index 00000000..3ef6e714
--- /dev/null
+++ b/asm/phash.pl
@@ -0,0 +1,109 @@
+#!/usr/bin/perl
+## --------------------------------------------------------------------------
+##
+## Copyright 1996-2009 the NASM Authors - All rights reserved.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following
+## conditions are met:
+##
+## * Redistributions of source code must retain the above copyright
+## notice, this list of conditions and the following disclaimer.
+## * Redistributions in binary form must reproduce the above
+## copyright notice, this list of conditions and the following
+## disclaimer in the documentation and/or other materials provided
+## with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# Perfect Minimal Hash Generator written in Perl, which produces
+# C output.
+#
+
+require 'phash.ph';
+
+#
+# Read input file
+#
+# Reads "key" or "key=value" lines from STDIN and returns them as a
+# hash.  Trailing whitespace and "#" comments are stripped and empty
+# lines are skipped.  A key without "=value" receives the running
+# counter, which restarts from the most recent explicit value and is
+# incremented after every entry.
+#
+sub read_input() {
+    my %out;
+    my $x = 0;          # value for the next key that has no "=value"
+
+    # $l is lexical here; the former "my $key,$val;" declared only
+    # $key, and neither variable was used.
+    while (defined(my $l = <STDIN>)) {
+        chomp $l;
+        $l =~ s/\s*(\#.*|)$//;
+
+        next if ($l eq '');
+
+        if ($l =~ /^([^=]+)\=([^=]+)$/) {
+            $out{$1} = $2;
+            $x = $2;
+        } else {
+            $out{$l} = $x;
+        }
+        $x++;
+    }
+
+    return %out;
+}
+
+#
+# Main program
+#
+# Reads the key list with read_input(), builds and verifies a perfect
+# hash for it, and prints the two lookup tables plus a struct p_hash
+# initializer as C source on STDOUT.  HASHNAME is emitted literally.
+#
+sub main() {
+    my %data = read_input();
+    my @hashinfo = gen_perfect_hash(\%data);
+
+    if (!@hashinfo) {
+        die "$0: no hash found\n";
+    }
+
+    verify_hash_table(\%data, \@hashinfo);
+
+    # NOTE(review): assumes gen_perfect_hash() returns
+    # (n, seed, f1, f2, g) and that the seed prints as a scalar --
+    # confirm against phash.ph.
+    my ($n, $sv, $f1, $f2, $g) = @hashinfo;
+
+    # Each element is followed by a comma so that the generated text
+    # is a valid C initializer list (a trailing comma is permitted).
+    print "static int HASHNAME_fg1[$n] =\n";
+    print "{\n";
+    for (my $i = 0; $i < $n; $i++) {
+        print "\t", ${$g}[${$f1}[$i]], ",\n";
+    }
+    print "};\n\n";
+
+    print "static int HASHNAME_fg2[$n] =\n";
+    print "{\n";
+    for (my $i = 0; $i < $n; $i++) {
+        print "\t", ${$g}[${$f2}[$i]], ",\n";
+    }
+    print "};\n\n";
+
+    print "struct p_hash HASHNAME =\n";
+    print "{\n";
+    print "\t$n,\n";
+    print "\t$sv,\n";
+    print "\tHASHNAME_fg1,\n";
+    print "\tHASHNAME_fg2,\n";
+    print "};\n";
+}
+
+main();
diff --git a/asm/pptok.dat b/asm/pptok.dat
new file mode 100644
index 00000000..b78d138a
--- /dev/null
+++ b/asm/pptok.dat
@@ -0,0 +1,95 @@
+## --------------------------------------------------------------------------
+##
+## Copyright 1996-2010 The NASM Authors - All Rights Reserved
+## See the file AUTHORS included with the NASM distribution for
+## the specific copyright holders.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following
+## conditions are met:
+##
+## * Redistributions of source code must retain the above copyright
+## notice, this list of conditions and the following disclaimer.
+## * Redistributions in binary form must reproduce the above
+## copyright notice, this list of conditions and the following
+## disclaimer in the documentation and/or other materials provided
+## with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# A * at the end marks a conditional directive; the conditions are listed
+# on lines starting with *; the negated forms are auto-generated
+#
+*
+*ctx
+*def
+*empty
+*env
+*id
+*idn
+*idni
+*macro
+*num
+*str
+*token
+%arg
+%assign
+%clear
+%define
+%defstr
+%deftok
+%depend
+%elif*
+%else
+%endif
+%endm
+%endmacro
+%endrep
+%error
+%exitmacro
+%exitrep
+%fatal
+%iassign
+%idefine
+%idefstr
+%ideftok
+%if*
+%imacro
+%irmacro
+%include
+%ixdefine
+%line
+%local
+%macro
+%pathsearch
+%pop
+%push
+%rep
+%repl
+%rmacro
+%rotate
+%stacksize
+%strcat
+%strlen
+%substr
+%undef
+%unimacro
+%unmacro
+%use
+%warning
+%xdefine
diff --git a/asm/pptok.pl b/asm/pptok.pl
new file mode 100755
index 00000000..203332d6
--- /dev/null
+++ b/asm/pptok.pl
@@ -0,0 +1,271 @@
+#!/usr/bin/perl
+## --------------------------------------------------------------------------
+##
+## Copyright 1996-2009 The NASM Authors - All Rights Reserved
+## See the file AUTHORS included with the NASM distribution for
+## the specific copyright holders.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following
+## conditions are met:
+##
+## * Redistributions of source code must retain the above copyright
+## notice, this list of conditions and the following disclaimer.
+## * Redistributions in binary form must reproduce the above
+## copyright notice, this list of conditions and the following
+## disclaimer in the documentation and/or other materials provided
+## with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# Produce pptok.c, pptok.h and pptok.ph from pptok.dat
+#
+
+require 'phash.ph'; # presumably provides gen_perfect_hash()/verify_hash_table() used further down -- TODO confirm
+
+my($what, $in, $out) = @ARGV; # output kind ('h', 'c' or 'ph'), input file, output file
+
+# Read pptok.dat ($in).  "%name*" lines are conditional directive stems,
+# "%name" lines are ordinary directives and "*name" lines are conditions;
+# any other non-blank line is silently ignored.
+open(IN, "< $in") or die "$0: cannot open: $in\n";
+while (defined($line = <IN>)) {
+ $line =~ s/\r?\n$//; # Remove trailing \r\n or \n
+ $line =~ s/^\s+//; # Remove leading whitespace
+ $line =~ s/\s*\#.*$//; # Remove comments and trailing whitespace
+ next if ($line eq '');
+
+ if ($line =~ /^\%(.*)\*$/) {
+ push(@cctok, $1);
+ } elsif ($line =~ /^\%(.*)$/) {
+ push(@pptok, $1);
+ } elsif ($line =~ /^\*(.*)$/) {
+ push(@cond, $1); # a bare "*" line yields the empty condition name
+ }
+}
+close(IN);
+
+@cctok = sort @cctok;
+@cond = sort @cond;
+@pptok = sort @pptok;
+
+# Generate the expanded list including conditionals. The conditionals
+# are at the beginning, padded to a power of 2, with the inverses
+# interspersed; this allows a simple mask to pick out the condition.
+
+while ((scalar @cond) & (scalar @cond)-1) { # loop until the count is a power of two
+ push(@cond, undef);
+}
+
+@cptok = ();
+foreach $ct (@cctok) {
+ foreach $cc (@cond) {
+ if (defined($cc)) {
+ push(@cptok, $ct.$cc); # positive form, e.g. stem . condition
+ push(@cptok, $ct.'n'.$cc); # negated form directly after it
+ } else {
+ push(@cptok, undef, undef); # padding keeps the slot numbering aligned
+ }
+ }
+}
+$first_uncond = $pptok[0]; # first sorted unconditional directive, saved before the conditionals are prepended
+@pptok = (@cptok, @pptok);
+
+open(OUT, "> $out") or die "$0: cannot open: $out\n";
+
+#
+# Output pptok.h: the preproc_token and pp_conditional enums plus the
+# PP_COND/PP_IS_COND/PP_NEGATIVE and CASE_PP_* helper macros
+if ($what eq 'h') {
+ print OUT "/* Automatically generated from $in by $0 */\n";
+ print OUT "/* Do not edit */\n";
+ print OUT "\n";
+
+ print OUT "enum preproc_token {\n";
+ $n = 0;
+ foreach $pt (@pptok) {
+ if (defined($pt)) {
+ printf OUT " %-16s = %3d,\n", "PP_\U$pt\E", $n;
+ }
+ $n++; # padding (undef) entries still consume a number
+ }
+ printf OUT " %-16s = %3d\n", 'PP_INVALID', -1;
+ print OUT "};\n";
+ print OUT "\n";
+
+ print OUT "enum pp_conditional {\n";
+ $n = 0;
+ $c = ''; # separator printed before each entry; empty for the first one
+ foreach $cc (@cond) {
+ if (defined($cc)) {
+ printf OUT "$c %-16s = %3d", "PPC_IF\U$cc\E", $n;
+ $c = ',';
+ }
+ $n += 2; # step of two: each condition is followed by its negated form in the token list
+ }
+ print OUT "\n};\n\n";
+
+ printf OUT "#define PP_COND(x) ((enum pp_conditional)((x) & 0x%x))\n",
+ (scalar(@cond)-1) << 1; # mask covering the condition bits above the low (negation) bit
+ print OUT "#define PP_IS_COND(x) ((unsigned int)(x) < PP_\U$first_uncond\E)\n";
+ print OUT "#define PP_NEGATIVE(x) ((x) & 1)\n";
+ print OUT "\n";
+
+ foreach $ct (@cctok) {
+ print OUT "#define CASE_PP_\U$ct\E";
+ $pref = " \\\n"; # text emitted before the next pair of case labels
+ foreach $cc (@cond) {
+ if (defined($cc)) {
+ print OUT "$pref\tcase PP_\U${ct}${cc}\E: \\\n";
+ print OUT "\tcase PP_\U${ct}N${cc}\E";
+ $pref = ":\\\n";
+ }
+ }
+ print OUT "\n"; # No colon or newline on the last one
+ }
+}
+
+#
+# Output pptok.c: the pp_directives name/length tables and the
+# pp_token_hash() lookup function built from a perfect hash
+if ($what eq 'c') {
+ print OUT "/* Automatically generated from $in by $0 */\n";
+ print OUT "/* Do not edit */\n";
+ print OUT "\n";
+
+ my %tokens = ();
+ my @tokendata = (); # NOTE(review): not used anywhere in this section
+
+ my $n = 0;
+ foreach $pt (@pptok) {
+ if (defined($pt)) {
+ $tokens{'%'.$pt} = $n;
+ if ($pt =~ /[\@\[\]\\_]/) {
+ # Fail on characters which look like upper-case letters
+ # to the quick-and-dirty downcasing in the prehash
+ # (see below)
+ die "$in: invalid character in token: $pt";
+ }
+ }
+ $n++;
+ }
+
+ my @hashinfo = gen_perfect_hash(\%tokens);
+ if (!@hashinfo) {
+ die "$0: no hash found\n";
+ }
+
+ # Paranoia...
+ verify_hash_table(\%tokens, \@hashinfo);
+
+ ($n, $sv, $g) = @hashinfo; # $n is reused: from here on it is the hash table size
+ $sv2 = $sv+2;
+
+ die if ($n & ($n-1)); # the size is used as a bit mask below, so it must be a power of two
+
+ print OUT "#include \"compiler.h\"\n";
+ print OUT "#include <ctype.h>\n";
+ print OUT "#include \"nasmlib.h\"\n";
+ print OUT "#include \"hashtbl.h\"\n";
+ print OUT "#include \"preproc.h\"\n";
+ print OUT "\n";
+
+ # Note that this is global.
+ printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok);
+ foreach $d (@pptok) {
+ if (defined($d)) {
+ print OUT " \"%$d\",\n";
+ } else {
+ print OUT " NULL,\n";
+ }
+ }
+ print OUT "};\n";
+
+ printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok);
+ foreach $d (@pptok) {
+ printf OUT " %d,\n", defined($d) ? length($d)+1 : 0; # +1 accounts for the leading '%'
+ }
+ print OUT "};\n";
+
+ print OUT "enum preproc_token pp_token_hash(const char *token)\n";
+ print OUT "{\n";
+
+ # Put a large value in unused slots. This makes it extremely unlikely
+ # that any combination that involves unused slot will pass the range test.
+ # This speeds up rejection of unrecognized tokens, i.e. identifiers.
+ print OUT "#define UNUSED (65535/3)\n";
+
+ print OUT " static const int16_t hash1[$n] = {\n";
+ for ($i = 0; $i < $n; $i++) {
+ my $h = ${$g}[$i*2+0]; # even entries of g feed hash1
+ print OUT " ", defined($h) ? $h : 'UNUSED', ",\n";
+ }
+ print OUT " };\n";
+
+ print OUT " static const int16_t hash2[$n] = {\n";
+ for ($i = 0; $i < $n; $i++) {
+ my $h = ${$g}[$i*2+1]; # odd entries of g feed hash2
+ print OUT " ", defined($h) ? $h : 'UNUSED', ",\n";
+ }
+ print OUT " };\n";
+
+ print OUT " uint32_t k1, k2;\n";
+ print OUT " uint64_t crc;\n";
+ # For correct overflow behavior, "ix" should be unsigned of the same
+ # width as the hash arrays.
+ print OUT " uint16_t ix;\n";
+ print OUT "\n";
+
+ printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
+ $$sv[0], $$sv[1]; # the seed is a two-element array: high and low 32-bit halves
+ print OUT " k1 = (uint32_t)crc;\n";
+ print OUT " k2 = (uint32_t)(crc >> 32);\n";
+ print OUT "\n";
+ printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+ printf OUT " if (ix >= %d)\n", scalar(@pptok);
+ print OUT " return PP_INVALID;\n";
+ print OUT "\n";
+
+ print OUT " if (!pp_directives[ix] || nasm_stricmp(pp_directives[ix], token))\n";
+ print OUT " return PP_INVALID;\n";
+ print OUT "\n";
+ print OUT " return ix;\n";
+ print OUT "}\n";
+}
+
+#
+# Output pptok.ph: a Perl hash %pptok_hash mapping each "%directive"
+# name to its token number
+if ($what eq 'ph') {
+ print OUT "# Automatically generated from $in by $0\n";
+ print OUT "# Do not edit\n";
+ print OUT "\n";
+
+ print OUT "%pptok_hash = (\n";
+ $n = 0;
+ foreach $tok (@pptok) {
+ if (defined($tok)) {
+ printf OUT " '%%%s' => %d,\n", $tok, $n;
+ }
+ $n++; # padding (undef) entries still consume a number
+ }
+ print OUT ");\n";
+ print OUT "1;\n"; # true value so the generated file can be loaded with require
+}
+
+
diff --git a/asm/preproc-nop.c b/asm/preproc-nop.c
new file mode 100644
index 00000000..1cb14457
--- /dev/null
+++ b/asm/preproc-nop.c
@@ -0,0 +1,186 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a null preprocessor which just copies lines from input
+ * to output. It's used when someone explicitly requests that NASM
+ * not preprocess their source file.
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "preproc.h"
+#include "listing.h"
+
+#define BUF_DELTA 512
+
+static FILE *nop_fp;
+static int32_t nop_lineinc;
+
+/*
+ * Start reading `file' with the null preprocessor.
+ *
+ * Sets the current source position to line 0 of `file', resets the
+ * per-line increment to 1 and opens the file in text mode; failure to
+ * open it is reported through nasm_fatal().  When `deplist' is
+ * non-NULL, *deplist is set to a freshly allocated single-entry list
+ * naming `file'.  `pass' is unused.
+ */
+static void nop_reset(char *file, int pass, StrList **deplist)
+{
+    (void)pass;                 /* placate compilers */
+
+    src_set(0, file);
+    nop_lineinc = 1;
+
+    nop_fp = nasm_open_read(file, NF_TEXT);
+    if (nop_fp == NULL)
+        nasm_fatal(ERR_NOFILE, "unable to open input file `%s'", file);
+
+    if (deplist != NULL) {
+        size_t namelen = strlen(file) + 1;      /* includes the NUL */
+        StrList *entry = nasm_malloc(sizeof entry->next + namelen);
+
+        memcpy(entry->str, file, namelen);
+        entry->next = NULL;
+        *deplist = entry;
+    }
+}
+
+/*
+ * Fetch the next line of input for the null preprocessor.
+ *
+ * The line is read into a buffer obtained from nasm_malloc() that is
+ * grown in BUF_DELTA steps, and is truncated at the first CR, LF or ^Z.
+ * A line of the form `%line N+I FILE' is not returned: it updates the
+ * current source position via src_set() and the per-line increment,
+ * and reading continues with the following line.
+ *
+ * The returned line is also passed to the listing module.  Returns
+ * NULL once nothing more can be read.
+ */
+static char *nop_getline(void)
+{
+    char *buffer, *end, *got;
+    int bufsize = BUF_DELTA;
+
+    buffer = nasm_malloc(BUF_DELTA);
+    src_set_linnum(src_get_linnum() + nop_lineinc);
+
+    for (;;) {                  /* Restarted after each consumed %line */
+        end = buffer;
+
+        for (;;) {              /* Keep reading until the line is complete */
+            got = fgets(end, bufsize - (end - buffer), nop_fp);
+            if (got == NULL)
+                break;
+
+            end += strlen(end);
+            if (end > buffer && end[-1] == '\n')
+                break;
+
+            if (end - buffer > bufsize - 10) {
+                int used = end - buffer;
+
+                bufsize += BUF_DELTA;
+                buffer = nasm_realloc(buffer, bufsize);
+                end = buffer + used;
+            }
+        }
+
+        if (got == NULL && end == buffer) {
+            nasm_free(buffer);
+            return NULL;
+        }
+
+        /*
+         * Play safe: cut the line at the first CR, LF or spurious ^Z.
+         */
+        buffer[strcspn(buffer, "\r\n\032")] = '\0';
+
+        if (nasm_strnicmp(buffer, "%line", 5) == 0) {
+            int32_t ln;
+            int li;
+            int matched;
+            char *nm = nasm_malloc(strlen(buffer));
+
+            matched = sscanf(buffer + 5, "%"PRId32"+%d %s", &ln, &li, nm) == 3;
+            if (matched) {
+                src_set(ln, nm);
+                nop_lineinc = li;
+            }
+            nasm_free(nm);
+            if (matched)
+                continue;       /* The directive itself is not returned */
+        }
+        break;
+    }
+
+    lfmt->line(LIST_READ, buffer);
+
+    return buffer;
+}
+
+/*
+ * Close the input file opened by nop_reset(), if one is open, and
+ * forget the handle.  `pass' is unused.
+ */
+static void nop_cleanup(int pass)
+{
+    (void)pass;                 /* placate GCC */
+
+    if (nop_fp == NULL)
+        return;
+
+    fclose(nop_fp);
+    nop_fp = NULL;
+}
+
+static void nop_extra_stdmac(macros_t *macros)
+{
+ (void)macros; /* no-op: the null preprocessor ignores this request */
+}
+
+static void nop_pre_define(char *definition)
+{
+ (void)definition; /* no-op: the definition is ignored */
+}
+
+static void nop_pre_undefine(char *definition)
+{
+ (void)definition; /* no-op: the undefinition is ignored */
+}
+
+static void nop_pre_include(char *fname)
+{
+ (void)fname; /* no-op: the file name is ignored */
+}
+
+static void nop_include_path(char *path)
+{
+ (void)path; /* no-op: the path is ignored */
+}
+
+static void nop_error_list_macros(int severity)
+{
+ (void)severity; /* no-op: nothing is listed */
+}
+
+const struct preproc_ops preproc_nop = { /* operations table of the null preprocessor */
+ nop_reset, /* opens the input file and resets line tracking */
+ nop_getline, /* returns the next raw line, consuming %line markers */
+ nop_cleanup, /* closes the input file */
+ nop_extra_stdmac, /* no-op */
+ nop_pre_define, /* no-op */
+ nop_pre_undefine, /* no-op */
+ nop_pre_include, /* no-op */
+ nop_include_path, /* no-op */
+ nop_error_list_macros, /* no-op */
+};
diff --git a/asm/preproc.c b/asm/preproc.c
new file mode 100644
index 00000000..e36ae9c9
--- /dev/null
+++ b/asm/preproc.c
@@ -0,0 +1,5295 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * preproc.c macro preprocessor for the Netwide Assembler
+ */
+
+/* Typical flow of text through preproc
+ *
+ * pp_getline gets tokenized lines, either
+ *
+ * from a macro expansion
+ *
+ * or
+ * {
+ * read_line gets raw text from stdmacpos, or predef, or current input file
+ * tokenize converts to tokens
+ * }
+ *
+ * expand_mmac_params is used to expand %1 etc., unless a macro is being
+ * defined or a false conditional is being processed
+ * (%0, %1, %+1, %-1, %%foo)
+ *
+ * do_directive checks for directives
+ *
+ * expand_smacro is used to expand single line macros
+ *
+ * expand_mmacro is used to expand multi-line macros
+ *
+ * detoken is used to convert the line back to text
+ */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "preproc.h"
+#include "hashtbl.h"
+#include "quote.h"
+#include "stdscan.h"
+#include "eval.h"
+#include "tokens.h"
+#include "tables.h"
+#include "listing.h"
+
+typedef struct SMacro SMacro;
+typedef struct MMacro MMacro;
+typedef struct MMacroInvocation MMacroInvocation;
+typedef struct Context Context;
+typedef struct Token Token;
+typedef struct Blocks Blocks;
+typedef struct Line Line;
+typedef struct Include Include;
+typedef struct Cond Cond;
+typedef struct IncPath IncPath;
+
+/*
+ * Note on the storage of both SMacro and MMacros: the hash table
+ * indexes them case-insensitively, and we then have to go through a
+ * linked list of potential case aliases (and, for MMacros, parameter
+ * ranges); this is to preserve the matching semantics of the earlier
+ * code. If the number of case aliases for a specific macro is a
+ * performance issue, you may want to reconsider your coding style.
+ */
+
+/*
+ * Store the definition of a single-line macro.
+ */
+struct SMacro {
+ SMacro *next;
+ char *name;
+ bool casesense;
+ bool in_progress;
+ unsigned int nparam;
+ Token *expansion;
+};
+
+/*
+ * Store the definition of a multi-line macro. This is also used to
+ * store the interiors of `%rep...%endrep' blocks, which are
+ * effectively self-re-invoking multi-line macros which simply
+ * don't have a name or bother to appear in the hash tables. %rep
+ * blocks are signified by having a NULL `name' field.
+ *
+ * In a MMacro describing a `%rep' block, the `in_progress' field
+ * isn't merely boolean, but gives the number of repeats left to
+ * run.
+ *
+ * The `next' field is used for storing MMacros in hash tables; the
+ * `next_active' field is for stacking them on istk entries.
+ *
+ * When a MMacro is being expanded, `params', `iline', `nparam',
+ * `paramlen', `rotate' and `unique' are local to the invocation.
+ */
+struct MMacro {
+ MMacro *next;
+ MMacroInvocation *prev; /* previous invocation */
+ char *name;
+ int nparam_min, nparam_max;
+ bool casesense;
+ bool plus; /* is the last parameter greedy? */
+ bool nolist; /* is this macro listing-inhibited? */
+ int64_t in_progress; /* is this macro currently being expanded? */
+ int32_t max_depth; /* maximum number of recursive expansions allowed */
+ Token *dlist; /* All defaults as one list */
+ Token **defaults; /* Parameter default pointers */
+ int ndefs; /* number of default parameters */
+ Line *expansion;
+
+ MMacro *next_active;
+ MMacro *rep_nest; /* used for nesting %rep */
+ Token **params; /* actual parameters */
+ Token *iline; /* invocation line */
+ unsigned int nparam, rotate;
+ int *paramlen;
+ uint64_t unique;
+ int lineno; /* Current line number on expansion */
+ uint64_t condcnt; /* number of if blocks... */
+
+ const char *fname; /* File where defined */
+ int32_t xline; /* First line in macro */
+};
+
+
+/* Store the definition of a multi-line macro, as defined in a
+ * previous recursive macro expansion.
+ */
+struct MMacroInvocation {
+ MMacroInvocation *prev; /* previous invocation */
+ Token **params; /* actual parameters */
+ Token *iline; /* invocation line */
+ unsigned int nparam, rotate;
+ int *paramlen;
+ uint64_t unique;
+ uint64_t condcnt;
+};
+
+
+/*
+ * The context stack is composed of a linked list of these.
+ */
+struct Context {
+ Context *next;
+ char *name;
+ struct hash_table localmac;
+ uint32_t number;
+};
+
+/*
+ * This is the internal form which we break input lines up into.
+ * Typically stored in linked lists.
+ *
+ * Note that `type' serves a double meaning: TOK_SMAC_PARAM is not
+ * necessarily used as-is, but is intended to denote the number of
+ * the substituted parameter. So in the definition
+ *
+ * %define a(x,y) ( (x) & ~(y) )
+ *
+ * the token representing `x' will have its type changed to
+ * TOK_SMAC_PARAM, but the one representing `y' will be
+ * TOK_SMAC_PARAM+1.
+ *
+ * TOK_INTERNAL_STRING is a dirty hack: it's a single string token
+ * which doesn't need quotes around it. Used in the pre-include
+ * mechanism as an alternative to trying to find a sensible type of
+ * quote to use on the filename we were passed.
+ */
+enum pp_token_type {
+ TOK_NONE = 0, TOK_WHITESPACE, TOK_COMMENT, TOK_ID,
+ TOK_PREPROC_ID, TOK_STRING,
+ TOK_NUMBER, TOK_FLOAT, TOK_SMAC_END, TOK_OTHER,
+ TOK_INTERNAL_STRING,
+ TOK_PREPROC_Q, TOK_PREPROC_QQ,
+ TOK_PASTE, /* %+ */
+ TOK_INDIRECT, /* %[...] */
+ TOK_SMAC_PARAM, /* MUST BE LAST IN THE LIST!!! */
+ TOK_MAX = INT_MAX /* Keep compiler from reducing the range */
+};
+
+/*
+ * Bit-mask helpers for sets of token types.
+ *
+ * PP_CONCAT_MASK(x) is the single-bit mask for token type x (x must be
+ * smaller than the bit width of int).  PP_CONCAT_MATCH(t, mask) is
+ * nonzero when the type of token t is a member of `mask'.  Both
+ * arguments are fully parenthesized so that compound expressions such
+ * as `a | b' can be passed as the mask.
+ */
+#define PP_CONCAT_MASK(x) (1 << (x))
+#define PP_CONCAT_MATCH(t, mask) (PP_CONCAT_MASK((t)->type) & (mask))
+
+struct tokseq_match {
+ int mask_head;
+ int mask_tail;
+};
+
+struct Token {
+ Token *next;
+ char *text;
+ union {
+ SMacro *mac; /* associated macro for TOK_SMAC_END */
+ size_t len; /* scratch length field */
+ } a; /* Auxiliary data */
+ enum pp_token_type type;
+};
+
+/*
+ * Multi-line macro definitions are stored as a linked list of
+ * these, which is essentially a container to allow several linked
+ * lists of Tokens.
+ *
+ * Note that in this module, linked lists are treated as stacks
+ * wherever possible. For this reason, Lines are _pushed_ on to the
+ * `expansion' field in MMacro structures, so that the linked list,
+ * if walked, would give the macro lines in reverse order; this
+ * means that we can walk the list when expanding a macro, and thus
+ * push the lines on to the `expansion' field in _istk_ in reverse
+ * order (so that when popped back off they are in the right
+ * order). It may seem cockeyed, and it relies on my design having
+ * an even number of steps in, but it works...
+ *
+ * Some of these structures, rather than being actual lines, are
+ * markers delimiting the end of the expansion of a given macro.
+ * This is for use in the cycle-tracking and %rep-handling code.
+ * Such structures have `finishes' non-NULL, and `first' NULL. All
+ * others have `finishes' NULL, but `first' may still be NULL if
+ * the line is blank.
+ */
+struct Line {
+ Line *next;
+ MMacro *finishes;
+ Token *first;
+};
+
+/*
+ * To handle an arbitrary level of file inclusion, we maintain a
+ * stack (ie linked list) of these things.
+ */
+struct Include {
+ Include *next;
+ FILE *fp;
+ Cond *conds;
+ Line *expansion;
+ const char *fname;
+ int lineno, lineinc;
+ MMacro *mstk; /* stack of active macros/reps */
+};
+
+/*
+ * Include search path. This is simply a list of strings which get
+ * prepended, in turn, to the name of an include file, in an
+ * attempt to find the file if it's not in the current directory.
+ */
+struct IncPath {
+ IncPath *next;
+ char *path;
+};
+
+/*
+ * Conditional assembly: we maintain a separate stack of these for
+ * each level of file inclusion. (The only reason we keep the
+ * stacks separate is to ensure that a stray `%endif' in a file
+ * included from within the true branch of a `%if' won't terminate
+ * it and cause confusion: instead, rightly, it'll cause an error.)
+ */
+struct Cond {
+ Cond *next;
+ int state;
+};
+enum {
+ /*
+ * These states are for use just after %if or %elif: IF_TRUE
+ * means the condition has evaluated to truth so we are
+ * currently emitting, whereas IF_FALSE means we are not
+ * currently emitting but will start doing so if a %else comes
+ * up. In these states, all directives are admissible: %elif,
+ * %else and %endif. (And of course %if.)
+ */
+ COND_IF_TRUE, COND_IF_FALSE,
+ /*
+ * These states come up after a %else: ELSE_TRUE means we're
+ * emitting, and ELSE_FALSE means we're not. In ELSE_* states,
+ * any %elif or %else will cause an error.
+ */
+ COND_ELSE_TRUE, COND_ELSE_FALSE,
+ /*
+ * These states mean that we're not emitting now, and also that
+ * nothing until %endif will be emitted at all. COND_DONE is
+ * used when we've had our moment of emission
+ * and have now started seeing %elifs. COND_NEVER is used when
+ * the condition construct in question is contained within a
+ * non-emitting branch of a larger condition construct,
+ * or if there is an error.
+ */
+ COND_DONE, COND_NEVER
+};
+#define emitting(x) ( (x) == COND_IF_TRUE || (x) == COND_ELSE_TRUE )
+
+/*
+ * These defines are used as the possible return values for do_directive
+ */
+#define NO_DIRECTIVE_FOUND 0
+#define DIRECTIVE_FOUND 1
+
+/*
+ * This define sets the upper limit for smacro and recursive mmacro
+ * expansions
+ */
+#define DEADMAN_LIMIT (1 << 20)
+
+/* max reps */
+#define REP_LIMIT ((INT64_C(1) << 62))
+
+/*
+ * Condition codes as seen by the preprocessor.  The enumerators carry
+ * a c_ prefix rather than C_ because C_ is used in nasm.h for the
+ * "real" condition codes.  At _this_ level CXZ, ECXZ and RCXZ also
+ * count as condition codes, albeit non-invertible ones, hence the
+ * separate enum.
+ *
+ * conditions[], enum pp_conds and inverse_ccs[] are parallel: entry N
+ * of each describes the same condition code.  inverse_ccs[] holds
+ * c_none for the codes that have no inverse.
+ */
+static const char * const conditions[] = {
+    "a",  "ae", "b",  "be",  "c",  "cxz", "e",    "ecxz",
+    "g",  "ge", "l",  "le",  "na", "nae", "nb",   "nbe",
+    "nc", "ne", "ng", "nge", "nl", "nle", "no",   "np",
+    "ns", "nz", "o",  "p",   "pe", "po",  "rcxz", "s",
+    "z"
+};
+enum pp_conds {
+    c_A,  c_AE, c_B,  c_BE,  c_C,  c_CXZ, c_E,    c_ECXZ,
+    c_G,  c_GE, c_L,  c_LE,  c_NA, c_NAE, c_NB,   c_NBE,
+    c_NC, c_NE, c_NG, c_NGE, c_NL, c_NLE, c_NO,   c_NP,
+    c_NS, c_NZ, c_O,  c_P,   c_PE, c_PO,  c_RCXZ, c_S,
+    c_Z,
+    c_none = -1
+};
+static const enum pp_conds inverse_ccs[] = {
+    /* a..ecxz  */ c_NA, c_NAE, c_NB, c_NBE, c_NC, c_none, c_NE,   c_none,
+    /* g..nbe   */ c_NG, c_NGE, c_NL, c_NLE, c_A,  c_AE,   c_B,    c_BE,
+    /* nc..np   */ c_C,  c_E,   c_G,  c_GE,  c_L,  c_LE,   c_O,    c_P,
+    /* ns..s    */ c_S,  c_Z,   c_NO, c_NP,  c_PO, c_PE,   c_none, c_NS,
+    /* z        */ c_NZ
+};
+
+/*
+ * Directive names.
+ */
+/*
+ * Return nonzero if `arg' is one of the conditional-assembly keywords:
+ * anything accepted by PP_IS_COND(), or PP_ELSE, or PP_ENDIF.
+ */
+static int is_condition(enum preproc_token arg)
+{
+    if (PP_IS_COND(arg))
+        return 1;
+
+    return arg == PP_ELSE || arg == PP_ENDIF;
+}
+
+/* For TASM compatibility we need to be able to recognise TASM compatible
+ * conditional compilation directives. Using the NASM pre-processor does
+ * not work, so we look for them specifically from the following list and
+ * then jam in the equivalent NASM directive into the input stream.
+ *
+ * The TM_* constants are the indices of the corresponding strings in
+ * tasm_directives[].  The table must be kept in ascending alphabetical
+ * order, because check_tasm_directive() finds entries by binary search
+ * using nasm_stricmp().
+ */
+
+enum {
+ TM_ARG, TM_ELIF, TM_ELSE, TM_ENDIF, TM_IF, TM_IFDEF, TM_IFDIFI,
+ TM_IFNDEF, TM_INCLUDE, TM_LOCAL
+};
+
+static const char * const tasm_directives[] = {
+ "arg", "elif", "else", "endif", "if", "ifdef", "ifdifi",
+ "ifndef", "include", "local"
+};
+
+static int StackSize = 4;
+static const char *StackPointer = "ebp";
+static int ArgOffset = 8;
+static int LocalOffset = 0;
+
+static Context *cstk;
+static Include *istk;
+static IncPath *ipath = NULL;
+
+static int pass; /* HACK: pass 0 = generate dependencies only */
+static StrList **dephead, **deptail; /* Dependency list */
+
+static uint64_t unique; /* unique identifier numbers */
+
+static Line *predef = NULL;
+static bool do_predef;
+
+/*
+ * The current set of multi-line macros we have defined.
+ */
+static struct hash_table mmacros;
+
+/*
+ * The current set of single-line macros we have defined.
+ */
+static struct hash_table smacros;
+
+/*
+ * The multi-line macro we are currently defining, or the %rep
+ * block we are currently reading, if any.
+ */
+static MMacro *defining;
+
+static uint64_t nested_mac_count;
+static uint64_t nested_rep_count;
+
+/*
+ * The number of macro parameters to allocate space for at a time.
+ */
+#define PARAM_DELTA 16
+
+/*
+ * The standard macro set: defined in macros.c in the array nasm_stdmac.
+ * This gives our position in the macro set, when we're processing it.
+ */
+static macros_t *stdmacpos;
+
+/*
+ * The extra standard macros that come from the object format, if
+ * any.
+ */
+static macros_t *extrastdmac = NULL;
+static bool any_extrastdmac;
+
+/*
+ * Tokens are allocated in blocks to improve speed
+ */
+#define TOKEN_BLOCKSIZE 4096
+static Token *freeTokens = NULL;
+struct Blocks {
+ Blocks *next;
+ void *chunk;
+};
+
+static Blocks blocks = { NULL, NULL };
+
+/*
+ * Forward declarations.
+ */
+static Token *expand_mmac_params(Token * tline);
+static Token *expand_smacro(Token * tline);
+static Token *expand_id(Token * tline);
+static Context *get_ctx(const char *name, const char **namep);
+static void make_tok_num(Token * tok, int64_t val);
+static void pp_verror(int severity, const char *fmt, va_list ap);
+static vefunc real_verror;
+static void *new_Block(size_t size);
+static void delete_Blocks(void);
+static Token *new_Token(Token * next, enum pp_token_type type,
+ const char *text, int txtlen);
+static Token *delete_Token(Token * t);
+
+/*
+ * Macros for safe checking of token pointers, avoid *(NULL)
+ */
+#define tok_type_(x,t) ((x) && (x)->type == (t))
+#define skip_white_(x) if (tok_type_((x), TOK_WHITESPACE)) (x)=(x)->next
+#define tok_is_(x,v) (tok_type_((x), TOK_OTHER) && !strcmp((x)->text,(v)))
+#define tok_isnt_(x,v) ((x) && ((x)->type!=TOK_OTHER || strcmp((x)->text,(v))))
+
+/*
+ * Unquote `qstr' in place with nasm_unquote() and return its length as
+ * a C string.  If the unquoted length differs from strlen(), i.e. the
+ * string contains a NUL character, a non-fatal error naming
+ * `directive' is issued and the string is effectively truncated at
+ * that NUL.
+ */
+static size_t nasm_unquote_cstr(char *qstr, enum preproc_token directive)
+{
+    size_t unquoted_len = nasm_unquote(qstr, NULL);
+    size_t c_len = strlen(qstr);
+
+    if (c_len != unquoted_len) {
+        nasm_error(ERR_NONFATAL, "NUL character in `%s' directive",
+                   pp_directives[directive]);
+    }
+
+    return c_len;
+}
+
+/*
+ * Reverse a singly linked list of tokens in place by relinking the
+ * `next' pointers.  Returns the new head (the former last token), or
+ * NULL for an empty list.
+ */
+static Token *reverse_tokens(Token *t)
+{
+    Token *reversed = NULL;
+
+    while (t != NULL) {
+        Token *rest = t->next;
+
+        t->next = reversed;     /* move t to the front of the result */
+        reversed = t;
+        t = rest;
+    }
+
+    return reversed;
+}
+
+/*
+ * Handle TASM specific directives, which do not contain a % in
+ * front of them.  This is a hack: ideally the NASM preprocessor itself
+ * would be used for it.
+ *
+ * The first word of `line' is looked up, ignoring case, in
+ * tasm_directives[].  If it is not a TASM directive, `line' is
+ * returned unchanged.  Otherwise `line' is freed and a newly allocated
+ * line is returned that starts with `%' followed by the text from the
+ * directive onwards (leading whitespace is dropped).  IFDIFI, which
+ * NASM does not recognise, is replaced by `%if 0'.
+ */
+static char *check_tasm_directive(char *line)
+{
+    int32_t lo, hi, mid, cmp, len;
+    int32_t found = -1;
+    char *word, *word_end, *newline, saved;
+
+    word = nasm_skip_spaces(line);
+    word_end = nasm_skip_word(word);
+    len = word_end - word;
+    if (!len)
+        return line;
+
+    /* Temporarily NUL-terminate the first word for the comparisons */
+    saved = word[len];
+    word[len] = 0;
+
+    /* Binary search for the directive name */
+    lo = -1;
+    hi = ARRAY_SIZE(tasm_directives);
+    while (hi - lo > 1) {
+        mid = (hi + lo) / 2;
+        cmp = nasm_stricmp(word, tasm_directives[mid]);
+        if (cmp == 0) {
+            found = mid;
+            break;
+        }
+        if (cmp < 0)
+            hi = mid;
+        else
+            lo = mid;
+    }
+    word[len] = saved;
+
+    if (found < 0)
+        return line;
+
+    /*
+     * We have found a directive, so jam a % in front of it
+     * so that NASM will then recognise it as one of its own.
+     */
+    len = strlen(word);
+    newline = nasm_malloc(len + 2);
+    newline[0] = '%';
+    if (found == TM_IFDIFI) {
+        /*
+         * NASM does not recognise IFDIFI, so we convert it to
+         * %if 0.  This is not used in NASM compatible code, but
+         * does need to parse for the TASM macro package.
+         */
+        strcpy(newline + 1, "if 0");
+    } else {
+        memcpy(newline + 1, word, len + 1);
+    }
+    nasm_free(line);
+
+    return newline;
+}
+
+/*
+ * The pre-preprocessing stage.
+ *
+ * A line number indication as emitted by GNU cpp (`# lineno "file"
+ * flags') is replaced by the NASM form (`%line lineno file'); the
+ * original line is freed and a newly allocated one takes its place.
+ * In TASM compatible mode the (possibly replaced) line is then handed
+ * to check_tasm_directive().  Returns the line to use from here on.
+ */
+static char *prepreproc(char *line)
+{
+    if (line[0] == '#' && line[1] == ' ') {
+        char *original = line;
+        char *name = original + 2;
+        int number = atoi(name);
+        int namelen;
+
+        /* Step over the line number, then over an opening quote */
+        name += strspn(name, "0123456789 ");
+        if (*name == '"')
+            name++;
+        namelen = strcspn(name, "\"");
+
+        line = nasm_malloc(20 + namelen);
+        snprintf(line, 20 + namelen, "%%line %d %.*s", number, namelen, name);
+        nasm_free(original);
+    }
+
+    return tasm_compatible_mode ? check_tasm_directive(line) : line;
+}
+
+/*
+ * Free a linked list of tokens, one delete_Token() call per token.
+ */
+static void free_tlist(Token * list)
+{
+    Token *cur;
+
+    for (cur = list; cur != NULL; cur = delete_Token(cur))
+        ;                       /* delete_Token() returns the next token */
+}
+
+/*
+ * Free a linked list of lines, including the tokens of each line.
+ */
+static void free_llist(Line * list)
+{
+    while (list != NULL) {
+        Line *following = list->next;   /* fetched before the node is freed */
+
+        free_tlist(list->first);
+        nasm_free(list);
+        list = following;
+    }
+}
+
+/*
+ * Free an MMacro: its expansion lines, its default-parameter pointer
+ * array and token list, its name, and finally the structure itself.
+ */
+static void free_mmacro(MMacro * m)
+{
+    free_llist(m->expansion);
+    nasm_free(m->defaults);
+    free_tlist(m->dlist);
+    nasm_free(m->name);
+
+    nasm_free(m);
+}
+
+/*
+ * Free every single-line macro stored in the hash table `smt', along
+ * with the hash keys, and then free the table itself.  Each table
+ * entry is the head of a linked list of SMacros.
+ */
+static void free_smacro_table(struct hash_table *smt)
+{
+    struct hash_tbl_node *iter = NULL;
+    const char *key;
+    SMacro *chain;
+
+    while ((chain = hash_iterate(smt, &iter, &key)) != NULL) {
+        nasm_free((void *)key);
+
+        while (chain != NULL) {
+            SMacro *following = chain->next;
+
+            nasm_free(chain->name);
+            free_tlist(chain->expansion);
+            nasm_free(chain);
+            chain = following;
+        }
+    }
+
+    hash_free(smt);
+}
+
+/*
+ * Free every multi-line macro stored in the hash table `mmt', along
+ * with the hash keys, and then free the table itself.  Each table
+ * entry is the head of a linked list of MMacros.
+ */
+static void free_mmacro_table(struct hash_table *mmt)
+{
+    struct hash_tbl_node *iter = NULL;
+    const char *key;
+    MMacro *chain;
+
+    while ((chain = hash_iterate(mmt, &iter, &key)) != NULL) {
+        nasm_free((void *)key);
+
+        while (chain != NULL) {
+            MMacro *following = chain->next;
+
+            free_mmacro(chain);
+            chain = following;
+        }
+    }
+
+    hash_free(mmt);
+}
+
+static void free_macros(void) /* free the global smacros and mmacros tables and their contents */
+{
+ free_smacro_table(&smacros);
+ free_mmacro_table(&mmacros);
+}
+
+/*
+ * Initialize the hash tables
+ * (the global single-line and multi-line macro tables, `smacros' and
+ * `mmacros'; both are initialized with HASH_LARGE)
+ */
+static void init_macros(void)
+{
+ hash_init(&smacros, HASH_LARGE);
+ hash_init(&mmacros, HASH_LARGE);
+}
+
+/*
+ * Pop the context stack: unlink the top context and free its local
+ * macro table, its name and the context itself.  The stack must not
+ * be empty, since cstk is dereferenced unconditionally.
+ */
+static void ctx_pop(void)
+{
+    Context *top = cstk;
+
+    cstk = top->next;
+
+    free_smacro_table(&top->localmac);
+    nasm_free(top->name);
+    nasm_free(top);
+}
+
+/*
+ * Look up `str' in the hash index using hash_findi(), and add it if it
+ * is not there yet.  A newly added entry is keyed by a private copy of
+ * `str' and has its data pointer initialized to NULL.  Returns the
+ * address of the entry's data pointer.
+ */
+static void **
+hash_findi_add(struct hash_table *hash, const char *str)
+{
+    struct hash_insert hi;
+    void **slot = hash_findi(hash, str, &hi);
+
+    if (!slot) {
+        /* Use a more efficient allocator here? */
+        slot = hash_add(&hi, nasm_strdup(str), NULL);
+    }
+
+    return slot;
+}
+
+/*
+ * Like hash_findi, but returns the data element itself rather than a
+ * pointer to it, or NULL if `str' is not in the table.  Used only for
+ * lookups that never add an element, hence no third argument.
+ */
+static void *
+hash_findix(struct hash_table *hash, const char *str)
+{
+    void **slot = hash_findi(hash, str, NULL);
+
+    if (!slot)
+        return NULL;
+
+    return *slot;
+}
+
+/*
+ * Read the next line from the standard macro set; returns NULL if
+ * there are none left.
+ *
+ * Each line is stored as a NUL-terminated byte string in which a byte
+ * >= 0x80 stands for the preprocessor directive pp_directives[byte -
+ * 0x80]; it is expanded to the directive name followed by a space.
+ * The returned line is allocated with nasm_malloc().
+ *
+ * When the line just read was the last one of the current chain, the
+ * reader switches to the extra standard macros if any are pending;
+ * otherwise, if requested, it pushes copies of the `predef' lines on
+ * to the top-level expansion stack.
+ */
+static char *line_from_stdmac(void)
+{
+    const unsigned char *scan = stdmacpos;
+    unsigned char c;
+    char *line, *out;
+    size_t len = 0;
+    Line *pd, *l;
+
+    if (!stdmacpos)
+        return NULL;
+
+    /* First pass: work out the length of the expanded line */
+    while ((c = *scan++) != 0)
+        len += (c >= 0x80) ? pp_directives_len[c - 0x80] + 1 : 1;
+
+    /* Second pass: expand the line into a fresh buffer */
+    line = nasm_malloc(len + 1);
+    out = line;
+    for (; (c = *stdmacpos) != 0; stdmacpos++) {
+        if (c < 0x80) {
+            *out++ = c;
+            continue;
+        }
+        memcpy(out, pp_directives[c - 0x80], pp_directives_len[c - 0x80]);
+        out += pp_directives_len[c - 0x80];
+        *out++ = ' ';
+    }
+    *out = '\0';
+    stdmacpos = scan;           /* first byte after this line's NUL */
+
+    if (*stdmacpos)
+        return line;            /* more lines follow in this chain */
+
+    /* This was the last of the standard macro chain... */
+    stdmacpos = NULL;
+    if (any_extrastdmac) {
+        stdmacpos = extrastdmac;
+        any_extrastdmac = false;
+        return line;
+    }
+    if (!do_predef)
+        return line;
+
+    /*
+     * Nasty hack: here we push the contents of `predef' on to the
+     * top-level expansion stack, since this is the most convenient
+     * way to implement the pre-include and pre-define features.
+     */
+    for (pd = predef; pd; pd = pd->next) {
+        Token *head = NULL;
+        Token **tail = &head;
+        Token *t;
+
+        for (t = pd->first; t; t = t->next) {
+            *tail = new_Token(NULL, t->type, t->text, 0);
+            tail = &(*tail)->next;
+        }
+
+        l = nasm_malloc(sizeof(Line));
+        l->next = istk->expansion;
+        l->first = head;
+        l->finishes = NULL;
+
+        istk->expansion = l;
+    }
+    do_predef = false;
+
+    return line;
+}
+
+/*
+ * Read one logical line of raw text.
+ *
+ * Lines from the standard macro set are delivered first; after that
+ * the line is read from the file on top of the include stack.  A
+ * backslash immediately followed by a line ending joins the line with
+ * the next one.  CR, LF and CR LF are all accepted as line endings.
+ * The line number is advanced by istk->lineinc for the line itself and
+ * once more for every continuation, the text is cut at a spurious ^Z,
+ * and the line is passed to the listing module.
+ *
+ * Returns a buffer allocated with nasm_malloc(), or NULL when the end
+ * of the file is reached without anything having been read.
+ */
+static char *read_line(void)
+{
+    const unsigned int delta = 512;     /* buffer growth step */
+    const unsigned int pad = 8;         /* slack kept free at the end */
+    unsigned int size, c, next;
+    unsigned int nr_cont = 0;
+    bool cont = false;
+    char *buffer, *p;
+
+    /* Standard macros set (predefined) goes first */
+    p = line_from_stdmac();
+    if (p)
+        return p;
+
+    size = delta;
+    p = buffer = nasm_malloc(size);
+
+    for (;;) {
+        c = fgetc(istk->fp);
+        if ((int)(c) == EOF) {
+            p[0] = 0;
+            break;
+        }
+
+        if (c == '\\') {
+            /* Peek ahead: a backslash before a line ending continues the line */
+            next = fgetc(istk->fp);
+            ungetc(next, istk->fp);
+            if (next == '\r' || next == '\n') {
+                cont = true;
+                nr_cont++;
+                continue;
+            }
+        } else if (c == '\r' || c == '\n') {
+            if (c == '\r') {
+                /* Swallow the LF of a CR LF pair */
+                next = fgetc(istk->fp);
+                if (next != '\n')
+                    ungetc(next, istk->fp);
+            }
+            if (cont) {
+                /* This line ending belongs to a continuation */
+                cont = false;
+                continue;
+            }
+            *p++ = 0;
+            break;
+        }
+
+        if (p >= (buffer + size - pad)) {
+            buffer = nasm_realloc(buffer, size + delta);
+            p = buffer + size - pad;
+            size += delta;
+        }
+
+        *p++ = (unsigned char)c;
+    }
+
+    if (p == buffer) {
+        nasm_free(buffer);
+        return NULL;
+    }
+
+    src_set_linnum(src_get_linnum() + istk->lineinc +
+                   (nr_cont * istk->lineinc));
+
+    /*
+     * Handle spurious ^Z, which may be inserted into source files
+     * by some file transfer utilities.
+     */
+    buffer[strcspn(buffer, "\032")] = '\0';
+
+    lfmt->line(LIST_READ, buffer);
+
+    return buffer;
+}
+
+/*
+ * Tokenize a line of text. This is a very simple process since we
+ * don't need to parse the value out of e.g. numeric tokens: we
+ * simply split one string into many.
+ *
+ * `line' is modified in place: the braces of a %{...} construct are
+ * removed by shifting its text, and a %[...] construct is
+ * NUL-terminated at its closing bracket.  Comments, and whitespace
+ * directly in front of a comment or the end of the line, produce no
+ * token.
+ *
+ * Returns the head of a newly allocated token list, or NULL if the
+ * line contains no tokens.
+ */
+static Token *tokenize(char *line)
+{
+    char c, *p = line;
+    enum pp_token_type type;
+    Token *list = NULL;
+    Token *t, **tail = &list;
+
+    while (*line) {
+        p = line;
+        if (*p == '%') {
+            p++;
+            if (*p == '+' && !nasm_isdigit(p[1])) {
+                p++;
+                type = TOK_PASTE;
+            } else if (nasm_isdigit(*p) ||
+                       ((*p == '-' || *p == '+') && nasm_isdigit(p[1]))) {
+                do {
+                    p++;
+                }
+                while (nasm_isdigit(*p));
+                type = TOK_PREPROC_ID;
+            } else if (*p == '{') {
+                /* Shift the text left by one, overwriting the '{' */
+                p++;
+                while (*p) {
+                    if (*p == '}')
+                        break;
+                    p[-1] = *p;
+                    p++;
+                }
+                if (*p != '}')
+                    nasm_error(ERR_WARNING | ERR_PASS1,
+                               "unterminated %%{ construct");
+                p[-1] = '\0';
+                if (*p)
+                    p++;
+                type = TOK_PREPROC_ID;
+            } else if (*p == '[') {
+                int lvl = 1;
+                line += 2;      /* Skip the leading %[ */
+                p++;
+                while (lvl && (c = *p++)) {
+                    switch (c) {
+                    case ']':
+                        lvl--;
+                        break;
+                    case '%':
+                        if (*p == '[')
+                            lvl++;
+                        break;
+                    case '\'':
+                    case '\"':
+                    case '`':
+                        /*
+                         * nasm_skip_string() stops at the closing
+                         * quote, or at the terminating NUL if the
+                         * string is unterminated.  Only step over a
+                         * closing quote; stepping over the NUL would
+                         * make the scan run past the end of the line.
+                         */
+                        p = nasm_skip_string(p - 1);
+                        if (*p)
+                            p++;
+                        break;
+                    default:
+                        break;
+                    }
+                }
+                p--;
+                if (*p)
+                    *p++ = '\0';
+                if (lvl)
+                    nasm_error(ERR_NONFATAL|ERR_PASS1,
+                               "unterminated %%[ construct");
+                type = TOK_INDIRECT;
+            } else if (*p == '?') {
+                type = TOK_PREPROC_Q; /* %? */
+                p++;
+                if (*p == '?') {
+                    type = TOK_PREPROC_QQ; /* %?? */
+                    p++;
+                }
+            } else if (*p == '!') {
+                type = TOK_PREPROC_ID;
+                p++;
+                if (isidchar(*p)) {
+                    do {
+                        p++;
+                    }
+                    while (isidchar(*p));
+                } else if (*p == '\'' || *p == '\"' || *p == '`') {
+                    p = nasm_skip_string(p);
+                    if (*p)
+                        p++;
+                    else
+                        nasm_error(ERR_NONFATAL|ERR_PASS1,
+                                   "unterminated %%! string");
+                } else {
+                    /* %! without string or identifier */
+                    type = TOK_OTHER; /* Legacy behavior... */
+                }
+            } else if (isidchar(*p) ||
+                       ((*p == '!' || *p == '%' || *p == '$') &&
+                        isidchar(p[1]))) {
+                do {
+                    p++;
+                }
+                while (isidchar(*p));
+                type = TOK_PREPROC_ID;
+            } else {
+                type = TOK_OTHER;
+                if (*p == '%')
+                    p++;
+            }
+        } else if (isidstart(*p) || (*p == '$' && isidstart(p[1]))) {
+            type = TOK_ID;
+            p++;
+            while (*p && isidchar(*p))
+                p++;
+        } else if (*p == '\'' || *p == '"' || *p == '`') {
+            /*
+             * A string token.
+             */
+            type = TOK_STRING;
+            p = nasm_skip_string(p);
+
+            if (*p) {
+                p++;
+            } else {
+                nasm_error(ERR_WARNING|ERR_PASS1, "unterminated string");
+            }
+        } else if (p[0] == '$' && p[1] == '$') {
+            type = TOK_OTHER;   /* TOKEN_BASE */
+            p += 2;
+        } else if (isnumstart(*p)) {
+            bool is_hex = false;
+            bool is_float = false;
+            bool has_e = false;
+            char *r;
+
+            /*
+             * A numeric token.
+             */
+
+            if (*p == '$') {
+                p++;
+                is_hex = true;
+            }
+
+            for (;;) {
+                c = *p++;
+
+                if (!is_hex && (c == 'e' || c == 'E')) {
+                    has_e = true;
+                    if (*p == '+' || *p == '-') {
+                        /*
+                         * e can only be followed by +/- if it is either a
+                         * prefixed hex number or a floating-point number
+                         */
+                        p++;
+                        is_float = true;
+                    }
+                } else if (c == 'H' || c == 'h' || c == 'X' || c == 'x') {
+                    is_hex = true;
+                } else if (c == 'P' || c == 'p') {
+                    is_float = true;
+                    if (*p == '+' || *p == '-')
+                        p++;
+                } else if (isnumchar(c) || c == '_')
+                    ; /* just advance */
+                else if (c == '.') {
+                    /*
+                     * we need to deal with consequences of the legacy
+                     * parser, like "1.nolist" being two tokens
+                     * (TOK_NUMBER, TOK_ID) here; at least give it
+                     * a shot for now.  In the future, we probably need
+                     * a flex-based scanner with proper pattern matching
+                     * to do it as well as it can be done.  Nothing in
+                     * the world is going to help the person who wants
+                     * 0x123.p16 interpreted as two tokens, though.
+                     */
+                    r = p;
+                    while (*r == '_')
+                        r++;
+
+                    if (nasm_isdigit(*r) || (is_hex && nasm_isxdigit(*r)) ||
+                        (!is_hex && (*r == 'e' || *r == 'E')) ||
+                        (*r == 'p' || *r == 'P')) {
+                        p = r;
+                        is_float = true;
+                    } else
+                        break;  /* Terminate the token */
+                } else
+                    break;
+            }
+            p--;        /* Point to first character beyond number */
+
+            if (p == line+1 && *line == '$') {
+                type = TOK_OTHER; /* TOKEN_HERE */
+            } else {
+                if (has_e && !is_hex) {
+                    /* 1e13 is floating-point, but 1e13h is not */
+                    is_float = true;
+                }
+
+                type = is_float ? TOK_FLOAT : TOK_NUMBER;
+            }
+        } else if (nasm_isspace(*p)) {
+            type = TOK_WHITESPACE;
+            p = nasm_skip_spaces(p);
+            /*
+             * Whitespace just before end-of-line is discarded by
+             * pretending it's a comment; whitespace just before a
+             * comment gets lumped into the comment.
+             */
+            if (!*p || *p == ';') {
+                type = TOK_COMMENT;
+                while (*p)
+                    p++;
+            }
+        } else if (*p == ';') {
+            type = TOK_COMMENT;
+            while (*p)
+                p++;
+        } else {
+            /*
+             * Anything else is an operator of some kind. We check
+             * for all the double-character operators (>>, <<, //,
+             * %%, <=, >=, ==, !=, <>, &&, ||, ^^), but anything
+             * else is a single-character operator.
+             */
+            type = TOK_OTHER;
+            if ((p[0] == '>' && p[1] == '>') ||
+                (p[0] == '<' && p[1] == '<') ||
+                (p[0] == '/' && p[1] == '/') ||
+                (p[0] == '<' && p[1] == '=') ||
+                (p[0] == '>' && p[1] == '=') ||
+                (p[0] == '=' && p[1] == '=') ||
+                (p[0] == '!' && p[1] == '=') ||
+                (p[0] == '<' && p[1] == '>') ||
+                (p[0] == '&' && p[1] == '&') ||
+                (p[0] == '|' && p[1] == '|') ||
+                (p[0] == '^' && p[1] == '^')) {
+                p++;
+            }
+            p++;
+        }
+
+        /* Comments do not produce a token */
+        if (type != TOK_COMMENT) {
+            *tail = t = new_Token(NULL, type, line, p - line);
+            tail = &t->next;
+        }
+        line = p;
+    }
+    return list;
+}
+
+/*
+ * this function allocates a new managed block of memory and
+ * returns a pointer to the block. The managed blocks are
+ * deleted only all at once by the delete_Blocks function.
+ */
+static void *new_Block(size_t size)
+{
+ Blocks *b = &blocks;
+
+ /* first, get to the end of the linked list */
+ while (b->next)
+ b = b->next;
+ /* now allocate the requested chunk */
+ b->chunk = nasm_malloc(size);
+
+ /* now allocate a new block for the next request */
+ b->next = nasm_zalloc(sizeof(Blocks));
+ return b->chunk;
+}
+
+/*
+ * this function deletes all managed blocks of memory
+ */
+static void delete_Blocks(void)
+{
+ Blocks *a, *b = &blocks;
+
+ /*
+ * keep in mind that the first block, pointed to by blocks
+ * is a static and not dynamically allocated, so we don't
+ * free it.
+ */
+ while (b) {
+ if (b->chunk)
+ nasm_free(b->chunk);
+ a = b;
+ b = b->next;
+ if (a != &blocks)
+ nasm_free(a);
+ }
+ memset(&blocks, 0, sizeof(blocks));
+}
+
+/*
+ * this function creates a new Token and passes a pointer to it
+ * back to the caller. It sets the type and text elements, and
+ * also the a.mac and next elements to NULL.
+ */
+static Token *new_Token(Token * next, enum pp_token_type type,
+ const char *text, int txtlen)
+{
+ Token *t;
+ int i;
+
+ if (!freeTokens) {
+ freeTokens = (Token *) new_Block(TOKEN_BLOCKSIZE * sizeof(Token));
+ for (i = 0; i < TOKEN_BLOCKSIZE - 1; i++)
+ freeTokens[i].next = &freeTokens[i + 1];
+ freeTokens[i].next = NULL;
+ }
+ t = freeTokens;
+ freeTokens = t->next;
+ t->next = next;
+ t->a.mac = NULL;
+ t->type = type;
+ if (type == TOK_WHITESPACE || !text) {
+ t->text = NULL;
+ } else {
+ if (txtlen == 0)
+ txtlen = strlen(text);
+ t->text = nasm_malloc(txtlen+1);
+ memcpy(t->text, text, txtlen);
+ t->text[txtlen] = '\0';
+ }
+ return t;
+}
+
+ /*
+ * Dispose of a single Token: free its text, put the token itself
+ * back on the free list, and return the token that followed it so
+ * that callers can keep walking the list.
+ */
+ static Token *delete_Token(Token * t)
+ {
+     Token *following = t->next;
+
+     nasm_free(t->text);
+
+     /* push the node onto the head of the free list */
+     t->next = freeTokens;
+     freeTokens = t;
+
+     return following;
+ }
+
+/*
+ * Convert a line of tokens back into text.
+ * If expand_locals is not zero, identifiers of the form "%$*xxx"
+ * will be transformed into ..@ctxnum.xxx
+ */
+static char *detoken(Token * tlist, bool expand_locals)
+{
+ Token *t;
+ char *line, *p;
+ const char *q;
+ int len = 0;
+
+ list_for_each(t, tlist) {
+ if (t->type == TOK_PREPROC_ID && t->text[1] == '!') {
+ char *v;
+ char *q = t->text;
+
+ v = t->text + 2;
+ if (*v == '\'' || *v == '\"' || *v == '`') {
+ size_t len = nasm_unquote(v, NULL);
+ size_t clen = strlen(v);
+
+ if (len != clen) {
+ nasm_error(ERR_NONFATAL | ERR_PASS1,
+ "NUL character in %%! string");
+ v = NULL;
+ }
+ }
+
+ if (v) {
+ char *p = getenv(v);
+ if (!p) {
+ nasm_error(ERR_NONFATAL | ERR_PASS1,
+ "nonexistent environment variable `%s'", v);
+ p = "";
+ }
+ t->text = nasm_strdup(p);
+ }
+ nasm_free(q);
+ }
+
+ /* Expand local macros here and not during preprocessing */
+ if (expand_locals &&
+ t->type == TOK_PREPROC_ID && t->text &&
+ t->text[0] == '%' && t->text[1] == '$') {
+ const char *q;
+ char *p;
+ Context *ctx = get_ctx(t->text, &q);
+ if (ctx) {
+ char buffer[40];
+ snprintf(buffer, sizeof(buffer), "..@%"PRIu32".", ctx->number);
+ p = nasm_strcat(buffer, q);
+ nasm_free(t->text);
+ t->text = p;
+ }
+ }
+ if (t->type == TOK_WHITESPACE)
+ len++;
+ else if (t->text)
+ len += strlen(t->text);
+ }
+
+ p = line = nasm_malloc(len + 1);
+
+ list_for_each(t, tlist) {
+ if (t->type == TOK_WHITESPACE) {
+ *p++ = ' ';
+ } else if (t->text) {
+ q = t->text;
+ while (*q)
+ *p++ = *q++;
+ }
+ }
+ *p = '\0';
+
+ return line;
+}
+
+/*
+ * A scanner, suitable for use by the expression evaluator, which
+ * operates on a line of Tokens. Expects a pointer to a pointer to
+ * the first token in the line to be passed in as its private_data
+ * field.
+ *
+ * FIX: This really needs to be unified with stdscan.
+ */
+static int ppscan(void *private_data, struct tokenval *tokval)
+{
+ Token **tlineptr = private_data;
+ Token *tline;
+ char ourcopy[MAX_KEYWORD+1], *p, *r, *s;
+
+ do {
+ tline = *tlineptr;
+ *tlineptr = tline ? tline->next : NULL;
+ } while (tline && (tline->type == TOK_WHITESPACE ||
+ tline->type == TOK_COMMENT));
+
+ if (!tline)
+ return tokval->t_type = TOKEN_EOS;
+
+ tokval->t_charptr = tline->text;
+
+ if (tline->text[0] == '$' && !tline->text[1])
+ return tokval->t_type = TOKEN_HERE;
+ if (tline->text[0] == '$' && tline->text[1] == '$' && !tline->text[2])
+ return tokval->t_type = TOKEN_BASE;
+
+ if (tline->type == TOK_ID) {
+ p = tokval->t_charptr = tline->text;
+ if (p[0] == '$') {
+ tokval->t_charptr++;
+ return tokval->t_type = TOKEN_ID;
+ }
+
+ for (r = p, s = ourcopy; *r; r++) {
+ if (r >= p+MAX_KEYWORD)
+ return tokval->t_type = TOKEN_ID; /* Not a keyword */
+ *s++ = nasm_tolower(*r);
+ }
+ *s = '\0';
+ /* right, so we have an identifier sitting in temp storage. now,
+ * is it actually a register or instruction name, or what? */
+ return nasm_token_hash(ourcopy, tokval);
+ }
+
+ if (tline->type == TOK_NUMBER) {
+ bool rn_error;
+ tokval->t_integer = readnum(tline->text, &rn_error);
+ tokval->t_charptr = tline->text;
+ if (rn_error)
+ return tokval->t_type = TOKEN_ERRNUM;
+ else
+ return tokval->t_type = TOKEN_NUM;
+ }
+
+ if (tline->type == TOK_FLOAT) {
+ return tokval->t_type = TOKEN_FLOAT;
+ }
+
+ if (tline->type == TOK_STRING) {
+ char bq, *ep;
+
+ bq = tline->text[0];
+ tokval->t_charptr = tline->text;
+ tokval->t_inttwo = nasm_unquote(tline->text, &ep);
+
+ if (ep[0] != bq || ep[1] != '\0')
+ return tokval->t_type = TOKEN_ERRSTR;
+ else
+ return tokval->t_type = TOKEN_STR;
+ }
+
+ if (tline->type == TOK_OTHER) {
+ if (!strcmp(tline->text, "<<"))
+ return tokval->t_type = TOKEN_SHL;
+ if (!strcmp(tline->text, ">>"))
+ return tokval->t_type = TOKEN_SHR;
+ if (!strcmp(tline->text, "//"))
+ return tokval->t_type = TOKEN_SDIV;
+ if (!strcmp(tline->text, "%%"))
+ return tokval->t_type = TOKEN_SMOD;
+ if (!strcmp(tline->text, "=="))
+ return tokval->t_type = TOKEN_EQ;
+ if (!strcmp(tline->text, "<>"))
+ return tokval->t_type = TOKEN_NE;
+ if (!strcmp(tline->text, "!="))
+ return tokval->t_type = TOKEN_NE;
+ if (!strcmp(tline->text, "<="))
+ return tokval->t_type = TOKEN_LE;
+ if (!strcmp(tline->text, ">="))
+ return tokval->t_type = TOKEN_GE;
+ if (!strcmp(tline->text, "&&"))
+ return tokval->t_type = TOKEN_DBL_AND;
+ if (!strcmp(tline->text, "^^"))
+ return tokval->t_type = TOKEN_DBL_XOR;
+ if (!strcmp(tline->text, "||"))
+ return tokval->t_type = TOKEN_DBL_OR;
+ }
+
+ /*
+ * We have no other options: just return the first character of
+ * the token text.
+ */
+ return tokval->t_type = tline->text[0];
+}
+
+/*
+ * Compare a string to the name of an existing macro; this is a
+ * simple wrapper which calls either strcmp or nasm_stricmp
+ * depending on the value of the `casesense' parameter.
+ */
+static int mstrcmp(const char *p, const char *q, bool casesense)
+{
+ return casesense ? strcmp(p, q) : nasm_stricmp(p, q);
+}
+
+/*
+ * Compare a string to the name of an existing macro; this is a
+ * simple wrapper which calls either strcmp or nasm_stricmp
+ * depending on the value of the `casesense' parameter.
+ */
+static int mmemcmp(const char *p, const char *q, size_t l, bool casesense)
+{
+ return casesense ? memcmp(p, q, l) : nasm_memicmp(p, q, l);
+}
+
+/*
+ * Return the Context structure associated with a %$ token. Return
+ * NULL, having _already_ reported an error condition, if the
+ * context stack isn't deep enough for the supplied number of $
+ * signs.
+ *
+ * If "namep" is non-NULL, set it to the pointer to the macro name
+ * tail, i.e. the part beyond %$...
+ */
+static Context *get_ctx(const char *name, const char **namep)
+{
+ Context *ctx;
+ int i;
+
+ if (namep)
+ *namep = name;
+
+ if (!name || name[0] != '%' || name[1] != '$')
+ return NULL;
+
+ if (!cstk) {
+ nasm_error(ERR_NONFATAL, "`%s': context stack is empty", name);
+ return NULL;
+ }
+
+ name += 2;
+ ctx = cstk;
+ i = 0;
+ while (ctx && *name == '$') {
+ name++;
+ i++;
+ ctx = ctx->next;
+ }
+ if (!ctx) {
+ nasm_error(ERR_NONFATAL, "`%s': context stack is only"
+ " %d level%s deep", name, i, (i == 1 ? "" : "s"));
+ return NULL;
+ }
+
+ if (namep)
+ *namep = name;
+
+ return ctx;
+}
+
+/*
+ * Check to see if a file is already in a string list
+ */
+static bool in_list(const StrList *list, const char *str)
+{
+ while (list) {
+ if (!strcmp(list->str, str))
+ return true;
+ list = list->next;
+ }
+ return false;
+}
+
+/*
+ * Open an include file. This routine must always return a valid
+ * file pointer if it returns - it's responsible for throwing an
+ * ERR_FATAL and bombing out completely if not. It should also try
+ * the include path one by one until it finds the file or reaches
+ * the end of the path.
+ */
+static FILE *inc_fopen(const char *file, StrList **dhead, StrList ***dtail,
+ bool missing_ok, enum file_flags mode)
+{
+ FILE *fp;
+ char *prefix = "";
+ IncPath *ip = ipath;
+ int len = strlen(file);
+ size_t prefix_len = 0;
+ StrList *sl;
+
+ while (1) {
+ sl = nasm_malloc(prefix_len+len+1+sizeof sl->next);
+ memcpy(sl->str, prefix, prefix_len);
+ memcpy(sl->str+prefix_len, file, len+1);
+ fp = nasm_open_read(sl->str, mode);
+ if (fp && dhead && !in_list(*dhead, sl->str)) {
+ sl->next = NULL;
+ **dtail = sl;
+ *dtail = &sl->next;
+ } else {
+ nasm_free(sl);
+ }
+ if (fp)
+ return fp;
+ if (!ip) {
+ if (!missing_ok)
+ break;
+ prefix = NULL;
+ } else {
+ prefix = ip->path;
+ ip = ip->next;
+ }
+ if (prefix) {
+ prefix_len = strlen(prefix);
+ } else {
+ /* -MG given and file not found */
+ if (dhead && !in_list(*dhead, file)) {
+ sl = nasm_malloc(len+1+sizeof sl->next);
+ sl->next = NULL;
+ strcpy(sl->str, file);
+ **dtail = sl;
+ *dtail = &sl->next;
+ }
+ return NULL;
+ }
+ }
+
+ nasm_error(ERR_FATAL, "unable to open include file `%s'", file);
+ return NULL;
+}
+
+/*
+ * Opens an include or input file. Public version, for use by modules
+ * that get a file:lineno pair and need to look at the file again
+ * (e.g. the CodeView debug backend). Returns NULL on failure.
+ */
+FILE *pp_input_fopen(const char *filename, enum file_flags mode)
+{
+ FILE *fp;
+ StrList *xsl = NULL;
+ StrList **xst = &xsl;
+
+ fp = inc_fopen(filename, &xsl, &xst, true, mode);
+ if (xsl)
+ nasm_free(xsl);
+ return fp;
+}
+
+/*
+ * Determine if we should warn on defining a single-line macro of
+ * name `name', with `nparam' parameters. If nparam is 0 or -1, will
+ * return true if _any_ single-line macro of that name is defined.
+ * Otherwise, will return true if a single-line macro with either
+ * `nparam' or no parameters is defined.
+ *
+ * If a macro with precisely the right number of parameters is
+ * defined, or nparam is -1, the address of the definition structure
+ * will be returned in `defn'; otherwise NULL will be returned. If `defn'
+ * is NULL, no action will be taken regarding its contents, and no
+ * error will occur.
+ *
+ * Note that this is also called with nparam zero to resolve
+ * `ifdef'.
+ *
+ * If you already know which context macro belongs to, you can pass
+ * the context pointer as first parameter; if you won't but name begins
+ * with %$ the context will be automatically computed. If all_contexts
+ * is true, macro will be searched in outer contexts as well.
+ */
+static bool
+smacro_defined(Context * ctx, const char *name, int nparam, SMacro ** defn,
+ bool nocase)
+{
+ struct hash_table *smtbl;
+ SMacro *m;
+
+ if (ctx) {
+ smtbl = &ctx->localmac;
+ } else if (name[0] == '%' && name[1] == '$') {
+ if (cstk)
+ ctx = get_ctx(name, &name);
+ if (!ctx)
+ return false; /* got to return _something_ */
+ smtbl = &ctx->localmac;
+ } else {
+ smtbl = &smacros;
+ }
+ m = (SMacro *) hash_findix(smtbl, name);
+
+ while (m) {
+ if (!mstrcmp(m->name, name, m->casesense && nocase) &&
+ (nparam <= 0 || m->nparam == 0 || nparam == (int) m->nparam)) {
+ if (defn) {
+ if (nparam == (int) m->nparam || nparam == -1)
+ *defn = m;
+ else
+ *defn = NULL;
+ }
+ return true;
+ }
+ m = m->next;
+ }
+
+ return false;
+}
+
+/*
+ * Count and mark off the parameters in a multi-line macro call.
+ * This is called both from within the multi-line macro expansion
+ * code, and also to mark off the default parameters when provided
+ * in a %macro definition line.
+ */
+static void count_mmac_params(Token * t, int *nparam, Token *** params)
+{
+ int paramsize, brace;
+
+ *nparam = paramsize = 0;
+ *params = NULL;
+ while (t) {
+ /* +1: we need space for the final NULL */
+ if (*nparam+1 >= paramsize) {
+ paramsize += PARAM_DELTA;
+ *params = nasm_realloc(*params, sizeof(**params) * paramsize);
+ }
+ skip_white_(t);
+ brace = 0;
+ if (tok_is_(t, "{"))
+ brace++;
+ (*params)[(*nparam)++] = t;
+ if (brace) {
+ while (brace && (t = t->next) != NULL) {
+ if (tok_is_(t, "{"))
+ brace++;
+ else if (tok_is_(t, "}"))
+ brace--;
+ }
+
+ if (t) {
+ /*
+ * Now we've found the closing brace, look further
+ * for the comma.
+ */
+ t = t->next;
+ skip_white_(t);
+ if (tok_isnt_(t, ",")) {
+ nasm_error(ERR_NONFATAL,
+ "braces do not enclose all of macro parameter");
+ while (tok_isnt_(t, ","))
+ t = t->next;
+ }
+ }
+ } else {
+ while (tok_isnt_(t, ","))
+ t = t->next;
+ }
+ if (t) { /* got a comma/brace */
+ t = t->next; /* eat the comma */
+ }
+ }
+}
+
+/*
+ * Determine whether one of the various `if' conditions is true or
+ * not.
+ *
+ * We must free the tline we get passed.
+ */
+static bool if_condition(Token * tline, enum preproc_token ct)
+{
+ enum pp_conditional i = PP_COND(ct);
+ bool j;
+ Token *t, *tt, **tptr, *origline;
+ struct tokenval tokval;
+ expr *evalresult;
+ enum pp_token_type needtype;
+ char *p;
+
+ origline = tline;
+
+ switch (i) {
+ case PPC_IFCTX:
+ j = false; /* have we matched yet? */
+ while (true) {
+ skip_white_(tline);
+ if (!tline)
+ break;
+ if (tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects context identifiers", pp_directives[ct]);
+ free_tlist(origline);
+ return -1;
+ }
+ if (cstk && cstk->name && !nasm_stricmp(tline->text, cstk->name))
+ j = true;
+ tline = tline->next;
+ }
+ break;
+
+ case PPC_IFDEF:
+ j = false; /* have we matched yet? */
+ while (tline) {
+ skip_white_(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects macro identifiers", pp_directives[ct]);
+ goto fail;
+ }
+ if (smacro_defined(NULL, tline->text, 0, NULL, true))
+ j = true;
+ tline = tline->next;
+ }
+ break;
+
+ case PPC_IFENV:
+ tline = expand_smacro(tline);
+ j = false; /* have we matched yet? */
+ while (tline) {
+ skip_white_(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ tline->type != TOK_STRING &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '!'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects environment variable names",
+ pp_directives[ct]);
+ goto fail;
+ }
+ p = tline->text;
+ if (tline->type == TOK_PREPROC_ID)
+ p += 2; /* Skip leading %! */
+ if (*p == '\'' || *p == '\"' || *p == '`')
+ nasm_unquote_cstr(p, ct);
+ if (getenv(p))
+ j = true;
+ tline = tline->next;
+ }
+ break;
+
+ case PPC_IFIDN:
+ case PPC_IFIDNI:
+ tline = expand_smacro(tline);
+ t = tt = tline;
+ while (tok_isnt_(tt, ","))
+ tt = tt->next;
+ if (!tt) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects two comma-separated arguments",
+ pp_directives[ct]);
+ goto fail;
+ }
+ tt = tt->next;
+ j = true; /* assume equality unless proved not */
+ while ((t->type != TOK_OTHER || strcmp(t->text, ",")) && tt) {
+ if (tt->type == TOK_OTHER && !strcmp(tt->text, ",")) {
+ nasm_error(ERR_NONFATAL, "`%s': more than one comma on line",
+ pp_directives[ct]);
+ goto fail;
+ }
+ if (t->type == TOK_WHITESPACE) {
+ t = t->next;
+ continue;
+ }
+ if (tt->type == TOK_WHITESPACE) {
+ tt = tt->next;
+ continue;
+ }
+ if (tt->type != t->type) {
+ j = false; /* found mismatching tokens */
+ break;
+ }
+ /* When comparing strings, need to unquote them first */
+ if (t->type == TOK_STRING) {
+ size_t l1 = nasm_unquote(t->text, NULL);
+ size_t l2 = nasm_unquote(tt->text, NULL);
+
+ if (l1 != l2) {
+ j = false;
+ break;
+ }
+ if (mmemcmp(t->text, tt->text, l1, i == PPC_IFIDN)) {
+ j = false;
+ break;
+ }
+ } else if (mstrcmp(tt->text, t->text, i == PPC_IFIDN) != 0) {
+ j = false; /* found mismatching tokens */
+ break;
+ }
+
+ t = t->next;
+ tt = tt->next;
+ }
+ if ((t->type != TOK_OTHER || strcmp(t->text, ",")) || tt)
+ j = false; /* trailing gunk on one end or other */
+ break;
+
+ case PPC_IFMACRO:
+ {
+ bool found = false;
+ MMacro searching, *mmac;
+
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tok_type_(tline, TOK_ID)) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects a macro name", pp_directives[ct]);
+ goto fail;
+ }
+ searching.name = nasm_strdup(tline->text);
+ searching.casesense = true;
+ searching.plus = false;
+ searching.nolist = false;
+ searching.in_progress = 0;
+ searching.max_depth = 0;
+ searching.rep_nest = NULL;
+ searching.nparam_min = 0;
+ searching.nparam_max = INT_MAX;
+ tline = expand_smacro(tline->next);
+ skip_white_(tline);
+ if (!tline) {
+ } else if (!tok_type_(tline, TOK_NUMBER)) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects a parameter count or nothing",
+ pp_directives[ct]);
+ } else {
+ searching.nparam_min = searching.nparam_max =
+ readnum(tline->text, &j);
+ if (j)
+ nasm_error(ERR_NONFATAL,
+ "unable to parse parameter count `%s'",
+ tline->text);
+ }
+ if (tline && tok_is_(tline->next, "-")) {
+ tline = tline->next->next;
+ if (tok_is_(tline, "*"))
+ searching.nparam_max = INT_MAX;
+ else if (!tok_type_(tline, TOK_NUMBER))
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects a parameter count after `-'",
+ pp_directives[ct]);
+ else {
+ searching.nparam_max = readnum(tline->text, &j);
+ if (j)
+ nasm_error(ERR_NONFATAL,
+ "unable to parse parameter count `%s'",
+ tline->text);
+ if (searching.nparam_min > searching.nparam_max)
+ nasm_error(ERR_NONFATAL,
+ "minimum parameter count exceeds maximum");
+ }
+ }
+ if (tline && tok_is_(tline->next, "+")) {
+ tline = tline->next;
+ searching.plus = true;
+ }
+ mmac = (MMacro *) hash_findix(&mmacros, searching.name);
+ while (mmac) {
+ if (!strcmp(mmac->name, searching.name) &&
+ (mmac->nparam_min <= searching.nparam_max
+ || searching.plus)
+ && (searching.nparam_min <= mmac->nparam_max
+ || mmac->plus)) {
+ found = true;
+ break;
+ }
+ mmac = mmac->next;
+ }
+ if (tline && tline->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after %%ifmacro ignored");
+ nasm_free(searching.name);
+ j = found;
+ break;
+ }
+
+ case PPC_IFID:
+ needtype = TOK_ID;
+ goto iftype;
+ case PPC_IFNUM:
+ needtype = TOK_NUMBER;
+ goto iftype;
+ case PPC_IFSTR:
+ needtype = TOK_STRING;
+ goto iftype;
+
+iftype:
+ t = tline = expand_smacro(tline);
+
+ while (tok_type_(t, TOK_WHITESPACE) ||
+ (needtype == TOK_NUMBER &&
+ tok_type_(t, TOK_OTHER) &&
+ (t->text[0] == '-' || t->text[0] == '+') &&
+ !t->text[1]))
+ t = t->next;
+
+ j = tok_type_(t, needtype);
+ break;
+
+ case PPC_IFTOKEN:
+ t = tline = expand_smacro(tline);
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+
+ j = false;
+ if (t) {
+ t = t->next; /* Skip the actual token */
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+ j = !t; /* Should be nothing left */
+ }
+ break;
+
+ case PPC_IFEMPTY:
+ t = tline = expand_smacro(tline);
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+
+ j = !t; /* Should be empty */
+ break;
+
+ case PPC_IF:
+ t = tline = expand_smacro(tline);
+ tptr = &t;
+ tokval.t_type = TOKEN_INVALID;
+ evalresult = evaluate(ppscan, tptr, &tokval,
+ NULL, pass | CRITICAL, NULL);
+ if (!evalresult)
+ return -1;
+ if (tokval.t_type)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after expression ignored");
+ if (!is_simple(evalresult)) {
+ nasm_error(ERR_NONFATAL,
+ "non-constant value given to `%s'", pp_directives[ct]);
+ goto fail;
+ }
+ j = reloc_value(evalresult) != 0;
+ break;
+
+ default:
+ nasm_error(ERR_FATAL,
+ "preprocessor directive `%s' not yet implemented",
+ pp_directives[ct]);
+ goto fail;
+ }
+
+ free_tlist(origline);
+ return j ^ PP_NEGATIVE(ct);
+
+fail:
+ free_tlist(origline);
+ return -1;
+}
+
+/*
+ * Common code for defining an smacro
+ */
+static bool define_smacro(Context *ctx, const char *mname, bool casesense,
+ int nparam, Token *expansion)
+{
+ SMacro *smac, **smhead;
+ struct hash_table *smtbl;
+
+ if (smacro_defined(ctx, mname, nparam, &smac, casesense)) {
+ if (!smac) {
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "single-line macro `%s' defined both with and"
+ " without parameters", mname);
+ /*
+ * Some instances of the old code considered this a failure,
+ * some others didn't. What is the right thing to do here?
+ */
+ free_tlist(expansion);
+ return false; /* Failure */
+ } else {
+ /*
+ * We're redefining, so we have to take over an
+ * existing SMacro structure. This means freeing
+ * what was already in it.
+ */
+ nasm_free(smac->name);
+ free_tlist(smac->expansion);
+ }
+ } else {
+ smtbl = ctx ? &ctx->localmac : &smacros;
+ smhead = (SMacro **) hash_findi_add(smtbl, mname);
+ smac = nasm_malloc(sizeof(SMacro));
+ smac->next = *smhead;
+ *smhead = smac;
+ }
+ smac->name = nasm_strdup(mname);
+ smac->casesense = casesense;
+ smac->nparam = nparam;
+ smac->expansion = expansion;
+ smac->in_progress = false;
+ return true; /* Success */
+}
+
+/*
+ * Undefine an smacro
+ */
+static void undef_smacro(Context *ctx, const char *mname)
+{
+ SMacro **smhead, *s, **sp;
+ struct hash_table *smtbl;
+
+ smtbl = ctx ? &ctx->localmac : &smacros;
+ smhead = (SMacro **)hash_findi(smtbl, mname, NULL);
+
+ if (smhead) {
+ /*
+ * We now have a macro name... go hunt for it.
+ */
+ sp = smhead;
+ while ((s = *sp) != NULL) {
+ if (!mstrcmp(s->name, mname, s->casesense)) {
+ *sp = s->next;
+ nasm_free(s->name);
+ free_tlist(s->expansion);
+ nasm_free(s);
+ } else {
+ sp = &s->next;
+ }
+ }
+ }
+}
+
+/*
+ * Parse a mmacro specification.
+ */
+static bool parse_mmacro_spec(Token *tline, MMacro *def, const char *directive)
+{
+ bool err;
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tok_type_(tline, TOK_ID)) {
+ nasm_error(ERR_NONFATAL, "`%s' expects a macro name", directive);
+ return false;
+ }
+
+ def->prev = NULL;
+ def->name = nasm_strdup(tline->text);
+ def->plus = false;
+ def->nolist = false;
+ def->in_progress = 0;
+ def->rep_nest = NULL;
+ def->nparam_min = 0;
+ def->nparam_max = 0;
+
+ tline = expand_smacro(tline->next);
+ skip_white_(tline);
+ if (!tok_type_(tline, TOK_NUMBER)) {
+ nasm_error(ERR_NONFATAL, "`%s' expects a parameter count", directive);
+ } else {
+ def->nparam_min = def->nparam_max =
+ readnum(tline->text, &err);
+ if (err)
+ nasm_error(ERR_NONFATAL,
+ "unable to parse parameter count `%s'", tline->text);
+ }
+ if (tline && tok_is_(tline->next, "-")) {
+ tline = tline->next->next;
+ if (tok_is_(tline, "*")) {
+ def->nparam_max = INT_MAX;
+ } else if (!tok_type_(tline, TOK_NUMBER)) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects a parameter count after `-'", directive);
+ } else {
+ def->nparam_max = readnum(tline->text, &err);
+ if (err) {
+ nasm_error(ERR_NONFATAL, "unable to parse parameter count `%s'",
+ tline->text);
+ }
+ if (def->nparam_min > def->nparam_max) {
+ nasm_error(ERR_NONFATAL, "minimum parameter count exceeds maximum");
+ }
+ }
+ }
+ if (tline && tok_is_(tline->next, "+")) {
+ tline = tline->next;
+ def->plus = true;
+ }
+ if (tline && tok_type_(tline->next, TOK_ID) &&
+ !nasm_stricmp(tline->next->text, ".nolist")) {
+ tline = tline->next;
+ def->nolist = true;
+ }
+
+ /*
+ * Handle default parameters.
+ */
+ if (tline && tline->next) {
+ def->dlist = tline->next;
+ tline->next = NULL;
+ count_mmac_params(def->dlist, &def->ndefs, &def->defaults);
+ } else {
+ def->dlist = NULL;
+ def->defaults = NULL;
+ }
+ def->expansion = NULL;
+
+ if (def->defaults && def->ndefs > def->nparam_max - def->nparam_min &&
+ !def->plus)
+ nasm_error(ERR_WARNING|ERR_PASS1|ERR_WARN_MDP,
+ "too many default macro parameters");
+
+ return true;
+}
+
+
+/*
+ * Decode a size directive
+ */
+static int parse_size(const char *str) {
+ static const char *size_names[] =
+ { "byte", "dword", "oword", "qword", "tword", "word", "yword" };
+ static const int sizes[] =
+ { 0, 1, 4, 16, 8, 10, 2, 32 };
+
+ return sizes[bsii(str, size_names, ARRAY_SIZE(size_names))+1];
+}
+
+/**
+ * find and process preprocessor directive in passed line
+ * Find out if a line contains a preprocessor directive, and deal
+ * with it if so.
+ *
+ * If a directive _is_ found, it is the responsibility of this routine
+ * (and not the caller) to free_tlist() the line.
+ *
+ * @param tline a pointer to the current tokenized line linked list
+ * @return DIRECTIVE_FOUND or NO_DIRECTIVE_FOUND
+ *
+ */
+static int do_directive(Token * tline)
+{
+ enum preproc_token i;
+ int j;
+ bool err;
+ int nparam;
+ bool nolist;
+ bool casesense;
+ int k, m;
+ int offset;
+ char *p, *pp;
+ const char *mname;
+ Include *inc;
+ Context *ctx;
+ Cond *cond;
+ MMacro *mmac, **mmhead;
+ Token *t = NULL, *tt, *param_start, *macro_start, *last, **tptr, *origline;
+ Line *l;
+ struct tokenval tokval;
+ expr *evalresult;
+ MMacro *tmp_defining; /* Used when manipulating rep_nest */
+ int64_t count;
+ size_t len;
+ int severity;
+
+ origline = tline;
+
+ skip_white_(tline);
+ if (!tline || !tok_type_(tline, TOK_PREPROC_ID) ||
+ (tline->text[1] == '%' || tline->text[1] == '$'
+ || tline->text[1] == '!'))
+ return NO_DIRECTIVE_FOUND;
+
+ i = pp_token_hash(tline->text);
+
+ /*
+ * FIXME: We zap execution of PP_RMACRO, PP_IRMACRO, PP_EXITMACRO
+ * since they are known to be buggy at moment, we need to fix them
+ * in future release (2.09-2.10)
+ */
+ if (i == PP_RMACRO || i == PP_IRMACRO || i == PP_EXITMACRO) {
+ nasm_error(ERR_NONFATAL, "unknown preprocessor directive `%s'",
+ tline->text);
+ return NO_DIRECTIVE_FOUND;
+ }
+
+ /*
+ * If we're in a non-emitting branch of a condition construct,
+ * or walking to the end of an already terminated %rep block,
+ * we should ignore all directives except for condition
+ * directives.
+ */
+ if (((istk->conds && !emitting(istk->conds->state)) ||
+ (istk->mstk && !istk->mstk->in_progress)) && !is_condition(i)) {
+ return NO_DIRECTIVE_FOUND;
+ }
+
+ /*
+ * If we're defining a macro or reading a %rep block, we should
+ * ignore all directives except for %macro/%imacro (which nest),
+ * %endm/%endmacro, and (only if we're in a %rep block) %endrep.
+ * If we're in a %rep block, another %rep nests, so should be let through.
+ */
+ if (defining && i != PP_MACRO && i != PP_IMACRO &&
+ i != PP_RMACRO && i != PP_IRMACRO &&
+ i != PP_ENDMACRO && i != PP_ENDM &&
+ (defining->name || (i != PP_ENDREP && i != PP_REP))) {
+ return NO_DIRECTIVE_FOUND;
+ }
+
+ if (defining) {
+ if (i == PP_MACRO || i == PP_IMACRO ||
+ i == PP_RMACRO || i == PP_IRMACRO) {
+ nested_mac_count++;
+ return NO_DIRECTIVE_FOUND;
+ } else if (nested_mac_count > 0) {
+ if (i == PP_ENDMACRO) {
+ nested_mac_count--;
+ return NO_DIRECTIVE_FOUND;
+ }
+ }
+ if (!defining->name) {
+ if (i == PP_REP) {
+ nested_rep_count++;
+ return NO_DIRECTIVE_FOUND;
+ } else if (nested_rep_count > 0) {
+ if (i == PP_ENDREP) {
+ nested_rep_count--;
+ return NO_DIRECTIVE_FOUND;
+ }
+ }
+ }
+ }
+
+ switch (i) {
+ case PP_INVALID:
+ nasm_error(ERR_NONFATAL, "unknown preprocessor directive `%s'",
+ tline->text);
+ return NO_DIRECTIVE_FOUND; /* didn't get it */
+
+ case PP_STACKSIZE:
+ /* Directive to tell NASM what the default stack size is. The
+ * default is for a 16-bit stack, and this can be overridden with
+ * %stacksize large.
+ */
+ tline = tline->next;
+ if (tline && tline->type == TOK_WHITESPACE)
+ tline = tline->next;
+ if (!tline || tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL, "`%%stacksize' missing size parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ if (nasm_stricmp(tline->text, "flat") == 0) {
+ /* All subsequent ARG directives are for a 32-bit stack */
+ StackSize = 4;
+ StackPointer = "ebp";
+ ArgOffset = 8;
+ LocalOffset = 0;
+ } else if (nasm_stricmp(tline->text, "flat64") == 0) {
+ /* All subsequent ARG directives are for a 64-bit stack */
+ StackSize = 8;
+ StackPointer = "rbp";
+ ArgOffset = 16;
+ LocalOffset = 0;
+ } else if (nasm_stricmp(tline->text, "large") == 0) {
+ /* All subsequent ARG directives are for a 16-bit stack,
+ * far function call.
+ */
+ StackSize = 2;
+ StackPointer = "bp";
+ ArgOffset = 4;
+ LocalOffset = 0;
+ } else if (nasm_stricmp(tline->text, "small") == 0) {
+ /* All subsequent ARG directives are for a 16-bit stack,
+ * far function call. We don't support near functions.
+ */
+ StackSize = 2;
+ StackPointer = "bp";
+ ArgOffset = 6;
+ LocalOffset = 0;
+ } else {
+ nasm_error(ERR_NONFATAL, "`%%stacksize' invalid size type");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_ARG:
+ /* TASM like ARG directive to define arguments to functions, in
+ * the following form:
+ *
+ * ARG arg1:WORD, arg2:DWORD, arg4:QWORD
+ */
+ offset = ArgOffset;
+ do {
+ char *arg, directive[256];
+ int size = StackSize;
+
+ /* Find the argument name */
+ tline = tline->next;
+ if (tline && tline->type == TOK_WHITESPACE)
+ tline = tline->next;
+ if (!tline || tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL, "`%%arg' missing argument parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ arg = tline->text;
+
+ /* Find the argument size type */
+ tline = tline->next;
+ if (!tline || tline->type != TOK_OTHER
+ || tline->text[0] != ':') {
+ nasm_error(ERR_NONFATAL,
+ "Syntax error processing `%%arg' directive");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ tline = tline->next;
+ if (!tline || tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL, "`%%arg' missing size type parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ /* Allow macro expansion of type parameter */
+ tt = tokenize(tline->text);
+ tt = expand_smacro(tt);
+ size = parse_size(tt->text);
+ if (!size) {
+ nasm_error(ERR_NONFATAL,
+ "Invalid size type for `%%arg' missing directive");
+ free_tlist(tt);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ free_tlist(tt);
+
+ /* Round up to even stack slots */
+ size = ALIGN(size, StackSize);
+
+ /* Now define the macro for the argument */
+ snprintf(directive, sizeof(directive), "%%define %s (%s+%d)",
+ arg, StackPointer, offset);
+ do_directive(tokenize(directive));
+ offset += size;
+
+ /* Move to the next argument in the list */
+ tline = tline->next;
+ if (tline && tline->type == TOK_WHITESPACE)
+ tline = tline->next;
+ } while (tline && tline->type == TOK_OTHER && tline->text[0] == ',');
+ ArgOffset = offset;
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_LOCAL:
+ /* TASM like LOCAL directive to define local variables for a
+ * function, in the following form:
+ *
+ * LOCAL local1:WORD, local2:DWORD, local4:QWORD = LocalSize
+ *
+ * The '= LocalSize' at the end is ignored by NASM, but is
+ * required by TASM to define the local parameter size (and used
+ * by the TASM macro package).
+ */
+ offset = LocalOffset;
+ do {
+ char *local, directive[256];
+ int size = StackSize;
+
+ /* Find the argument name */
+ tline = tline->next;
+ if (tline && tline->type == TOK_WHITESPACE)
+ tline = tline->next;
+ if (!tline || tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL,
+ "`%%local' missing argument parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ local = tline->text;
+
+ /* Find the argument size type */
+ tline = tline->next;
+ if (!tline || tline->type != TOK_OTHER
+ || tline->text[0] != ':') {
+ nasm_error(ERR_NONFATAL,
+ "Syntax error processing `%%local' directive");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ tline = tline->next;
+ if (!tline || tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL,
+ "`%%local' missing size type parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ /* Allow macro expansion of type parameter */
+ tt = tokenize(tline->text);
+ tt = expand_smacro(tt);
+ size = parse_size(tt->text);
+ if (!size) {
+ nasm_error(ERR_NONFATAL,
+ "Invalid size type for `%%local' missing directive");
+ free_tlist(tt);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ free_tlist(tt);
+
+ /* Round up to even stack slots */
+ size = ALIGN(size, StackSize);
+
+ offset += size; /* Negative offset, increment before */
+
+ /* Now define the macro for the argument */
+ snprintf(directive, sizeof(directive), "%%define %s (%s-%d)",
+ local, StackPointer, offset);
+ do_directive(tokenize(directive));
+
+ /* Now define the assign to setup the enter_c macro correctly */
+ snprintf(directive, sizeof(directive),
+ "%%assign %%$localsize %%$localsize+%d", size);
+ do_directive(tokenize(directive));
+
+ /* Move to the next argument in the list */
+ tline = tline->next;
+ if (tline && tline->type == TOK_WHITESPACE)
+ tline = tline->next;
+ } while (tline && tline->type == TOK_OTHER && tline->text[0] == ',');
+ LocalOffset = offset;
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_CLEAR:
+ if (tline->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after `%%clear' ignored");
+ free_macros();
+ init_macros();
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_DEPEND:
+ t = tline->next = expand_smacro(tline->next);
+ skip_white_(t);
+ if (!t || (t->type != TOK_STRING &&
+ t->type != TOK_INTERNAL_STRING)) {
+ nasm_error(ERR_NONFATAL, "`%%depend' expects a file name");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND; /* but we did _something_ */
+ }
+ if (t->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after `%%depend' ignored");
+ p = t->text;
+ if (t->type != TOK_INTERNAL_STRING)
+ nasm_unquote_cstr(p, i);
+ if (dephead && !in_list(*dephead, p)) {
+ StrList *sl = nasm_malloc(strlen(p)+1+sizeof sl->next);
+ sl->next = NULL;
+ strcpy(sl->str, p);
+ *deptail = sl;
+ deptail = &sl->next;
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_INCLUDE:
+ t = tline->next = expand_smacro(tline->next);
+ skip_white_(t);
+
+ if (!t || (t->type != TOK_STRING &&
+ t->type != TOK_INTERNAL_STRING)) {
+ nasm_error(ERR_NONFATAL, "`%%include' expects a file name");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND; /* but we did _something_ */
+ }
+ if (t->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after `%%include' ignored");
+ p = t->text;
+ if (t->type != TOK_INTERNAL_STRING)
+ nasm_unquote_cstr(p, i);
+ inc = nasm_malloc(sizeof(Include));
+ inc->next = istk;
+ inc->conds = NULL;
+ inc->fp = inc_fopen(p, dephead, &deptail, pass == 0, NF_TEXT);
+ if (!inc->fp) {
+ /* -MG given but file not found */
+ nasm_free(inc);
+ } else {
+ inc->fname = src_set_fname(p);
+ inc->lineno = src_set_linnum(0);
+ inc->lineinc = 1;
+ inc->expansion = NULL;
+ inc->mstk = NULL;
+ istk = inc;
+ lfmt->uplevel(LIST_INCLUDE);
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_USE:
+ {
+ static macros_t *use_pkg;
+ const char *pkg_macro = NULL;
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+
+ if (!tline || (tline->type != TOK_STRING &&
+ tline->type != TOK_INTERNAL_STRING &&
+ tline->type != TOK_ID)) {
+ nasm_error(ERR_NONFATAL, "`%%use' expects a package name");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND; /* but we did _something_ */
+ }
+ if (tline->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after `%%use' ignored");
+ if (tline->type == TOK_STRING)
+ nasm_unquote_cstr(tline->text, i);
+ use_pkg = nasm_stdmac_find_package(tline->text);
+ if (!use_pkg)
+ nasm_error(ERR_NONFATAL, "unknown `%%use' package: %s", tline->text);
+ else
+ pkg_macro = (char *)use_pkg + 1; /* The first string will be <%define>__USE_*__ */
+ if (use_pkg && ! smacro_defined(NULL, pkg_macro, 0, NULL, true)) {
+ /* Not already included, go ahead and include it */
+ stdmacpos = use_pkg;
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ case PP_PUSH:
+ case PP_REPL:
+ case PP_POP:
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (tline) {
+ if (!tok_type_(tline, TOK_ID)) {
+ nasm_error(ERR_NONFATAL, "`%s' expects a context identifier",
+ pp_directives[i]);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND; /* but we did _something_ */
+ }
+ if (tline->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after `%s' ignored",
+ pp_directives[i]);
+ p = nasm_strdup(tline->text);
+ } else {
+ p = NULL; /* Anonymous */
+ }
+
+ if (i == PP_PUSH) {
+ ctx = nasm_malloc(sizeof(Context));
+ ctx->next = cstk;
+ hash_init(&ctx->localmac, HASH_SMALL);
+ ctx->name = p;
+ ctx->number = unique++;
+ cstk = ctx;
+ } else {
+ /* %pop or %repl */
+ if (!cstk) {
+ nasm_error(ERR_NONFATAL, "`%s': context stack is empty",
+ pp_directives[i]);
+ } else if (i == PP_POP) {
+ if (p && (!cstk->name || nasm_stricmp(p, cstk->name)))
+ nasm_error(ERR_NONFATAL, "`%%pop' in wrong context: %s, "
+ "expected %s",
+ cstk->name ? cstk->name : "anonymous", p);
+ else
+ ctx_pop();
+ } else {
+ /* i == PP_REPL */
+ nasm_free(cstk->name);
+ cstk->name = p;
+ p = NULL;
+ }
+ nasm_free(p);
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ case PP_FATAL:
+ severity = ERR_FATAL;
+ goto issue_error;
+ case PP_ERROR:
+ severity = ERR_NONFATAL;
+ goto issue_error;
+ case PP_WARNING:
+ severity = ERR_WARNING|ERR_WARN_USER;
+ goto issue_error;
+
+issue_error:
+ {
+ /* Only error out if this is the final pass */
+ if (pass != 2 && i != PP_FATAL)
+ return DIRECTIVE_FOUND;
+
+ tline->next = expand_smacro(tline->next);
+ tline = tline->next;
+ skip_white_(tline);
+ t = tline ? tline->next : NULL;
+ skip_white_(t);
+ if (tok_type_(tline, TOK_STRING) && !t) {
+ /* The line contains only a quoted string */
+ p = tline->text;
+ nasm_unquote(p, NULL); /* Ignore NUL character truncation */
+ nasm_error(severity, "%s", p);
+ } else {
+ /* Not a quoted string, or more than a quoted string */
+ p = detoken(tline, false);
+ nasm_error(severity, "%s", p);
+ nasm_free(p);
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ CASE_PP_IF:
+ if (istk->conds && !emitting(istk->conds->state))
+ j = COND_NEVER;
+ else {
+ j = if_condition(tline->next, i);
+ tline->next = NULL; /* it got freed */
+ j = j < 0 ? COND_NEVER : j ? COND_IF_TRUE : COND_IF_FALSE;
+ }
+ cond = nasm_malloc(sizeof(Cond));
+ cond->next = istk->conds;
+ cond->state = j;
+ istk->conds = cond;
+ if(istk->mstk)
+ istk->mstk->condcnt ++;
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ CASE_PP_ELIF:
+ if (!istk->conds)
+ nasm_error(ERR_FATAL, "`%s': no matching `%%if'", pp_directives[i]);
+ switch(istk->conds->state) {
+ case COND_IF_TRUE:
+ istk->conds->state = COND_DONE;
+ break;
+
+ case COND_DONE:
+ case COND_NEVER:
+ break;
+
+ case COND_ELSE_TRUE:
+ case COND_ELSE_FALSE:
+ nasm_error(ERR_WARNING|ERR_PASS1|ERR_PP_PRECOND,
+ "`%%elif' after `%%else' ignored");
+ istk->conds->state = COND_NEVER;
+ break;
+
+ case COND_IF_FALSE:
+ /*
+ * IMPORTANT: In the case of %if, we will already have
+ * called expand_mmac_params(); however, if we're
+ * processing an %elif we must have been in a
+ * non-emitting mode, which would have inhibited
+ * the normal invocation of expand_mmac_params().
+ * Therefore, we have to do it explicitly here.
+ */
+ j = if_condition(expand_mmac_params(tline->next), i);
+ tline->next = NULL; /* it got freed */
+ istk->conds->state =
+ j < 0 ? COND_NEVER : j ? COND_IF_TRUE : COND_IF_FALSE;
+ break;
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_ELSE:
+ if (tline->next)
+ nasm_error(ERR_WARNING|ERR_PASS1|ERR_PP_PRECOND,
+ "trailing garbage after `%%else' ignored");
+ if (!istk->conds)
+ nasm_fatal(0, "`%%else: no matching `%%if'");
+ switch(istk->conds->state) {
+ case COND_IF_TRUE:
+ case COND_DONE:
+ istk->conds->state = COND_ELSE_FALSE;
+ break;
+
+ case COND_NEVER:
+ break;
+
+ case COND_IF_FALSE:
+ istk->conds->state = COND_ELSE_TRUE;
+ break;
+
+ case COND_ELSE_TRUE:
+ case COND_ELSE_FALSE:
+ nasm_error(ERR_WARNING|ERR_PASS1|ERR_PP_PRECOND,
+ "`%%else' after `%%else' ignored.");
+ istk->conds->state = COND_NEVER;
+ break;
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_ENDIF:
+ if (tline->next)
+ nasm_error(ERR_WARNING|ERR_PASS1|ERR_PP_PRECOND,
+ "trailing garbage after `%%endif' ignored");
+ if (!istk->conds)
+ nasm_error(ERR_FATAL, "`%%endif': no matching `%%if'");
+ cond = istk->conds;
+ istk->conds = cond->next;
+ nasm_free(cond);
+ if(istk->mstk)
+ istk->mstk->condcnt --;
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_RMACRO:
+ case PP_IRMACRO:
+ case PP_MACRO:
+ case PP_IMACRO:
+ if (defining) {
+ nasm_error(ERR_FATAL, "`%s': already defining a macro",
+ pp_directives[i]);
+ return DIRECTIVE_FOUND;
+ }
+ defining = nasm_zalloc(sizeof(MMacro));
+ defining->max_depth =
+ (i == PP_RMACRO) || (i == PP_IRMACRO) ? DEADMAN_LIMIT : 0;
+ defining->casesense = (i == PP_MACRO) || (i == PP_RMACRO);
+ if (!parse_mmacro_spec(tline, defining, pp_directives[i])) {
+ nasm_free(defining);
+ defining = NULL;
+ return DIRECTIVE_FOUND;
+ }
+
+ src_get(&defining->xline, &defining->fname);
+
+ mmac = (MMacro *) hash_findix(&mmacros, defining->name);
+ while (mmac) {
+ if (!strcmp(mmac->name, defining->name) &&
+ (mmac->nparam_min <= defining->nparam_max
+ || defining->plus)
+ && (defining->nparam_min <= mmac->nparam_max
+ || mmac->plus)) {
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "redefining multi-line macro `%s'", defining->name);
+ return DIRECTIVE_FOUND;
+ }
+ mmac = mmac->next;
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_ENDM:
+ case PP_ENDMACRO:
+ if (! (defining && defining->name)) {
+ nasm_error(ERR_NONFATAL, "`%s': not defining a macro", tline->text);
+ return DIRECTIVE_FOUND;
+ }
+ mmhead = (MMacro **) hash_findi_add(&mmacros, defining->name);
+ defining->next = *mmhead;
+ *mmhead = defining;
+ defining = NULL;
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_EXITMACRO:
+ /*
+ * We must search along istk->expansion until we hit a
+ * macro-end marker for a macro with a name. Then we
+ * bypass all lines between exitmacro and endmacro.
+ */
+ list_for_each(l, istk->expansion)
+ if (l->finishes && l->finishes->name)
+ break;
+
+ if (l) {
+ /*
+ * Remove all conditional entries relative to this
+ * macro invocation. (safe to do in this context)
+ */
+ for ( ; l->finishes->condcnt > 0; l->finishes->condcnt --) {
+ cond = istk->conds;
+ istk->conds = cond->next;
+ nasm_free(cond);
+ }
+ istk->expansion = l;
+ } else {
+ nasm_error(ERR_NONFATAL, "`%%exitmacro' not within `%%macro' block");
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_UNMACRO:
+ case PP_UNIMACRO:
+ {
+ MMacro **mmac_p;
+ MMacro spec;
+
+ spec.casesense = (i == PP_UNMACRO);
+ if (!parse_mmacro_spec(tline, &spec, pp_directives[i])) {
+ return DIRECTIVE_FOUND;
+ }
+ mmac_p = (MMacro **) hash_findi(&mmacros, spec.name, NULL);
+ while (mmac_p && *mmac_p) {
+ mmac = *mmac_p;
+ if (mmac->casesense == spec.casesense &&
+ !mstrcmp(mmac->name, spec.name, spec.casesense) &&
+ mmac->nparam_min == spec.nparam_min &&
+ mmac->nparam_max == spec.nparam_max &&
+ mmac->plus == spec.plus) {
+ *mmac_p = mmac->next;
+ free_mmacro(mmac);
+ } else {
+ mmac_p = &mmac->next;
+ }
+ }
+ free_tlist(origline);
+ free_tlist(spec.dlist);
+ return DIRECTIVE_FOUND;
+ }
+
+ case PP_ROTATE:
+ if (tline->next && tline->next->type == TOK_WHITESPACE)
+ tline = tline->next;
+ if (!tline->next) {
+ free_tlist(origline);
+ nasm_error(ERR_NONFATAL, "`%%rotate' missing rotate count");
+ return DIRECTIVE_FOUND;
+ }
+ t = expand_smacro(tline->next);
+ tline->next = NULL;
+ free_tlist(origline);
+ tline = t;
+ tptr = &t;
+ tokval.t_type = TOKEN_INVALID;
+ evalresult =
+ evaluate(ppscan, tptr, &tokval, NULL, pass, NULL);
+ free_tlist(tline);
+ if (!evalresult)
+ return DIRECTIVE_FOUND;
+ if (tokval.t_type)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after expression ignored");
+ if (!is_simple(evalresult)) {
+ nasm_error(ERR_NONFATAL, "non-constant value given to `%%rotate'");
+ return DIRECTIVE_FOUND;
+ }
+ mmac = istk->mstk;
+ while (mmac && !mmac->name) /* avoid mistaking %reps for macros */
+ mmac = mmac->next_active;
+ if (!mmac) {
+ nasm_error(ERR_NONFATAL, "`%%rotate' invoked outside a macro call");
+ } else if (mmac->nparam == 0) {
+ nasm_error(ERR_NONFATAL,
+ "`%%rotate' invoked within macro without parameters");
+ } else {
+ int rotate = mmac->rotate + reloc_value(evalresult);
+
+ rotate %= (int)mmac->nparam;
+ if (rotate < 0)
+ rotate += mmac->nparam;
+
+ mmac->rotate = rotate;
+ }
+ return DIRECTIVE_FOUND;
+
+ case PP_REP:
+ nolist = false;
+ do {
+ tline = tline->next;
+ } while (tok_type_(tline, TOK_WHITESPACE));
+
+ if (tok_type_(tline, TOK_ID) &&
+ nasm_stricmp(tline->text, ".nolist") == 0) {
+ nolist = true;
+ do {
+ tline = tline->next;
+ } while (tok_type_(tline, TOK_WHITESPACE));
+ }
+
+ if (tline) {
+ t = expand_smacro(tline);
+ tptr = &t;
+ tokval.t_type = TOKEN_INVALID;
+ evalresult =
+ evaluate(ppscan, tptr, &tokval, NULL, pass, NULL);
+ if (!evalresult) {
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ if (tokval.t_type)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after expression ignored");
+ if (!is_simple(evalresult)) {
+ nasm_error(ERR_NONFATAL, "non-constant value given to `%%rep'");
+ return DIRECTIVE_FOUND;
+ }
+ count = reloc_value(evalresult);
+ if (count >= REP_LIMIT) {
+ nasm_error(ERR_NONFATAL, "`%%rep' value exceeds limit");
+ count = 0;
+ } else
+ count++;
+ } else {
+ nasm_error(ERR_NONFATAL, "`%%rep' expects a repeat count");
+ count = 0;
+ }
+ free_tlist(origline);
+
+ tmp_defining = defining;
+ defining = nasm_malloc(sizeof(MMacro));
+ defining->prev = NULL;
+ defining->name = NULL; /* flags this macro as a %rep block */
+ defining->casesense = false;
+ defining->plus = false;
+ defining->nolist = nolist;
+ defining->in_progress = count;
+ defining->max_depth = 0;
+ defining->nparam_min = defining->nparam_max = 0;
+ defining->defaults = NULL;
+ defining->dlist = NULL;
+ defining->expansion = NULL;
+ defining->next_active = istk->mstk;
+ defining->rep_nest = tmp_defining;
+ return DIRECTIVE_FOUND;
+
+ case PP_ENDREP:
+ if (!defining || defining->name) {
+ nasm_error(ERR_NONFATAL, "`%%endrep': no matching `%%rep'");
+ return DIRECTIVE_FOUND;
+ }
+
+ /*
+ * Now we have a "macro" defined - although it has no name
+ * and we won't be entering it in the hash tables - we must
+ * push a macro-end marker for it on to istk->expansion.
+ * After that, it will take care of propagating itself (a
+ * macro-end marker line for a macro which is really a %rep
+ * block will cause the macro to be re-expanded, complete
+ * with another macro-end marker to ensure the process
+ * continues) until the whole expansion is forcibly removed
+ * from istk->expansion by a %exitrep.
+ */
+ l = nasm_malloc(sizeof(Line));
+ l->next = istk->expansion;
+ l->finishes = defining;
+ l->first = NULL;
+ istk->expansion = l;
+
+ istk->mstk = defining;
+
+ lfmt->uplevel(defining->nolist ? LIST_MACRO_NOLIST : LIST_MACRO);
+ tmp_defining = defining;
+ defining = defining->rep_nest;
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_EXITREP:
+ /*
+ * We must search along istk->expansion until we hit a
+ * macro-end marker for a macro with no name. Then we set
+ * its `in_progress' field to 1.
+ */
+ list_for_each(l, istk->expansion)
+ if (l->finishes && !l->finishes->name)
+ break;
+
+ if (l)
+ l->finishes->in_progress = 1;
+ else
+ nasm_error(ERR_NONFATAL, "`%%exitrep' not within `%%rep' block");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_XDEFINE:
+ case PP_IXDEFINE:
+ case PP_DEFINE:
+ case PP_IDEFINE:
+ casesense = (i == PP_DEFINE || i == PP_XDEFINE);
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL, "`%s' expects a macro identifier",
+ pp_directives[i]);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ param_start = tline = tline->next;
+ nparam = 0;
+
+ /* Expand the macro definition now for %xdefine and %ixdefine */
+ if ((i == PP_XDEFINE) || (i == PP_IXDEFINE))
+ tline = expand_smacro(tline);
+
+ if (tok_is_(tline, "(")) {
+ /*
+ * This macro has parameters.
+ */
+
+ tline = tline->next;
+ while (1) {
+ skip_white_(tline);
+ if (!tline) {
+ nasm_error(ERR_NONFATAL, "parameter identifier expected");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ if (tline->type != TOK_ID) {
+ nasm_error(ERR_NONFATAL,
+ "`%s': parameter identifier expected",
+ tline->text);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ tline->type = TOK_SMAC_PARAM + nparam++;
+ tline = tline->next;
+ skip_white_(tline);
+ if (tok_is_(tline, ",")) {
+ tline = tline->next;
+ } else {
+ if (!tok_is_(tline, ")")) {
+ nasm_error(ERR_NONFATAL,
+ "`)' expected to terminate macro template");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ break;
+ }
+ }
+ last = tline;
+ tline = tline->next;
+ }
+ if (tok_type_(tline, TOK_WHITESPACE))
+ last = tline, tline = tline->next;
+ macro_start = NULL;
+ last->next = NULL;
+ t = tline;
+ while (t) {
+ if (t->type == TOK_ID) {
+ list_for_each(tt, param_start)
+ if (tt->type >= TOK_SMAC_PARAM &&
+ !strcmp(tt->text, t->text))
+ t->type = tt->type;
+ }
+ tt = t->next;
+ t->next = macro_start;
+ macro_start = t;
+ t = tt;
+ }
+ /*
+ * Good. We now have a macro name, a parameter count, and a
+ * token list (in reverse order) for an expansion. We ought
+ * to be OK just to create an SMacro, store it, and let
+ * free_tlist have the rest of the line (which we have
+ * carefully re-terminated after chopping off the expansion
+ * from the end).
+ */
+ define_smacro(ctx, mname, casesense, nparam, macro_start);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_UNDEF:
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL, "`%%undef' expects a macro identifier");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ if (tline->next) {
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after macro name ignored");
+ }
+
+ /* Find the context that symbol belongs to */
+ ctx = get_ctx(tline->text, &mname);
+ undef_smacro(ctx, mname);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_DEFSTR:
+ case PP_IDEFSTR:
+ casesense = (i == PP_DEFSTR);
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL, "`%s' expects a macro identifier",
+ pp_directives[i]);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ while (tok_type_(tline, TOK_WHITESPACE))
+ tline = delete_Token(tline);
+
+ p = detoken(tline, false);
+ macro_start = nasm_malloc(sizeof(*macro_start));
+ macro_start->next = NULL;
+ macro_start->text = nasm_quote(p, strlen(p));
+ macro_start->type = TOK_STRING;
+ macro_start->a.mac = NULL;
+ nasm_free(p);
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a string token to use as an expansion. Create
+ * and store an SMacro.
+ */
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_DEFTOK:
+ case PP_IDEFTOK:
+ casesense = (i == PP_DEFTOK);
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%s' expects a macro identifier as first parameter",
+ pp_directives[i]);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ t = tline;
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+ /* t should now point to the string */
+ if (!tok_type_(t, TOK_STRING)) {
+ nasm_error(ERR_NONFATAL,
+ "`%s` requires string as second parameter",
+ pp_directives[i]);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ /*
+ * Convert the string to a token stream. Note that smacros
+ * are stored with the token stream reversed, so we have to
+ * reverse the output of tokenize().
+ */
+ nasm_unquote_cstr(t->text, i);
+ macro_start = reverse_tokens(tokenize(t->text));
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a token list to use as an expansion. Create
+ * and store an SMacro.
+ */
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_PATHSEARCH:
+ {
+ FILE *fp;
+ StrList *xsl = NULL;
+ StrList **xst = &xsl;
+
+ casesense = true;
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%%pathsearch' expects a macro identifier as first parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ t = tline;
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+
+ if (!t || (t->type != TOK_STRING &&
+ t->type != TOK_INTERNAL_STRING)) {
+ nasm_error(ERR_NONFATAL, "`%%pathsearch' expects a file name");
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND; /* but we did _something_ */
+ }
+ if (t->next)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after `%%pathsearch' ignored");
+ p = t->text;
+ if (t->type != TOK_INTERNAL_STRING)
+ nasm_unquote(p, NULL);
+
+ fp = inc_fopen(p, &xsl, &xst, true, NF_TEXT);
+ if (fp) {
+ p = xsl->str;
+ fclose(fp); /* Don't actually care about the file */
+ }
+ macro_start = nasm_malloc(sizeof(*macro_start));
+ macro_start->next = NULL;
+ macro_start->text = nasm_quote(p, strlen(p));
+ macro_start->type = TOK_STRING;
+ macro_start->a.mac = NULL;
+ if (xsl)
+ nasm_free(xsl);
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a string token to use as an expansion. Create
+ * and store an SMacro.
+ */
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ case PP_STRLEN:
+ casesense = true;
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%%strlen' expects a macro identifier as first parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ t = tline;
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+ /* t should now point to the string */
+ if (!tok_type_(t, TOK_STRING)) {
+ nasm_error(ERR_NONFATAL,
+ "`%%strlen` requires string as second parameter");
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ macro_start = nasm_malloc(sizeof(*macro_start));
+ macro_start->next = NULL;
+ make_tok_num(macro_start, nasm_unquote(t->text, NULL));
+ macro_start->a.mac = NULL;
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a numeric token to use as an expansion. Create
+ * and store an SMacro.
+ */
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_STRCAT:
+ casesense = true;
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%%strcat' expects a macro identifier as first parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ len = 0;
+ list_for_each(t, tline) {
+ switch (t->type) {
+ case TOK_WHITESPACE:
+ break;
+ case TOK_STRING:
+ len += t->a.len = nasm_unquote(t->text, NULL);
+ break;
+ case TOK_OTHER:
+ if (!strcmp(t->text, ",")) /* permit comma separators */
+ break;
+ /* else fall through */
+ default:
+ nasm_error(ERR_NONFATAL,
+ "non-string passed to `%%strcat' (%d)", t->type);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ }
+
+ p = pp = nasm_malloc(len);
+ list_for_each(t, tline) {
+ if (t->type == TOK_STRING) {
+ memcpy(p, t->text, t->a.len);
+ p += t->a.len;
+ }
+ }
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a string token to use as an expansion. Create
+ * and store an SMacro.
+ */
+ macro_start = new_Token(NULL, TOK_STRING, NULL, 0);
+ macro_start->text = nasm_quote(pp, len);
+ nasm_free(pp);
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_SUBSTR:
+ {
+ int64_t start, count;
+ size_t len;
+
+ casesense = true;
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%%substr' expects a macro identifier as first parameter");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ if (tline) /* skip expanded id */
+ t = tline->next;
+ while (tok_type_(t, TOK_WHITESPACE))
+ t = t->next;
+
+ /* t should now point to the string */
+ if (!tok_type_(t, TOK_STRING)) {
+ nasm_error(ERR_NONFATAL,
+ "`%%substr` requires string as second parameter");
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ tt = t->next;
+ tptr = &tt;
+ tokval.t_type = TOKEN_INVALID;
+ evalresult = evaluate(ppscan, tptr, &tokval, NULL, pass, NULL);
+ if (!evalresult) {
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ } else if (!is_simple(evalresult)) {
+ nasm_error(ERR_NONFATAL, "non-constant value given to `%%substr`");
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ start = evalresult->value - 1;
+
+ while (tok_type_(tt, TOK_WHITESPACE))
+ tt = tt->next;
+ if (!tt) {
+ count = 1; /* Backwards compatibility: one character */
+ } else {
+ tokval.t_type = TOKEN_INVALID;
+ evalresult = evaluate(ppscan, tptr, &tokval, NULL, pass, NULL);
+ if (!evalresult) {
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ } else if (!is_simple(evalresult)) {
+ nasm_error(ERR_NONFATAL, "non-constant value given to `%%substr`");
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ count = evalresult->value;
+ }
+
+ len = nasm_unquote(t->text, NULL);
+
+ /* make start and count being in range */
+ if (start < 0)
+ start = 0;
+ if (count < 0)
+ count = len + count + 1 - start;
+ if (start + count > (int64_t)len)
+ count = len - start;
+ if (!len || count < 0 || start >=(int64_t)len)
+ start = -1, count = 0; /* empty string */
+
+ macro_start = nasm_malloc(sizeof(*macro_start));
+ macro_start->next = NULL;
+ macro_start->text = nasm_quote((start < 0) ? "" : t->text + start, count);
+ macro_start->type = TOK_STRING;
+ macro_start->a.mac = NULL;
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a string token to use as an expansion. Create
+ * and store an SMacro.
+ */
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(tline);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ case PP_ASSIGN:
+ case PP_IASSIGN:
+ casesense = (i == PP_ASSIGN);
+
+ tline = tline->next;
+ skip_white_(tline);
+ tline = expand_id(tline);
+ if (!tline || (tline->type != TOK_ID &&
+ (tline->type != TOK_PREPROC_ID ||
+ tline->text[1] != '$'))) {
+ nasm_error(ERR_NONFATAL,
+ "`%%%sassign' expects a macro identifier",
+ (i == PP_IASSIGN ? "i" : ""));
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ ctx = get_ctx(tline->text, &mname);
+ last = tline;
+ tline = expand_smacro(tline->next);
+ last->next = NULL;
+
+ t = tline;
+ tptr = &t;
+ tokval.t_type = TOKEN_INVALID;
+ evalresult = evaluate(ppscan, tptr, &tokval, NULL, pass, NULL);
+ free_tlist(tline);
+ if (!evalresult) {
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ if (tokval.t_type)
+ nasm_error(ERR_WARNING|ERR_PASS1,
+ "trailing garbage after expression ignored");
+
+ if (!is_simple(evalresult)) {
+ nasm_error(ERR_NONFATAL,
+ "non-constant value given to `%%%sassign'",
+ (i == PP_IASSIGN ? "i" : ""));
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+
+ macro_start = nasm_malloc(sizeof(*macro_start));
+ macro_start->next = NULL;
+ make_tok_num(macro_start, reloc_value(evalresult));
+ macro_start->a.mac = NULL;
+
+ /*
+ * We now have a macro name, an implicit parameter count of
+ * zero, and a numeric token to use as an expansion. Create
+ * and store an SMacro.
+ */
+ define_smacro(ctx, mname, casesense, 0, macro_start);
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ case PP_LINE:
+ /*
+ * Syntax is `%line nnn[+mmm] [filename]'
+ */
+ tline = tline->next;
+ skip_white_(tline);
+ if (!tok_type_(tline, TOK_NUMBER)) {
+ nasm_error(ERR_NONFATAL, "`%%line' expects line number");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ k = readnum(tline->text, &err);
+ m = 1;
+ tline = tline->next;
+ if (tok_is_(tline, "+")) {
+ tline = tline->next;
+ if (!tok_type_(tline, TOK_NUMBER)) {
+ nasm_error(ERR_NONFATAL, "`%%line' expects line increment");
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+ }
+ m = readnum(tline->text, &err);
+ tline = tline->next;
+ }
+ skip_white_(tline);
+ src_set_linnum(k);
+ istk->lineinc = m;
+ if (tline) {
+ char *fname = detoken(tline, false);
+ src_set_fname(fname);
+ nasm_free(fname);
+ }
+ free_tlist(origline);
+ return DIRECTIVE_FOUND;
+
+ default:
+ nasm_error(ERR_FATAL,
+ "preprocessor directive `%s' not yet implemented",
+ pp_directives[i]);
+ return DIRECTIVE_FOUND;
+ }
+}
+
+/*
+ * Ensure that a macro parameter contains a condition code and
+ * nothing else (apart from surrounding whitespace and an optional
+ * trailing `,' separator).
+ *
+ * Returns the condition code index (the position found by bsii() in
+ * the conditions[] table), or -1 if the parameter is missing, is
+ * empty, is not a bare identifier, or is followed by anything other
+ * than a comma.
+ */
+static int find_cc(Token * t)
+{
+    Token *tt;
+
+    if (!t)
+        return -1;              /* Probably a %+ without a space */
+
+    skip_white_(t);
+    /*
+     * skip_white_() can run off the end of the list when the
+     * parameter consists of whitespace only, so use the NULL-safe
+     * type test instead of dereferencing t directly.
+     */
+    if (!tok_type_(t, TOK_ID))
+        return -1;
+    tt = t->next;
+    skip_white_(tt);
+    /* Anything after the identifier other than a comma disqualifies it */
+    if (tt && (tt->type != TOK_OTHER || strcmp(tt->text, ",")))
+        return -1;
+
+    return bsii(t->text, (const char **)conditions, ARRAY_SIZE(conditions));
+}
+
+/*
+ * This routine walks over the token stream and handles token
+ * pasting. If @handle_explicit is true then explicit pasting
+ * (TOK_PASTE tokens) is handled as well; otherwise only implicit
+ * pasting is performed and TOK_PASTE tokens are left alone.
+ *
+ * @head is the address of the pointer to the first token; it is
+ * updated when the first token itself gets replaced. @m is a table
+ * of @mnum head/tail type masks that select which adjacent tokens
+ * are pasted implicitly.
+ *
+ * Returns true if an explicit paste token was consumed or an
+ * implicit paste was performed.
+ */
+static bool paste_tokens(Token **head, const struct tokseq_match *m,
+ size_t mnum, bool handle_explicit)
+{
+ Token *tok, *next, **prev_next, **prev_nonspace;
+ bool pasted = false;
+ char *buf, *p;
+ size_t len, i;
+
+ /*
+ * The last token before pasting. We need it
+ * to be able to connect new handled tokens.
+ * In other words if there were a tokens stream
+ *
+ * A -> B -> C -> D
+ *
+ * and we've joined tokens B and C, the resulting
+ * stream should be
+ *
+ * A -> BC -> D
+ *
+ * prev_next is the link that points at the current token (NULL
+ * while the current token is still reached through *head), and
+ * prev_nonspace is the link that points at the most recent token
+ * which is neither whitespace nor a paste token (NULL if there
+ * has not been one yet).
+ */
+ tok = *head;
+ prev_next = NULL;
+
+ if (!tok_type_(tok, TOK_WHITESPACE) && !tok_type_(tok, TOK_PASTE))
+ prev_nonspace = head;
+ else
+ prev_nonspace = NULL;
+
+ while (tok && (next = tok->next)) { /* a final token has nothing to paste with */
+
+ switch (tok->type) {
+ case TOK_WHITESPACE:
+ /* Zap redundant whitespaces */
+ while (tok_type_(next, TOK_WHITESPACE))
+ next = delete_Token(next);
+ tok->next = next;
+ break;
+
+ case TOK_PASTE:
+ /* Explicit pasting */
+ if (!handle_explicit)
+ break;
+ next = delete_Token(tok); /* drop the paste token itself */
+
+ while (tok_type_(next, TOK_WHITESPACE))
+ next = delete_Token(next);
+
+ if (!pasted)
+ pasted = true;
+
+ /* Left pasting token is start of line */
+ if (!prev_nonspace)
+ nasm_error(ERR_FATAL, "No lvalue found on pasting");
+
+ /*
+ * No ending token, this might happen in two
+ * cases
+ *
+ * 1) There indeed no right token at all
+ * 2) There is a bare "%define ID" statement,
+ * and @ID does expand to whitespace.
+ *
+ * So technically we need to do a grammar analysis
+ * in another stage of parsing, but for now lets don't
+ * change the behaviour people used to. Simply allow
+ * whitespace after paste token.
+ */
+ if (!next) {
+ /*
+ * Zap ending space tokens and that's all.
+ */
+ tok = (*prev_nonspace)->next;
+ while (tok_type_(tok, TOK_WHITESPACE))
+ tok = delete_Token(tok);
+ tok = *prev_nonspace;
+ tok->next = NULL;
+ break;
+ }
+
+ tok = *prev_nonspace; /* left operand of the paste */
+ while (tok_type_(tok, TOK_WHITESPACE))
+ tok = delete_Token(tok);
+ len = strlen(tok->text);
+ len += strlen(next->text);
+
+ p = buf = nasm_malloc(len + 1); /* room for both texts plus NUL */
+ strcpy(p, tok->text);
+ p = strchr(p, '\0');
+ strcpy(p, next->text);
+
+ delete_Token(tok); /* left operand is superseded by the joined text */
+
+ tok = tokenize(buf); /* the joined text may yield one or more tokens */
+ nasm_free(buf);
+
+ *prev_nonspace = tok;
+ while (tok && tok->next)
+ tok = tok->next;
+
+ tok->next = delete_Token(next); /* drop the right operand, relink the tail */
+
+ /* Restart from pasted tokens head */
+ tok = *prev_nonspace;
+ break;
+
+ default:
+ /* implicit pasting */
+ for (i = 0; i < mnum; i++) {
+ if (!(PP_CONCAT_MATCH(tok, m[i].mask_head)))
+ continue;
+
+ len = 0;
+ while (next && PP_CONCAT_MATCH(next, m[i].mask_tail)) {
+ len += strlen(next->text);
+ next = next->next;
+ }
+
+ /*
+ * No match.
+ * NOTE(review): next starts out as tok->next and only moves
+ * forward, so this test looks like it can never be true; a
+ * head token with no matching tail still seems to get
+ * re-tokenized below and reported as pasted -- confirm.
+ */
+ if (tok == next)
+ break;
+
+ len += strlen(tok->text);
+ p = buf = nasm_malloc(len + 1);
+
+ while (tok != next) { /* concatenate and free the head plus all matched tail tokens */
+ strcpy(p, tok->text);
+ p = strchr(p, '\0');
+ tok = delete_Token(tok);
+ }
+
+ tok = tokenize(buf);
+ nasm_free(buf);
+
+ if (prev_next)
+ *prev_next = tok;
+ else
+ *head = tok;
+
+ /*
+ * Connect pasted into original stream,
+ * ie A -> new-tokens -> B
+ */
+ while (tok && tok->next)
+ tok = tok->next;
+ tok->next = next;
+
+ if (!pasted)
+ pasted = true;
+
+ /* Restart from pasted tokens head */
+ tok = prev_next ? *prev_next : *head;
+ }
+
+ break;
+ }
+
+ prev_next = &tok->next;
+
+ if (tok->next &&
+ !tok_type_(tok->next, TOK_WHITESPACE) &&
+ !tok_type_(tok->next, TOK_PASTE))
+ prev_nonspace = prev_next; /* remember the link to the next pasteable token */
+
+ tok = tok->next;
+ }
+
+ return pasted;
+}
+
+/*
+ * Expand a macro parameter range token of the form %{x:y} into a
+ * freshly allocated, comma-separated copy of parameters x through y.
+ *
+ * @mac    the multi-line macro whose parameters are referenced
+ * @tline  the range token itself; its text is parsed for both indices
+ * @last   on success, receives the address of the `next' field of the
+ *         final token produced, so the caller can link in the tail
+ *
+ * Negative indices count back from the last parameter. If the first
+ * index is greater than the second, the parameters are emitted in
+ * reverse order. The current %rotate offset is applied.
+ *
+ * On success the head of the new token list is returned. On error
+ * a message is issued and @tline itself is returned, with *last
+ * left untouched; the caller detects failure by comparing the result
+ * against @tline.
+ */
+static Token *expand_mmac_params_range(MMacro *mac, Token *tline, Token ***last)
+{
+    Token *t = tline, **tt, *tm, *head;
+    char *pos;
+    int fst, lst, j, i;
+
+    pos = strchr(tline->text, ':');
+    nasm_assert(pos);
+
+    lst = atoi(pos + 1);
+    fst = atoi(tline->text + 1);
+
+    /*
+     * only macros params are accounted so
+     * if someone passes %0 -- we reject such
+     * value(s)
+     */
+    if (lst == 0 || fst == 0)
+        goto err;
+
+    /* the values should be sane */
+    if ((fst > (int)mac->nparam || fst < (-(int)mac->nparam)) ||
+        (lst > (int)mac->nparam || lst < (-(int)mac->nparam)))
+        goto err;
+
+    /* translate negative (counted-from-the-end) indices */
+    fst = fst < 0 ? fst + (int)mac->nparam + 1: fst;
+    lst = lst < 0 ? lst + (int)mac->nparam + 1: lst;
+
+    /* counted from zero */
+    fst--, lst--;
+
+    /*
+     * It will be at least one token. Note we
+     * need to scan params until separator, otherwise
+     * only first token will be passed.
+     */
+    tm = mac->params[(fst + mac->rotate) % mac->nparam];
+    /*
+     * The first selected parameter is dereferenced unconditionally
+     * below, unlike the following ones which go through the
+     * NULL-safe tok_isnt_() loop; refuse an empty slot rather than
+     * crash on it.
+     */
+    if (!tm)
+        goto err;
+    head = new_Token(NULL, tm->type, tm->text, 0);
+    tt = &head->next, tm = tm->next;
+    while (tok_isnt_(tm, ",")) {
+        t = new_Token(NULL, tm->type, tm->text, 0);
+        *tt = t, tt = &t->next, tm = tm->next;
+    }
+
+    if (fst < lst) {
+        /* ascending range: fst+1 .. lst */
+        for (i = fst + 1; i <= lst; i++) {
+            t = new_Token(NULL, TOK_OTHER, ",", 0);
+            *tt = t, tt = &t->next;
+            j = (i + mac->rotate) % mac->nparam;
+            tm = mac->params[j];
+            while (tok_isnt_(tm, ",")) {
+                t = new_Token(NULL, tm->type, tm->text, 0);
+                *tt = t, tt = &t->next, tm = tm->next;
+            }
+        }
+    } else {
+        /* descending (or single-element) range: fst-1 .. lst */
+        for (i = fst - 1; i >= lst; i--) {
+            t = new_Token(NULL, TOK_OTHER, ",", 0);
+            *tt = t, tt = &t->next;
+            j = (i + mac->rotate) % mac->nparam;
+            tm = mac->params[j];
+            while (tok_isnt_(tm, ",")) {
+                t = new_Token(NULL, tm->type, tm->text, 0);
+                *tt = t, tt = &t->next, tm = tm->next;
+            }
+        }
+    }
+
+    *last = tt;
+    return head;
+
+err:
+    nasm_error(ERR_NONFATAL, "`%%{%s}': macro parameters out of range",
+               &tline->text[1]);
+    return tline;
+}
+
+/*
+ * Expand MMacro-local things: parameter references (%0, %n, %+n,
+ * %-n) and MMacro-local identifiers (%%foo) as well as
+ * macro indirection (%[...]) and range (%{..:..}).
+ *
+ * The input list is consumed: tokens are either moved to the output
+ * list, rewritten in place, or deleted and replaced by copies of the
+ * referenced parameter tokens. The head of the resulting list is
+ * returned. If anything was substituted, implicit token pasting is
+ * run over the result before returning.
+ */
+static Token *expand_mmac_params(Token * tline)
+{
+ Token *t, *tt, **tail, *thead;
+ bool changed = false;
+ char *pos;
+
+ tail = &thead;
+ thead = NULL;
+
+ while (tline) {
+ if (tline->type == TOK_PREPROC_ID &&
+ (((tline->text[1] == '+' || tline->text[1] == '-') && tline->text[2]) ||
+ (tline->text[1] >= '0' && tline->text[1] <= '9') ||
+ tline->text[1] == '%')) {
+ char *text = NULL;
+ int type = 0, cc; /* type = 0 to placate optimisers */
+ char tmpbuf[30];
+ unsigned int n;
+ int i;
+ MMacro *mac;
+
+ t = tline;
+ tline = tline->next;
+
+ mac = istk->mstk;
+ while (mac && !mac->name) /* avoid mistaking %reps for macros */
+ mac = mac->next_active;
+ if (!mac) {
+ nasm_error(ERR_NONFATAL, "`%s': not in a macro call", t->text);
+ } else {
+ pos = strchr(t->text, ':'); /* a colon marks a %{x:y} range */
+ if (!pos) {
+ switch (t->text[1]) {
+ /*
+ * We have to make a substitution of one of the
+ * forms %1, %-1, %+1, %%foo, %0.
+ */
+ case '0':
+ type = TOK_NUMBER;
+ snprintf(tmpbuf, sizeof(tmpbuf), "%d", mac->nparam);
+ text = nasm_strdup(tmpbuf);
+ break;
+ case '%':
+ type = TOK_ID;
+ snprintf(tmpbuf, sizeof(tmpbuf), "..@%"PRIu64".",
+ mac->unique);
+ text = nasm_strcat(tmpbuf, t->text + 2);
+ break;
+ case '-':
+ n = atoi(t->text + 2) - 1; /* zero-based; n is unsigned, so index 0 wraps and fails the range check */
+ if (n >= mac->nparam)
+ tt = NULL;
+ else {
+ if (mac->nparam > 1)
+ n = (n + mac->rotate) % mac->nparam;
+ tt = mac->params[n];
+ }
+ cc = find_cc(tt);
+ if (cc == -1) {
+ nasm_error(ERR_NONFATAL,
+ "macro parameter %d is not a condition code",
+ n + 1);
+ text = NULL;
+ } else {
+ type = TOK_ID;
+ if (inverse_ccs[cc] == -1) {
+ nasm_error(ERR_NONFATAL,
+ "condition code `%s' is not invertible",
+ conditions[cc]);
+ text = NULL;
+ } else
+ text = nasm_strdup(conditions[inverse_ccs[cc]]);
+ }
+ break;
+ case '+':
+ n = atoi(t->text + 2) - 1; /* zero-based, same wrap-around as for %-n */
+ if (n >= mac->nparam)
+ tt = NULL;
+ else {
+ if (mac->nparam > 1)
+ n = (n + mac->rotate) % mac->nparam;
+ tt = mac->params[n];
+ }
+ cc = find_cc(tt);
+ if (cc == -1) {
+ nasm_error(ERR_NONFATAL,
+ "macro parameter %d is not a condition code",
+ n + 1);
+ text = NULL;
+ } else {
+ type = TOK_ID;
+ text = nasm_strdup(conditions[cc]);
+ }
+ break;
+ default:
+ n = atoi(t->text + 1) - 1; /* plain %n reference, zero-based */
+ if (n >= mac->nparam)
+ tt = NULL;
+ else {
+ if (mac->nparam > 1)
+ n = (n + mac->rotate) % mac->nparam;
+ tt = mac->params[n];
+ }
+ if (tt) {
+ for (i = 0; i < mac->paramlen[n]; i++) { /* copy exactly paramlen[n] tokens of the parameter */
+ *tail = new_Token(NULL, tt->type, tt->text, 0);
+ tail = &(*tail)->next;
+ tt = tt->next;
+ }
+ }
+ text = NULL; /* we've done it here */
+ break;
+ }
+ } else {
+ /*
+ * seems we have a parameters range here
+ *
+ * On success the expansion is spliced in front of the
+ * remaining input and becomes the new tline, so its
+ * tokens go through this loop again. On failure the
+ * helper returns t itself and the token is just dropped
+ * below.
+ */
+ Token *head, **last;
+ head = expand_mmac_params_range(mac, t, &last);
+ if (head != t) {
+ *tail = head;
+ *last = tline;
+ tline = head;
+ text = NULL;
+ }
+ }
+ }
+ if (!text) { /* nothing to rewrite in place: discard the reference token */
+ delete_Token(t);
+ } else { /* reuse the reference token to carry the substituted text */
+ *tail = t;
+ tail = &t->next;
+ t->type = type;
+ nasm_free(t->text);
+ t->text = text;
+ t->a.mac = NULL;
+ }
+ changed = true;
+ continue;
+ } else if (tline->type == TOK_INDIRECT) {
+ t = tline;
+ tline = tline->next;
+ tt = tokenize(t->text); /* re-tokenize the indirect token's text and expand it recursively */
+ tt = expand_mmac_params(tt);
+ tt = expand_smacro(tt);
+ *tail = tt;
+ while (tt) {
+ tt->a.mac = NULL; /* Necessary? */
+ tail = &tt->next;
+ tt = tt->next;
+ }
+ delete_Token(t);
+ changed = true;
+ } else {
+ t = *tail = tline; /* ordinary token: move it to the output unchanged */
+ tline = tline->next;
+ t->a.mac = NULL;
+ tail = &t->next;
+ }
+ }
+ *tail = NULL;
+
+ if (changed) {
+ const struct tokseq_match t[] = {
+ {
+ PP_CONCAT_MASK(TOK_ID) |
+ PP_CONCAT_MASK(TOK_FLOAT), /* head */
+ PP_CONCAT_MASK(TOK_ID) |
+ PP_CONCAT_MASK(TOK_NUMBER) |
+ PP_CONCAT_MASK(TOK_FLOAT) |
+ PP_CONCAT_MASK(TOK_OTHER) /* tail */
+ },
+ {
+ PP_CONCAT_MASK(TOK_NUMBER), /* head */
+ PP_CONCAT_MASK(TOK_NUMBER) /* tail */
+ }
+ };
+ paste_tokens(&thead, t, ARRAY_SIZE(t), false); /* implicit pasting only; explicit paste tokens are left alone */
+ }
+
+ return thead;
+}
+
+/*
+ * Expand all single-line macro calls made in the given line.
+ * Return the expanded version of the line. The original is deemed
+ * to be destroyed in the process. (In reality we'll just move
+ * Tokens from input to output a lot of the time, rather than
+ * actually bothering to destroy and replicate.)
+ *
+ * If tline is non-NULL, the returned pointer is always tline itself:
+ * the first token of the result is copied back into the caller's
+ * first token, and an empty result is represented by turning that
+ * token into a single text-less TOK_WHITESPACE token. A NULL input
+ * yields NULL.
+ */
+
+static Token *expand_smacro(Token * tline)
+{
+    Token *t, *tt, *mstart, **tail, *thead;
+    SMacro *head = NULL, *m;
+    Token **params;
+    int *paramsize;
+    unsigned int nparam, sparam;
+    int brackets;
+    Token *org_tline = tline;
+    Context *ctx;
+    const char *mname;
+    int deadman = DEADMAN_LIMIT;
+    bool expanded;
+
+    /*
+     * Trick: we should avoid changing the start token pointer since it can
+     * be contained in "next" field of other token. Because of this
+     * we allocate a copy of first token and work with it; at the end of
+     * routine we copy it back
+     */
+    if (org_tline) {
+        tline = new_Token(org_tline->next, org_tline->type,
+                          org_tline->text, 0);
+        tline->a.mac = org_tline->a.mac;
+        nasm_free(org_tline->text);
+        org_tline->text = NULL;
+    }
+
+    expanded = true;            /* Always expand %+ at least once */
+
+again:
+    thead = NULL;
+    tail = &thead;
+
+    while (tline) {             /* main token loop */
+        if (!--deadman) {
+            nasm_error(ERR_NONFATAL, "interminable macro recursion");
+            goto err;
+        }
+
+        if ((mname = tline->text)) {
+            /* if this token is a local macro, look in local context */
+            if (tline->type == TOK_ID) {
+                head = (SMacro *)hash_findix(&smacros, mname);
+            } else if (tline->type == TOK_PREPROC_ID) {
+                ctx = get_ctx(mname, &mname);
+                head = ctx ? (SMacro *)hash_findix(&ctx->localmac, mname) : NULL;
+            } else
+                head = NULL;
+
+            /*
+             * We've hit an identifier. As in is_mmacro below, we first
+             * check whether the identifier is a single-line macro at
+             * all, then think about checking for parameters if
+             * necessary.
+             */
+            list_for_each(m, head)
+                if (!mstrcmp(m->name, mname, m->casesense))
+                    break;
+            if (m) {
+                mstart = tline;
+                params = NULL;
+                paramsize = NULL;
+                if (m->nparam == 0) {
+                    /*
+                     * Simple case: the macro is parameterless. Discard the
+                     * one token that the macro call took, and push the
+                     * expansion back on the to-do stack.
+                     */
+                    if (!m->expansion) {
+                        if (!strcmp("__FILE__", m->name)) {
+                            const char *file = src_get_fname();
+                            /*
+                             * Release the old token text before replacing
+                             * it, as the __LINE__ and __BITS__ cases below
+                             * do; otherwise it is leaked. mname aliases
+                             * that text but is reloaded at the top of the
+                             * loop before it is used again.
+                             */
+                            nasm_free(tline->text);
+                            tline->text = nasm_quote(file, strlen(file));
+                            tline->type = TOK_STRING;
+                            continue;
+                        }
+                        if (!strcmp("__LINE__", m->name)) {
+                            nasm_free(tline->text);
+                            make_tok_num(tline, src_get_linnum());
+                            continue;
+                        }
+                        if (!strcmp("__BITS__", m->name)) {
+                            nasm_free(tline->text);
+                            make_tok_num(tline, globalbits);
+                            continue;
+                        }
+                        /* empty expansion: the call simply disappears */
+                        tline = delete_Token(tline);
+                        continue;
+                    }
+                } else {
+                    /*
+                     * Complicated case: at least one macro with this name
+                     * exists and takes parameters. We must find the
+                     * parameters in the call, count them, find the SMacro
+                     * that corresponds to that form of the macro call, and
+                     * substitute for the parameters when we expand. What a
+                     * pain.
+                     */
+                    /*tline = tline->next;
+                       skip_white_(tline); */
+                    do {
+                        t = tline->next;
+                        while (tok_type_(t, TOK_SMAC_END)) {
+                            t->a.mac->in_progress = false;
+                            t->text = NULL;
+                            t = tline->next = delete_Token(t);
+                        }
+                        tline = t;
+                    } while (tok_type_(tline, TOK_WHITESPACE));
+                    if (!tok_is_(tline, "(")) {
+                        /*
+                         * This macro wasn't called with parameters: ignore
+                         * the call. (Behaviour borrowed from gnu cpp.)
+                         */
+                        tline = mstart;
+                        m = NULL;
+                    } else {
+                        int paren = 0;
+                        int white = 0;
+                        brackets = 0;
+                        nparam = 0;
+                        sparam = PARAM_DELTA;
+                        params = nasm_malloc(sparam * sizeof(Token *));
+                        params[0] = tline->next;
+                        paramsize = nasm_malloc(sparam * sizeof(int));
+                        paramsize[0] = 0;
+                        while (true) {  /* parameter loop */
+                            /*
+                             * For some unusual expansions
+                             * which concatenates function call
+                             */
+                            t = tline->next;
+                            while (tok_type_(t, TOK_SMAC_END)) {
+                                t->a.mac->in_progress = false;
+                                t->text = NULL;
+                                t = tline->next = delete_Token(t);
+                            }
+                            tline = t;
+
+                            if (!tline) {
+                                nasm_error(ERR_NONFATAL,
+                                           "macro call expects terminating `)'");
+                                break;
+                            }
+                            if (tline->type == TOK_WHITESPACE
+                                && brackets <= 0) {
+                                /*
+                                 * Leading whitespace is skipped; embedded
+                                 * whitespace is only counted once a later
+                                 * non-blank token proves it is not trailing.
+                                 */
+                                if (paramsize[nparam])
+                                    white++;
+                                else
+                                    params[nparam] = tline->next;
+                                continue;       /* parameter loop */
+                            }
+                            if (tline->type == TOK_OTHER
+                                && tline->text[1] == 0) {
+                                char ch = tline->text[0];
+                                if (ch == ',' && !paren && brackets <= 0) {
+                                    if (++nparam >= sparam) {
+                                        sparam += PARAM_DELTA;
+                                        params = nasm_realloc(params,
+                                                    sparam * sizeof(Token *));
+                                        paramsize = nasm_realloc(paramsize,
+                                                    sparam * sizeof(int));
+                                    }
+                                    params[nparam] = tline->next;
+                                    paramsize[nparam] = 0;
+                                    white = 0;
+                                    continue;   /* parameter loop */
+                                }
+                                if (ch == '{' &&
+                                    (brackets > 0 || (brackets == 0 &&
+                                                      !paramsize[nparam])))
+                                {
+                                    if (!(brackets++)) {
+                                        params[nparam] = tline->next;
+                                        continue;       /* parameter loop */
+                                    }
+                                }
+                                if (ch == '}' && brackets > 0)
+                                    if (--brackets == 0) {
+                                        /* -1: braces closed, expect , or ) next */
+                                        brackets = -1;
+                                        continue;       /* parameter loop */
+                                    }
+                                if (ch == '(' && !brackets)
+                                    paren++;
+                                if (ch == ')' && brackets <= 0)
+                                    if (--paren < 0)
+                                        break;
+                            }
+                            if (brackets < 0) {
+                                brackets = 0;
+                                nasm_error(ERR_NONFATAL, "braces do not "
+                                           "enclose all of macro parameter");
+                            }
+                            paramsize[nparam] += white + 1;
+                            white = 0;
+                        }       /* parameter loop */
+                        nparam++;
+                        while (m && (m->nparam != nparam ||
+                                     mstrcmp(m->name, mname,
+                                             m->casesense)))
+                            m = m->next;
+                        if (!m)
+                            /* nparam is unsigned, hence %u */
+                            nasm_error(ERR_WARNING|ERR_PASS1|ERR_WARN_MNP,
+                                       "macro `%s' exists, "
+                                       "but not taking %u parameters",
+                                       mstart->text, nparam);
+                    }
+                }
+                if (m && m->in_progress)
+                    m = NULL;
+                if (!m) {       /* in progress or didn't find '(' or wrong nparam */
+                    /*
+                     * Design question: should we handle !tline, which
+                     * indicates missing ')' here, or expand those
+                     * macros anyway, which requires the (t) test a few
+                     * lines down?
+                     */
+                    nasm_free(params);
+                    nasm_free(paramsize);
+                    tline = mstart;
+                } else {
+                    /*
+                     * Expand the macro: we are placed on the last token of the
+                     * call, so that we can easily split the call from the
+                     * following tokens. We also start by pushing an SMAC_END
+                     * token for the cycle removal.
+                     */
+                    t = tline;
+                    if (t) {
+                        tline = t->next;
+                        t->next = NULL;
+                    }
+                    tt = new_Token(tline, TOK_SMAC_END, NULL, 0);
+                    tt->a.mac = m;
+                    m->in_progress = true;
+                    tline = tt;
+                    /*
+                     * Each expansion token is pushed in front of tline, so
+                     * the result ends up in the reverse of the order in
+                     * which m->expansion is walked.
+                     */
+                    list_for_each(t, m->expansion) {
+                        if (t->type >= TOK_SMAC_PARAM) {
+                            Token *pcopy = tline, **ptail = &pcopy;
+                            Token *ttt, *pt;
+                            int i;
+
+                            ttt = params[t->type - TOK_SMAC_PARAM];
+                            i = paramsize[t->type - TOK_SMAC_PARAM];
+                            while (--i >= 0) {
+                                pt = *ptail = new_Token(tline, ttt->type,
+                                                        ttt->text, 0);
+                                ptail = &pt->next;
+                                ttt = ttt->next;
+                            }
+                            tline = pcopy;
+                        } else if (t->type == TOK_PREPROC_Q) {
+                            tt = new_Token(tline, TOK_ID, mname, 0);
+                            tline = tt;
+                        } else if (t->type == TOK_PREPROC_QQ) {
+                            tt = new_Token(tline, TOK_ID, m->name, 0);
+                            tline = tt;
+                        } else {
+                            tt = new_Token(tline, t->type, t->text, 0);
+                            tline = tt;
+                        }
+                    }
+
+                    /*
+                     * Having done that, get rid of the macro call, and clean
+                     * up the parameters.
+                     */
+                    nasm_free(params);
+                    nasm_free(paramsize);
+                    free_tlist(mstart);
+                    expanded = true;
+                    continue;   /* main token loop */
+                }
+            }
+        }
+
+        if (tline->type == TOK_SMAC_END) {
+            /* end of an expansion: the macro may be expanded again */
+            tline->a.mac->in_progress = false;
+            tline = delete_Token(tline);
+        } else {
+            /* not a macro call: move the token to the output list */
+            t = *tail = tline;
+            tline = tline->next;
+            t->a.mac = NULL;
+            t->next = NULL;
+            tail = &t->next;
+        }
+    }
+
+    /*
+     * Now scan the entire line and look for successive TOK_IDs that resulted
+     * after expansion (they can't be produced by tokenize()). The successive
+     * TOK_IDs should be concatenated.
+     * Also we look for %+ tokens and concatenate the tokens before and after
+     * them (without white spaces in between).
+     */
+    if (expanded) {
+        const struct tokseq_match t[] = {
+            {
+                PP_CONCAT_MASK(TOK_ID)          |
+                PP_CONCAT_MASK(TOK_PREPROC_ID),     /* head */
+                PP_CONCAT_MASK(TOK_ID)          |
+                PP_CONCAT_MASK(TOK_PREPROC_ID)  |
+                PP_CONCAT_MASK(TOK_NUMBER)          /* tail */
+            }
+        };
+        if (paste_tokens(&thead, t, ARRAY_SIZE(t), true)) {
+            /*
+             * If we concatenated something, *and* we had previously expanded
+             * an actual macro, scan the lines again for macros...
+             */
+            tline = thead;
+            expanded = false;
+            goto again;
+        }
+    }
+
+err:
+    if (org_tline) {
+        if (thead) {
+            *org_tline = *thead;
+            /* since we just gave text to org_line, don't free it */
+            thead->text = NULL;
+            delete_Token(thead);
+        } else {
+            /* the expression expanded to empty line;
+               we can't return NULL for some reasons
+               we just set the line to a single WHITESPACE token. */
+            memset(org_tline, 0, sizeof(*org_tline));
+            org_tline->text = NULL;
+            org_tline->type = TOK_WHITESPACE;
+        }
+        thead = org_tline;
+    }
+
+    return thead;
+}
+
+/*
+ * A variant of expand_smacro used only on macro identifiers, just
+ * before they are picked up by a directive. An identifier may be
+ * built from several adjacent tokens; if it is, we assume the user
+ * wants it expanded, otherwise it is left untouched. For example:
+ *
+ *      %define %$abc cde
+ *
+ * leaves %$abc alone so that %define can take it as the name being
+ * defined, whereas:
+ *
+ *      %define _%$abc cde
+ *
+ * is made of more than one token, so %$abc is expanded first (if it
+ * has a value; otherwise it stays as it is) and the pieces are then
+ * joined into a single identifier before %define sees them.
+ */
+static Token *expand_id(Token * tline)
+{
+    Token *last, *rest;
+
+    if (!tline || !tline->next)
+        return tline;
+
+    /* Walk to the last token of the leading run of ID/PREPROC_ID/NUMBER */
+    for (last = tline; last->next; last = last->next) {
+        if (last->next->type != TOK_ID &&
+            last->next->type != TOK_PREPROC_ID &&
+            last->next->type != TOK_NUMBER)
+            break;
+    }
+
+    /* A single-token identifier is never expanded */
+    if (last == tline)
+        return tline;
+
+    /* Cut the line after the identifier so expand_smacro stops there */
+    rest = last->next;
+    last->next = NULL;
+
+    tline = expand_smacro(tline);
+
+    /* The list may have changed; find its new end and re-attach the rest */
+    for (last = tline; last && last->next; last = last->next)
+        ;
+    if (last)
+        last->next = rest;
+
+    return tline;
+}
+
+/*
+ * Decide whether the given line is a call to a multi-line macro and,
+ * if so, return the matching MMacro. An initial label is not our
+ * concern (expand_mmacro deals with it), but the parameter count is.
+ * The caller guarantees that tline is the putative macro name token.
+ *
+ * On success *params_array receives a NULL-terminated array of
+ * parameter start tokens, padded with the macro's defaults where the
+ * call supplied fewer. On failure NULL is returned and *params_array
+ * is not written.
+ */
+static MMacro *is_mmacro(Token * tline, Token *** params_array)
+{
+    MMacro *bucket, *cand;
+    Token **args;
+    int nargs;
+
+    bucket = (MMacro *) hash_findix(&mmacros, tline->text);
+
+    /*
+     * Cheap test first: is there any macro of this name at all?
+     * Only if so do we bother counting parameters.
+     */
+    list_for_each(cand, bucket)
+        if (!mstrcmp(cand->name, tline->text, cand->casesense))
+            break;
+    if (!cand)
+        return NULL;
+
+    /* There is a candidate, so count and demarcate the parameters. */
+    count_mmac_params(tline->next, &nargs, &args);
+
+    /* Look for the definition that accepts this many parameters. */
+    while (cand) {
+        bool fits = cand->nparam_min <= nargs &&
+                    (cand->plus || nargs <= cand->nparam_max);
+
+        if (!fits) {
+            /* Wrong arity: advance to the next macro of the same name. */
+            list_for_each(cand, cand->next)
+                if (!mstrcmp(cand->name, tline->text, cand->casesense))
+                    break;
+            continue;
+        }
+
+        /* Right arity; make sure cycle removal lets us use it. */
+        if (cand->in_progress > cand->max_depth) {
+            if (cand->max_depth > 0) {
+                nasm_error(ERR_WARNING,
+                           "reached maximum recursion depth of %i",
+                           cand->max_depth);
+            }
+            nasm_free(args);
+            return NULL;
+        }
+
+        /* Append default parameters for any the call left out. */
+        if (cand->defaults && nargs < cand->nparam_min + cand->ndefs) {
+            args = nasm_realloc(args,
+                                ((cand->nparam_min + cand->ndefs +
+                                  1) * sizeof(*args)));
+            for (; nargs < cand->nparam_min + cand->ndefs; nargs++)
+                args[nargs] = cand->defaults[nargs - cand->nparam_min];
+        }
+
+        /* In Plus mode, parameters beyond nparam_max are not counted. */
+        if (cand->plus && nargs > cand->nparam_max)
+            nargs = cand->nparam_max;
+
+        /* A call without any parameters still needs a terminator slot. */
+        if (!args) {
+            args = nasm_malloc(sizeof(*args));
+            nargs = 0;
+        }
+        args[nargs] = NULL;
+        *params_array = args;
+        return cand;
+    }
+
+    /*
+     * No definition takes this number of parameters: warn and do
+     * not expand.
+     */
+    nasm_error(ERR_WARNING|ERR_PASS1|ERR_WARN_MNP,
+               "macro `%s' exists, but not taking %d parameters",
+               tline->text, nargs);
+    nasm_free(args);
+    return NULL;
+}
+
+
+/*
+ * Stash the invocation-specific fields of an MMacro on its `prev'
+ * chain, ready for a recursive expansion of the same macro.
+ */
+static void push_mmacro(MMacro *m)
+{
+    MMacroInvocation *saved = nasm_malloc(sizeof(MMacroInvocation));
+
+    /* snapshot of the invocation currently in progress */
+    saved->params   = m->params;
+    saved->paramlen = m->paramlen;
+    saved->nparam   = m->nparam;
+    saved->rotate   = m->rotate;
+    saved->iline    = m->iline;
+    saved->unique   = m->unique;
+    saved->condcnt  = m->condcnt;
+
+    /* push it on top of any earlier saved invocations */
+    saved->prev = m->prev;
+    m->prev = saved;
+}
+
+
+/*
+ * Bring back the invocation-specific fields of an MMacro that
+ * push_mmacro stashed before a recursive expansion. Does nothing
+ * if no invocation has been saved.
+ */
+static void pop_mmacro(MMacro *m)
+{
+    MMacroInvocation *saved = m->prev;
+
+    if (!saved)
+        return;
+
+    /* unlink the most recent snapshot and copy it back */
+    m->prev     = saved->prev;
+    m->params   = saved->params;
+    m->paramlen = saved->paramlen;
+    m->nparam   = saved->nparam;
+    m->rotate   = saved->rotate;
+    m->iline    = saved->iline;
+    m->unique   = saved->unique;
+    m->condcnt  = saved->condcnt;
+
+    nasm_free(saved);
+}
+
+
+/*
+ * Expand the multi-line macro call made by the given line, if
+ * there is one to be expanded. If there is, push the expansion on
+ * istk->expansion and return 1. Otherwise return 0.
+ *
+ * The macro name may be the first non-blank token on the line, or
+ * it may follow an identifier (optionally followed by a colon) that
+ * is then treated as a label. When 1 is returned the token list is
+ * taken over by this function: the call part is kept in m->iline
+ * and the label part is either pushed as a line of its own or freed.
+ */
+static int expand_mmacro(Token * tline)
+{
+ Token *startline = tline;
+ Token *label = NULL;
+ int dont_prepend = 0; /* 0: append `:' to the label; 1: label already has one; -1: label consumed by %00 */
+ Token **params, *t, *tt;
+ MMacro *m;
+ Line *l, *ll;
+ int i, nparam, *paramlen;
+ const char *mname;
+
+ t = tline;
+ skip_white_(t);
+ /* if (!tok_type_(t, TOK_ID)) Lino 02/25/02 */
+ if (!tok_type_(t, TOK_ID) && !tok_type_(t, TOK_PREPROC_ID))
+ return 0;
+ m = is_mmacro(t, &params);
+ if (m) {
+ mname = t->text;
+ } else {
+ Token *last;
+ /*
+ * We have an id which isn't a macro call. We'll assume
+ * it might be a label; we'll also check to see if a
+ * colon follows it. Then, if there's another id after
+ * that lot, we'll check it again for macro-hood.
+ */
+ label = last = t;
+ t = t->next;
+ if (tok_type_(t, TOK_WHITESPACE))
+ last = t, t = t->next;
+ if (tok_is_(t, ":")) {
+ dont_prepend = 1;
+ last = t, t = t->next;
+ if (tok_type_(t, TOK_WHITESPACE))
+ last = t, t = t->next;
+ }
+ if (!tok_type_(t, TOK_ID) || !(m = is_mmacro(t, &params)))
+ return 0;
+ last->next = NULL; /* cut the label part off from the macro call */
+ mname = t->text;
+ tline = t;
+ }
+
+ /*
+ * Fix up the parameters: this involves stripping leading and
+ * trailing whitespace, then stripping braces if they are
+ * present.
+ *
+ * paramlen[i] ends up holding the number of tokens that make
+ * up parameter i, counted from the adjusted params[i].
+ */
+ for (nparam = 0; params[nparam]; nparam++) ;
+ paramlen = nparam ? nasm_malloc(nparam * sizeof(*paramlen)) : NULL;
+
+ for (i = 0; params[i]; i++) {
+ int brace = 0;
+ int comma = (!m->plus || i < nparam - 1); /* the last parameter of a `+' macro is not split at commas */
+
+ t = params[i];
+ skip_white_(t);
+ if (tok_is_(t, "{"))
+ t = t->next, brace++, comma = false;
+ params[i] = t;
+ paramlen[i] = 0;
+ while (t) {
+ if (comma && t->type == TOK_OTHER && !strcmp(t->text, ","))
+ break; /* ... because we have hit a comma */
+ if (comma && t->type == TOK_WHITESPACE
+ && tok_is_(t->next, ","))
+ break; /* ... or a space then a comma */
+ if (brace && t->type == TOK_OTHER) {
+ if (t->text[0] == '{')
+ brace++; /* ... or a nested opening brace */
+ else if (t->text[0] == '}')
+ if (!--brace)
+ break; /* ... or a brace */
+ }
+ t = t->next;
+ paramlen[i]++;
+ }
+ if (brace) /* ran out of tokens with a brace still open */
+ nasm_error(ERR_NONFATAL, "macro params should be enclosed in braces");
+ }
+
+ /*
+ * OK, we have a MMacro structure together with a set of
+ * parameters. We must now go through the expansion and push
+ * copies of each Line on to istk->expansion. Substitution of
+ * parameter tokens and macro-local tokens doesn't get done
+ * until the single-line macro substitution process; this is
+ * because delaying them allows us to change the semantics
+ * later through %rotate.
+ *
+ * First, push an end marker on to istk->expansion, mark this
+ * macro as in progress, and set up its invocation-specific
+ * variables.
+ */
+ ll = nasm_malloc(sizeof(Line));
+ ll->next = istk->expansion;
+ ll->finishes = m;
+ ll->first = NULL;
+ istk->expansion = ll;
+
+ /*
+ * Save the previous MMacro expansion in the case of
+ * macro recursion
+ */
+ if (m->max_depth && m->in_progress)
+ push_mmacro(m);
+
+ m->in_progress ++;
+ m->params = params;
+ m->iline = tline;
+ m->nparam = nparam;
+ m->rotate = 0;
+ m->paramlen = paramlen;
+ m->unique = unique++;
+ m->lineno = 0;
+ m->condcnt = 0;
+
+ m->next_active = istk->mstk;
+ istk->mstk = m;
+
+ list_for_each(l, m->expansion) { /* each copy is pushed on the front of istk->expansion */
+ Token **tail;
+
+ ll = nasm_malloc(sizeof(Line));
+ ll->finishes = NULL;
+ ll->next = istk->expansion;
+ istk->expansion = ll;
+ tail = &ll->first;
+
+ list_for_each(t, l->first) {
+ Token *x = t;
+ switch (t->type) {
+ case TOK_PREPROC_Q:
+ tt = *tail = new_Token(NULL, TOK_ID, mname, 0); /* the name as written in the call */
+ break;
+ case TOK_PREPROC_QQ:
+ tt = *tail = new_Token(NULL, TOK_ID, m->name, 0); /* the name as written in the definition */
+ break;
+ case TOK_PREPROC_ID:
+ if (t->text[1] == '0' && t->text[2] == '0') { /* %00: replaced by the label token, or dropped if there is none */
+ dont_prepend = -1;
+ x = label;
+ if (!x)
+ continue;
+ }
+ /* fall through */
+ default:
+ tt = *tail = new_Token(NULL, x->type, x->text, 0);
+ break;
+ }
+ tail = &tt->next;
+ }
+ *tail = NULL;
+ }
+
+ /*
+ * If we had a label, push it on as the first line of
+ * the macro expansion.
+ *
+ * If the body referred to the label through %00 it has already
+ * been copied into the expansion, so the original label tokens
+ * are freed instead.
+ */
+ if (label) {
+ if (dont_prepend < 0)
+ free_tlist(startline);
+ else {
+ ll = nasm_malloc(sizeof(Line));
+ ll->finishes = NULL;
+ ll->next = istk->expansion;
+ istk->expansion = ll;
+ ll->first = startline;
+ if (!dont_prepend) {
+ while (label->next)
+ label = label->next;
+ label->next = tt = new_Token(NULL, TOK_OTHER, ":", 0);
+ }
+ }
+ }
+
+ lfmt->uplevel(m->nolist ? LIST_MACRO_NOLIST : LIST_MACRO);
+
+ return 1;
+}
+
+/*
+ * Error handler used while the preprocessor is fetching a line
+ * (pp_getline installs it). It drops errors raised inside dead
+ * conditional branches and prefixes the rest with the name and line
+ * of the enclosing multi-line macro, if any, before handing them to
+ * the previous handler.
+ */
+static void pp_verror(int severity, const char *fmt, va_list arg)
+{
+    char text[BUFSIZ];
+    MMacro *owner = NULL;
+    int skipped = 0;
+
+    /*
+     * Errors below fatal are ignored in a dead branch of %if and
+     * friends. %else and the like are evaluated in the state of the
+     * previous branch, so something like
+     *   %if 0 ... %else trailing garbage ... %endif
+     * would lose its error; such directives pass ERR_PP_PRECOND and
+     * are then only silenced when the whole construct is dead.
+     */
+    if ((severity & ERR_MASK) < ERR_FATAL && istk && istk->conds) {
+        bool muted;
+
+        if (severity & ERR_PP_PRECOND)
+            muted = (istk->conds->state == COND_NEVER);
+        else
+            muted = !emitting(istk->conds->state);
+
+        if (muted)
+            return;
+    }
+
+    /* Find the innermost named %macro, stepping over %rep blocks */
+    if (!(severity & ERR_NOFILE) && istk && istk->mstk) {
+        for (owner = istk->mstk; owner && !owner->name;
+             owner = owner->next_active)
+            skipped++;
+    }
+
+    if (!owner) {
+        real_verror(severity, fmt, arg);
+        return;
+    }
+
+    vsnprintf(text, sizeof(text), fmt, arg);
+
+    /* Report through the real handler so we are not re-entered */
+    nasm_set_verror(real_verror);
+    nasm_error(severity, "(%s:%d) %s",
+               owner->name, owner->lineno - skipped, text);
+    nasm_set_verror(pp_verror);
+}
+
+/*
+ * Prepare the preprocessor for a new pass over an input file.
+ *
+ * file     name of the top-level input file; failing to open it is
+ *          fatal
+ * apass    pass number from the caller; values above 2 are clamped
+ *          to 2 for the internal `pass' variable, but __PASS__ is
+ *          defined with the value exactly as passed in
+ * deplist  if non-NULL, the dependency list to which the input file
+ *          name is appended
+ */
+static void
+pp_reset(char *file, int apass, StrList **deplist)
+{
+    Token *passtok;
+
+    /* Fresh include stack holding just the main input file */
+    cstk = NULL;
+    istk = nasm_malloc(sizeof(Include));
+    istk->next = NULL;
+    istk->conds = NULL;
+    istk->expansion = NULL;
+    istk->mstk = NULL;
+    istk->fp = nasm_open_read(file, NF_TEXT);
+    istk->fname = NULL;
+    src_set(0, file);
+    istk->lineinc = 1;
+    if (!istk->fp)
+        nasm_fatal(ERR_NOFILE, "unable to open input file `%s'", file);
+
+    /* Reset macro definition state */
+    defining = NULL;
+    nested_mac_count = 0;
+    nested_rep_count = 0;
+    init_macros();
+    unique = 0;
+
+    /* Choose where in the standard macro set to start reading */
+    stdmacpos = tasm_compatible_mode ? nasm_stdmac : nasm_stdmac_after_tasm;
+    any_extrastdmac = extrastdmac && *extrastdmac;
+    do_predef = true;
+
+    /*
+     * 0 for dependencies, 1 for preparatory passes, 2 for final pass.
+     * The caller may also hand us 3 for preprocess-only; that is
+     * kept for __PASS__ below but stored as 2 here.
+     */
+    if (apass > 2)
+        pass = 2;
+    else
+        pass = apass;
+
+    /* Record the input file as the first dependency, if wanted */
+    dephead = deptail = deplist;
+    if (deplist) {
+        StrList *dep = nasm_malloc(strlen(file)+1+sizeof dep->next);
+
+        dep->next = NULL;
+        strcpy(dep->str, file);
+        *deptail = dep;
+        deptail = &dep->next;
+    }
+
+    /*
+     * __PASS__ is defined here rather than with the other builtins
+     * because its value differs from pass to pass.
+     */
+    passtok = nasm_malloc(sizeof(*passtok));
+    passtok->next = NULL;
+    make_tok_num(passtok, apass);
+    passtok->a.mac = NULL;
+    define_smacro(NULL, "__PASS__", true, 0, passtok);
+}
+
+/*
+ * Produce the next preprocessed line of output.
+ *
+ * The preprocessor's error handler (pp_verror) is installed for the
+ * duration of the call and the previous handler is restored before
+ * returning. Each pass through the outer loop obtains one tokenized
+ * line -- from the pending expansion buffer of the current input
+ * level, or else from the current input file, popping input levels
+ * as they reach end of file -- and then either consumes it (directive,
+ * macro definition body, suppressed conditional or %rep text,
+ * multi-line macro call) or expands single-line macros and returns it.
+ *
+ * Returns the string built by detoken() for the line (presumably
+ * allocated and to be freed by the caller -- confirm against detoken),
+ * or NULL once the input stack is empty.
+ */
+static char *pp_getline(void)
+{
+ char *line;
+ Token *tline;
+
+ real_verror = nasm_set_verror(pp_verror);
+
+ while (1) {
+ /*
+ * Fetch a tokenized line, either from the macro-expansion
+ * buffer or from the input file.
+ */
+ tline = NULL;
+ while (istk->expansion && istk->expansion->finishes) {
+ Line *l = istk->expansion;
+ if (!l->finishes->name && l->finishes->in_progress > 1) {
+ Line *ll;
+
+ /*
+ * This is a macro-end marker for a macro with no
+ * name, which means it's not really a macro at all
+ * but a %rep block, and the `in_progress' field is
+ * more than 1, meaning that we still need to
+ * repeat. (1 means the natural last repetition; 0
+ * means termination by %exitrep.) We have
+ * therefore expanded up to the %endrep, and must
+ * push the whole block on to the expansion buffer
+ * again. We don't bother to remove the macro-end
+ * marker: we'd only have to generate another one
+ * if we did.
+ */
+ l->finishes->in_progress--;
+ /*
+ * Copy each stored line of the block and push the copy
+ * on the front of the expansion buffer.
+ */
+ list_for_each(l, l->finishes->expansion) {
+ Token *t, *tt, **tail;
+
+ ll = nasm_malloc(sizeof(Line));
+ ll->next = istk->expansion;
+ ll->finishes = NULL;
+ ll->first = NULL;
+ tail = &ll->first;
+
+ list_for_each(t, l->first) {
+ /* Tokens with no text that are not whitespace are dropped */
+ if (t->text || t->type == TOK_WHITESPACE) {
+ tt = *tail = new_Token(NULL, t->type, t->text, 0);
+ tail = &tt->next;
+ }
+ }
+
+ istk->expansion = ll;
+ }
+ } else {
+ /*
+ * Check whether a `%rep' was started and not ended
+ * within this macro expansion. This can happen and
+ * should be detected. It's a fatal error because
+ * I'm too confused to work out how to recover
+ * sensibly from it.
+ */
+ if (defining) {
+ if (defining->name)
+ nasm_panic(0, "defining with name in expansion");
+ else if (istk->mstk->name)
+ nasm_fatal(0, "`%%rep' without `%%endrep' within"
+ " expansion of macro `%s'",
+ istk->mstk->name);
+ }
+
+ /*
+ * FIXME: investigate the relationship at this point between
+ * istk->mstk and l->finishes
+ */
+ {
+ MMacro *m = istk->mstk;
+ istk->mstk = m->next_active;
+ if (m->name) {
+ /*
+ * This was a real macro call, not a %rep, and
+ * therefore the parameter information needs to
+ * be freed.
+ */
+ if (m->prev) {
+ pop_mmacro(m);
+ l->finishes->in_progress --;
+ } else {
+ nasm_free(m->params);
+ free_tlist(m->iline);
+ nasm_free(m->paramlen);
+ l->finishes->in_progress = 0;
+ }
+ } else
+ free_mmacro(m);
+ }
+ /* Unlink and release the end marker itself */
+ istk->expansion = l->next;
+ nasm_free(l);
+ lfmt->downlevel(LIST_MACRO);
+ }
+ }
+ while (1) { /* until we get a line we can use */
+
+ if (istk->expansion) { /* from a macro expansion */
+ char *p;
+ Line *l = istk->expansion;
+ if (istk->mstk)
+ istk->mstk->lineno++;
+ tline = l->first;
+ istk->expansion = l->next;
+ nasm_free(l);
+ /* The detokenized text is only needed for the listing */
+ p = detoken(tline, false);
+ lfmt->line(LIST_MACRO, p);
+ nasm_free(p);
+ break;
+ }
+ line = read_line();
+ if (line) { /* from the current input file */
+ line = prepreproc(line);
+ tline = tokenize(line);
+ nasm_free(line);
+ break;
+ }
+ /*
+ * The current file has ended; work down the istk
+ */
+ {
+ Include *i = istk;
+ fclose(i->fp);
+ if (i->conds) {
+ /* nasm_error can't be conditionally suppressed */
+ nasm_fatal(0,
+ "expected `%%endif' before end of file");
+ }
+ /* only set line and file name if there's a next node */
+ if (i->next)
+ src_set(i->lineno, i->fname);
+ istk = i->next;
+ lfmt->downlevel(LIST_INCLUDE);
+ nasm_free(i);
+ if (!istk) {
+ /* Nothing left to read: report end of input */
+ line = NULL;
+ goto done;
+ }
+ /*
+ * The level we returned to has an end marker at the head
+ * of its expansion buffer: leave the fetch loop with
+ * tline still NULL; the marker is dealt with the next
+ * time the loop at the top runs.
+ */
+ if (istk->expansion && istk->expansion->finishes)
+ break;
+ }
+ }
+
+ /*
+ * We must expand MMacro parameters and MMacro-local labels
+ * _before_ we plunge into directive processing, to cope
+ * with things like `%define something %1' such as STRUC
+ * uses. Unless we're _defining_ a MMacro, in which case
+ * those tokens should be left alone to go into the
+ * definition; and unless we're in a non-emitting
+ * condition, in which case we don't want to meddle with
+ * anything.
+ */
+ if (!defining && !(istk->conds && !emitting(istk->conds->state))
+ && !(istk->mstk && !istk->mstk->in_progress)) {
+ tline = expand_mmac_params(tline);
+ }
+
+ /*
+ * Check the line to see if it's a preprocessor directive.
+ */
+ if (do_directive(tline) == DIRECTIVE_FOUND) {
+ continue;
+ } else if (defining) {
+ /*
+ * We're defining a multi-line macro. We emit nothing
+ * at all, and just
+ * shove the tokenized line on to the macro definition.
+ */
+ Line *l = nasm_malloc(sizeof(Line));
+ /* Inserted at the head of the definition's line list */
+ l->next = defining->expansion;
+ l->first = tline;
+ l->finishes = NULL;
+ defining->expansion = l;
+ continue;
+ } else if (istk->conds && !emitting(istk->conds->state)) {
+ /*
+ * We're in a non-emitting branch of a condition block.
+ * Emit nothing at all, not even a blank line: when we
+ * emerge from the condition we'll give a line-number
+ * directive so we keep our place correctly.
+ */
+ free_tlist(tline);
+ continue;
+ } else if (istk->mstk && !istk->mstk->in_progress) {
+ /*
+ * We're in a %rep block which has been terminated, so
+ * we're walking through to the %endrep without
+ * emitting anything. Emit nothing at all, not even a
+ * blank line: when we emerge from the %rep block we'll
+ * give a line-number directive so we keep our place
+ * correctly.
+ */
+ free_tlist(tline);
+ continue;
+ } else {
+ tline = expand_smacro(tline);
+ if (!expand_mmacro(tline)) {
+ /*
+ * De-tokenize the line again, and emit it.
+ */
+ line = detoken(tline, true);
+ free_tlist(tline);
+ break;
+ } else {
+ continue; /* expand_mmacro calls free_tlist */
+ }
+ }
+ }
+
+done:
+ /* Common exit: put back the error handler that was active on entry */
+ nasm_set_verror(real_verror);
+ return line;
+}
+
+/*
+ * Tear down preprocessor state at the end of a pass.
+ *
+ * A %macro or %rep that is still being defined is reported as a
+ * non-fatal error (with the preprocessor's error handler installed)
+ * and released. The context stack, the macro tables and every
+ * remaining input level are then freed and the current source file
+ * name is cleared. When pass is 0 the predefinition list, the token
+ * blocks and the include path list are released as well.
+ */
+static void pp_cleanup(int pass)
+{
+    Include *inc;
+    IncPath *ip;
+
+    real_verror = nasm_set_verror(pp_verror);
+    if (defining) {
+        if (!defining->name)
+            nasm_error(ERR_NONFATAL, "end of file while still in %%rep");
+        else
+            nasm_error(ERR_NONFATAL,
+                       "end of file while still defining macro `%s'",
+                       defining->name);
+
+        free_mmacro(defining);
+        defining = NULL;
+    }
+    nasm_set_verror(real_verror);
+
+    while (cstk)
+        ctx_pop();
+    free_macros();
+
+    /* Close and free whatever is left on the input stack */
+    for (inc = istk; inc; inc = istk) {
+        istk = inc->next;
+        fclose(inc->fp);
+        nasm_free(inc);
+    }
+
+    while (cstk)
+        ctx_pop();
+    src_set_fname(NULL);
+
+    if (pass != 0)
+        return;
+
+    /* Pass 0 only: also drop the state that outlives individual passes */
+    free_llist(predef);
+    predef = NULL;
+    delete_Blocks();
+    freeTokens = NULL;
+    for (ip = ipath; ip; ip = ipath) {
+        ipath = ip->next;
+        if (ip->path)
+            nasm_free(ip->path);
+        nasm_free(ip);
+    }
+}
+
+/*
+ * Append a directory to the end of the include search list (ipath).
+ * The string is duplicated; a NULL path is stored as a NULL entry.
+ */
+static void pp_include_path(char *path)
+{
+    IncPath **tail;
+    IncPath *node = nasm_malloc(sizeof(IncPath));
+
+    node->path = path ? nasm_strdup(path) : NULL;
+    node->next = NULL;
+
+    /* Walk to the terminating link, which is ipath itself if the list is empty */
+    for (tail = &ipath; *tail; tail = &(*tail)->next)
+        ;
+    *tail = node;
+}
+
+/*
+ * Queue `%include <fname>' on the predef list. The file name is
+ * carried as a single TOK_INTERNAL_STRING token.
+ */
+static void pp_pre_include(char *fname)
+{
+    Token *fname_tok, *ws_tok, *directive;
+    Line *entry;
+
+    /* The token chain is built back to front: name, blank, directive */
+    fname_tok = new_Token(NULL, TOK_INTERNAL_STRING, fname, 0);
+    ws_tok = new_Token(fname_tok, TOK_WHITESPACE, NULL, 0);
+    directive = new_Token(ws_tok, TOK_PREPROC_ID, "%include", 0);
+
+    entry = nasm_malloc(sizeof(Line));
+    entry->first = directive;
+    entry->finishes = NULL;
+    entry->next = predef;
+    predef = entry;
+}
+
+/*
+ * Queue a definition of the form "NAME" or "NAME=VALUE" as a
+ * `%define' line on the predef list.
+ *
+ * The first `=' in definition, if any, is replaced by a space while
+ * the string is tokenized and restored afterwards, so the caller's
+ * buffer is unchanged on return. A warning is issued when the
+ * definition does not begin with an identifier token. An empty token
+ * list (presumably what tokenize() yields for an empty string --
+ * confirm) is warned about in the same way instead of being
+ * dereferenced.
+ *
+ * The preprocessor's error handler is installed for the duration of
+ * the call.
+ */
+static void pp_pre_define(char *definition)
+{
+    Token *def, *space;
+    Line *l;
+    char *equals;
+
+    real_verror = nasm_set_verror(pp_verror);
+
+    equals = strchr(definition, '=');
+    space = new_Token(NULL, TOK_WHITESPACE, NULL, 0);
+    def = new_Token(space, TOK_PREPROC_ID, "%define", 0);
+    if (equals)
+        *equals = ' ';
+    space->next = tokenize(definition);
+    if (equals)
+        *equals = '=';
+
+    /* Check for a missing first token before inspecting its type */
+    if (!space->next ||
+        (space->next->type != TOK_PREPROC_ID &&
+         space->next->type != TOK_ID))
+        nasm_error(ERR_WARNING, "pre-defining non ID `%s\'", definition);
+
+    l = nasm_malloc(sizeof(Line));
+    l->next = predef;
+    l->first = def;
+    l->finishes = NULL;
+    predef = l;
+
+    nasm_set_verror(real_verror);
+}
+
+/*
+ * Queue `%undef <definition>' on the predef list; definition is
+ * tokenized as given.
+ */
+static void pp_pre_undefine(char *definition)
+{
+    Token *ws_tok = new_Token(NULL, TOK_WHITESPACE, NULL, 0);
+    Token *directive = new_Token(ws_tok, TOK_PREPROC_ID, "%undef", 0);
+    Line *entry;
+
+    ws_tok->next = tokenize(definition);
+
+    entry = nasm_malloc(sizeof(Line));
+    entry->first = directive;
+    entry->finishes = NULL;
+    entry->next = predef;
+    predef = entry;
+}
+
+/*
+ * Remember an additional macro chain in extrastdmac. pp_reset()
+ * tests this pointer (and its first byte) when it computes
+ * any_extrastdmac.
+ */
+static void pp_extra_stdmac(macros_t *macros)
+{
+ extrastdmac = macros;
+}
+
+/*
+ * Turn tok into a TOK_NUMBER token whose text is the signed decimal
+ * representation of val. The text is a fresh nasm_strdup() copy; any
+ * previous tok->text is overwritten without being freed.
+ */
+static void make_tok_num(Token * tok, int64_t val)
+{
+    char digits[32];    /* ample for a 64-bit decimal value and sign */
+
+    snprintf(digits, sizeof digits, "%" PRId64, val);
+    tok->type = TOK_NUMBER;
+    tok->text = nasm_strdup(digits);
+}
+
+/*
+ * Report macro m and everything reachable through its next_active
+ * links, one message per named macro that is not marked nolist. The
+ * recursion comes first so that the chain is printed in reverse
+ * order. The current source position is changed as a side effect.
+ */
+static void pp_list_one_macro(MMacro *m, int severity)
+{
+    if (m) {
+        pp_list_one_macro(m->next_active, severity);
+
+        /* Unnamed entries and nolist macros are passed over silently */
+        if (!m->name || m->nolist)
+            return;
+
+        src_set(m->xline + m->lineno, m->fname);
+        nasm_error(severity, "... from macro `%s' defined here", m->name);
+    }
+}
+
+/*
+ * List the macros active on the current input level, using severity
+ * with ERR_PP_LISTMACRO and ERR_NO_SEVERITY added. The source
+ * position is saved beforehand and restored afterwards because the
+ * listing moves it.
+ */
+static void pp_error_list_macros(int severity)
+{
+    const char *orig_fname = NULL;
+    int32_t orig_line;
+
+    src_get(&orig_line, &orig_fname);
+
+    if (istk)
+        pp_list_one_macro(istk->mstk,
+                          severity | ERR_PP_LISTMACRO | ERR_NO_SEVERITY);
+
+    src_set(orig_line, orig_fname);
+}
+
+/*
+ * Operations table through which this preprocessor is exported.
+ * The initializers are positional, so their order must match the
+ * member order of struct preproc_ops (declared outside this file).
+ */
+const struct preproc_ops nasmpp = {
+ pp_reset,
+ pp_getline,
+ pp_cleanup,
+ pp_extra_stdmac,
+ pp_pre_define,
+ pp_pre_undefine,
+ pp_pre_include,
+ pp_include_path,
+ pp_error_list_macros,
+};
diff --git a/asm/preproc.h b/asm/preproc.h
new file mode 100644
index 00000000..fcf8695b
--- /dev/null
+++ b/asm/preproc.h
@@ -0,0 +1,55 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * preproc.h header file for preproc.c
+ */
+
+#ifndef NASM_PREPROC_H
+#define NASM_PREPROC_H
+
+#include "nasmlib.h"
+#include "pptok.h"
+
+/*
+ * NOTE(review): presumably the directive names and their lengths,
+ * indexed by enum preproc_token -- confirm against the definitions.
+ */
+extern const char * const pp_directives[];
+extern const uint8_t pp_directives_len[];
+
+/* Element type of a macro chain; chains are handled as macros_t * */
+typedef const unsigned char macros_t;
+
+/*
+ * NOTE(review): presumably maps a directive name to its
+ * enum preproc_token value -- confirm against the definition.
+ */
+enum preproc_token pp_token_hash(const char *token);
+
+/* Opens an include file or input file. This uses the include path. */
+FILE *pp_input_fopen(const char *filename, enum file_flags mode);
+
+#endif
diff --git a/asm/quote.c b/asm/quote.c
new file mode 100644
index 00000000..75a93726
--- /dev/null
+++ b/asm/quote.c
@@ -0,0 +1,479 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * quote.c
+ */
+
+#include "compiler.h"
+
+#include <stdlib.h>
+
+#include "nasmlib.h"
+#include "quote.h"
+
+/*
+ * Return a quoted copy of the len bytes at str (embedded NULs are
+ * allowed) in a NUL-terminated buffer obtained from nasm_malloc().
+ *
+ * '...' is used when the data contains no single quote and no byte
+ * outside the range ' '..'~'; failing that, "..." is used under the
+ * same rule for double quotes; otherwise the data is written as a
+ * `...` string with backslash escapes.
+ */
+char *nasm_quote(const char *str, size_t len)
+{
+ const char *p, *ep;
+ char c, c1, *q, *nstr;
+ unsigned char uc;
+ bool sq_ok, dq_ok;
+ size_t qlen;
+
+ /*
+ * First pass: decide which quote styles are usable and compute
+ * the number of bytes the escaped form would occupy.
+ */
+ sq_ok = dq_ok = true;
+ ep = str+len;
+ qlen = 0; /* Length if we need `...` quotes */
+ for (p = str; p < ep; p++) {
+ c = *p;
+ switch (c) {
+ case '\'':
+ sq_ok = false;
+ qlen++;
+ break;
+ case '\"':
+ dq_ok = false;
+ qlen++;
+ break;
+ case '`':
+ case '\\':
+ qlen += 2;
+ break;
+ default:
+ if (c < ' ' || c > '~') {
+ sq_ok = dq_ok = false;
+ switch (c) {
+ case '\a':
+ case '\b':
+ case '\t':
+ case '\n':
+ case '\v':
+ case '\f':
+ case '\r':
+ case 27:
+ qlen += 2;
+ break;
+ default:
+ /*
+ * Octal escape: a backslash plus one to three digits.
+ * If the next byte is itself an octal digit, all three
+ * digits are needed so that it is not absorbed.
+ */
+ c1 = (p+1 < ep) ? p[1] : 0;
+ if (c1 >= '0' && c1 <= '7')
+ uc = 0377; /* Must use the full form */
+ else
+ uc = c;
+ if (uc > 077)
+ qlen++;
+ if (uc > 07)
+ qlen++;
+ qlen += 2;
+ break;
+ }
+ } else {
+ qlen++;
+ }
+ break;
+ }
+ }
+
+ if (sq_ok || dq_ok) {
+ /* Use '...' or "..." */
+ nstr = nasm_malloc(len+3);
+ nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"';
+ nstr[len+2] = '\0';
+ if (len > 0)
+ memcpy(nstr+1, str, len);
+ } else {
+ /* Need to use `...` quoted syntax */
+ nstr = nasm_malloc(qlen+3);
+ q = nstr;
+ *q++ = '`';
+ /* Second pass: emit exactly what the first pass measured */
+ for (p = str; p < ep; p++) {
+ c = *p;
+ switch (c) {
+ case '`':
+ case '\\':
+ *q++ = '\\';
+ *q++ = c;
+ break;
+ case 7:
+ *q++ = '\\';
+ *q++ = 'a';
+ break;
+ case 8:
+ *q++ = '\\';
+ *q++ = 'b';
+ break;
+ case 9:
+ *q++ = '\\';
+ *q++ = 't';
+ break;
+ case 10:
+ *q++ = '\\';
+ *q++ = 'n';
+ break;
+ case 11:
+ *q++ = '\\';
+ *q++ = 'v';
+ break;
+ case 12:
+ *q++ = '\\';
+ *q++ = 'f';
+ break;
+ case 13:
+ *q++ = '\\';
+ *q++ = 'r';
+ break;
+ case 27:
+ *q++ = '\\';
+ *q++ = 'e';
+ break;
+ default:
+ if (c < ' ' || c > '~') {
+ c1 = (p+1 < ep) ? p[1] : 0;
+ if (c1 >= '0' && c1 <= '7')
+ uc = 0377; /* Must use the full form */
+ else
+ uc = c;
+ *q++ = '\\';
+ if (uc > 077)
+ *q++ = ((unsigned char)c >> 6) + '0';
+ if (uc > 07)
+ *q++ = (((unsigned char)c >> 3) & 7) + '0';
+ *q++ = ((unsigned char)c & 7) + '0';
+ break;
+ } else {
+ *q++ = c;
+ }
+ break;
+ }
+ }
+ *q++ = '`';
+ *q++ = '\0';
+ /* qlen bytes of payload plus two quotes and the terminator */
+ nasm_assert((size_t)(q-nstr) == qlen+3);
+ }
+ return nstr;
+}
+
+/*
+ * Store the UTF-8 encoding of v at q and return the position just
+ * past it. Values up to 0x7fffffff are accepted and encoded with the
+ * original one- to six-byte scheme; a negative v stores nothing.
+ */
+static char *emit_utf8(char *q, int32_t v)
+{
+    int extra;              /* number of continuation bytes */
+    unsigned char lead;     /* marker bits of the first byte */
+
+    if (v < 0)
+        return q;           /* Impossible - do nothing */
+
+    if (v <= 0x7f) {
+        *q++ = v;
+        return q;
+    }
+
+    if (v <= 0x000007ff) {
+        extra = 1;
+        lead = 0xc0;
+    } else if (v <= 0x0000ffff) {
+        extra = 2;
+        lead = 0xe0;
+    } else if (v <= 0x001fffff) {
+        extra = 3;
+        lead = 0xf0;
+    } else if (v <= 0x03ffffff) {
+        extra = 4;
+        lead = 0xf8;
+    } else {
+        extra = 5;
+        lead = 0xfc;
+    }
+
+    /* The first byte holds the bits above the continuation bytes... */
+    *q++ = lead | (v >> (6 * extra));
+    /* ...and each continuation byte holds the next six, high to low */
+    while (extra-- > 0)
+        *q++ = 0x80 | ((v >> (6 * extra)) & 63);
+
+    return q;
+}
+
+/*
+ * Dequote the specified string *in place* and return the resulting
+ * length (the result may contain embedded nulls).
+ *
+ * In-place replacement is possible since the unquoted length is always
+ * shorter than or equal to the quoted length.
+ *
+ * If ep is not NULL, *ep is set to point to the final quote, or to the
+ * null if the string is improperly quoted. A string that does not
+ * start with a quote character is left untouched: its length is
+ * returned and *ep points to its terminating null.
+ */
+size_t nasm_unquote(char *str, char **ep)
+{
+ char bq;
+ char *p, *q;
+ char *escp = NULL;
+ char c;
+ enum unq_state {
+ st_start,
+ st_backslash,
+ st_hex,
+ st_oct,
+ st_ucs
+ } state;
+ int ndig = 0;
+ int32_t nval = 0;
+
+ /* p reads the quoted text, q writes the result over the same buffer */
+ p = q = str;
+
+ bq = *p++;
+ if (!bq)
+ return 0;
+
+ switch (bq) {
+ case '\'':
+ case '\"':
+ /* '...' or "..." string */
+ while ((c = *p) && c != bq) {
+ p++;
+ *q++ = c;
+ }
+ *q = '\0';
+ break;
+
+ case '`':
+ /* `...` string */
+ state = st_start;
+
+ while ((c = *p)) {
+ p++;
+ switch (state) {
+ case st_start:
+ switch (c) {
+ case '\\':
+ state = st_backslash;
+ break;
+ case '`':
+ /* Closing quote: leave p pointing at it */
+ p--;
+ goto out;
+ default:
+ *q++ = c;
+ break;
+ }
+ break;
+
+ case st_backslash:
+ state = st_start;
+ /*
+ * p is already past the escape letter, so escp[-1] is
+ * that letter and p > escp later means that at least
+ * one argument character has been consumed.
+ */
+ escp = p; /* Beginning of argument sequence */
+ nval = 0;
+ switch (c) {
+ case 'a':
+ *q++ = 7;
+ break;
+ case 'b':
+ *q++ = 8;
+ break;
+ case 'e':
+ *q++ = 27;
+ break;
+ case 'f':
+ *q++ = 12;
+ break;
+ case 'n':
+ *q++ = 10;
+ break;
+ case 'r':
+ *q++ = 13;
+ break;
+ case 't':
+ *q++ = 9;
+ break;
+ case 'u':
+ state = st_ucs;
+ ndig = 4;
+ break;
+ case 'U':
+ state = st_ucs;
+ ndig = 8;
+ break;
+ case 'v':
+ *q++ = 11;
+ break;
+ case 'x':
+ case 'X':
+ state = st_hex;
+ ndig = 2;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ state = st_oct;
+ ndig = 2; /* Up to two more digits */
+ nval = c - '0';
+ break;
+ default:
+ /* Any other character stands for itself */
+ *q++ = c;
+ break;
+ }
+ break;
+
+ case st_oct:
+ if (c >= '0' && c <= '7') {
+ nval = (nval << 3) + (c - '0');
+ if (!--ndig) {
+ *q++ = nval;
+ state = st_start;
+ }
+ } else {
+ p--; /* Process this character again */
+ *q++ = nval;
+ state = st_start;
+ }
+ break;
+
+ case st_hex:
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= 'a' && c <= 'f')) {
+ nval = (nval << 4) + numvalue(c);
+ if (!--ndig) {
+ *q++ = nval;
+ state = st_start;
+ }
+ } else {
+ p--; /* Process this character again */
+ /* With no hex digits at all, emit the escape letter itself */
+ *q++ = (p > escp) ? nval : escp[-1];
+ state = st_start;
+ }
+ break;
+
+ case st_ucs:
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= 'a' && c <= 'f')) {
+ nval = (nval << 4) + numvalue(c);
+ if (!--ndig) {
+ q = emit_utf8(q, nval);
+ state = st_start;
+ }
+ } else {
+ p--; /* Process this character again */
+ if (p > escp)
+ q = emit_utf8(q, nval);
+ else
+ *q++ = escp[-1];
+ state = st_start;
+ }
+ break;
+ }
+ }
+ /*
+ * The string ended without a closing quote: flush any escape
+ * sequence that was still being collected.
+ */
+ switch (state) {
+ case st_start:
+ case st_backslash:
+ break;
+ case st_oct:
+ *q++ = nval;
+ break;
+ case st_hex:
+ *q++ = (p > escp) ? nval : escp[-1];
+ break;
+ case st_ucs:
+ if (p > escp)
+ q = emit_utf8(q, nval);
+ else
+ *q++ = escp[-1];
+ break;
+ }
+ out:
+ break;
+
+ default:
+ /* Not a quoted string, just return the input... */
+ p = q = strchr(str, '\0');
+ break;
+ }
+
+ if (ep)
+ *ep = p;
+ return q-str;
+}
+
+/*
+ * Locate the end of the quoted string starting at str.
+ *
+ * Returns a pointer to the closing quote, or to the terminating null
+ * if the string is unterminated. If str does not begin with one of
+ * the quote characters ' " ` then str itself is returned.
+ */
+char *nasm_skip_string(char *str)
+{
+    char quote = str[0];
+    char *p;
+    int escaped = 0;    /* nonzero right after an unescaped backslash */
+
+    if (quote != '\'' && quote != '\"' && quote != '`')
+        return str;     /* Not a string... */
+
+    p = str + 1;
+
+    if (quote != '`') {
+        /* '...' or "..." string: no escapes, first matching quote ends it */
+        while (*p && *p != quote)
+            p++;
+        return p;
+    }
+
+    /*
+     * `...` string: only a backslash matters, since it hides the
+     * character that follows it (possibly a backquote or another
+     * backslash) from the scan.
+     */
+    for (; *p; p++) {
+        if (escaped)
+            escaped = 0;
+        else if (*p == '\\')
+            escaped = 1;
+        else if (*p == '`')
+            return p;   /* Found the end */
+    }
+    return p;           /* Unterminated string... */
+}
diff --git a/asm/quote.h b/asm/quote.h
new file mode 100644
index 00000000..2d8ce87b
--- /dev/null
+++ b/asm/quote.h
@@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+#ifndef NASM_QUOTE_H
+#define NASM_QUOTE_H
+
+#include "compiler.h"
+
+/* Return a newly allocated quoted representation of the len bytes at str */
+char *nasm_quote(const char *str, size_t len);
+/*
+ * Dequote str in place and return the resulting length; if endptr is
+ * not NULL, *endptr is set to where the scan stopped (the closing
+ * quote, or the terminating null).
+ */
+size_t nasm_unquote(char *str, char **endptr);
+/* Return a pointer to the closing quote (or terminating null) of str */
+char *nasm_skip_string(char *str);
+
+#endif /* NASM_QUOTE_H */
+
diff --git a/asm/rdstrnum.c b/asm/rdstrnum.c
new file mode 100644
index 00000000..d9d2a69f
--- /dev/null
+++ b/asm/rdstrnum.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * rdstrnum.c
+ *
+ * This converts a NASM string to an integer, used when a string
+ * is used in an integer constant context. This is a binary conversion,
+ * not a conversion from a numeric constant in text form.
+ */
+
+#include "compiler.h"
+#include "nasmlib.h"
+#include "nasm.h"
+
+/*
+ * Pack the length bytes at str into an integer, with str[0] ending up
+ * in the least significant byte.
+ *
+ * *warn is set to true if a further byte had to be shifted in while
+ * the top byte of the word (64 bits when globalbits is 64, otherwise
+ * 32 bits) was already occupied, and to false otherwise.
+ */
+int64_t readstrnum(char *str, int length, bool *warn)
+{
+    /*
+     * Accumulate in an unsigned type: the value is shifted left into
+     * (and, on overflow, past) its top byte, which is undefined
+     * behaviour for a signed integer.
+     */
+    uint64_t charconst = 0;
+    uint64_t topbyte;
+    int i;
+
+    *warn = false;
+
+    /* Most significant byte of the word size being assembled for */
+    topbyte = (globalbits == 64) ? UINT64_C(0xFF00000000000000)
+                                 : UINT64_C(0xFF000000);
+
+    /* Walk backwards from the end so the first byte is added last */
+    str += length;
+    for (i = 0; i < length; i++) {
+        if (charconst & topbyte)
+            *warn = true;
+        charconst = (charconst << 8) + (uint8_t)*--str;
+    }
+
+    return (int64_t)charconst;
+}
diff --git a/asm/segalloc.c b/asm/segalloc.c
new file mode 100644
index 00000000..60596221
--- /dev/null
+++ b/asm/segalloc.c
@@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * segalloc.c segment number allocation for the Netwide Assembler
+ */
+
+#include "compiler.h"
+#include "nasm.h"
+#include "nasmlib.h"
+#include "insns.h"
+
+/*
+ * Hand out a new segment number. The numbers start at 0 and go up by
+ * 2 on every call, so only even values are ever returned.
+ */
+int32_t seg_alloc(void)
+{
+    static int32_t next_seg = 0;
+
+    next_seg += 2;
+    return next_seg - 2;
+}
diff --git a/asm/stdscan.c b/asm/stdscan.c
new file mode 100644
index 00000000..1287db26
--- /dev/null
+++ b/asm/stdscan.c
@@ -0,0 +1,344 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include "compiler.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "quote.h"
+#include "stdscan.h"
+#include "insns.h"
+
+/*
+ * Standard scanner routine used by parser.c and some output
+ * formats. It keeps a succession of temporary-storage strings in
+ * stdscan_tempstorage, which can be cleared using stdscan_reset.
+ */
+/* Current scan position; set by stdscan_set() and advanced by stdscan() */
+static char *stdscan_bufptr = NULL;
+/* Array of strings handed out by stdscan_copy() and not yet freed */
+static char **stdscan_tempstorage = NULL;
+/* Allocated slots in stdscan_tempstorage, and how many are in use */
+static int stdscan_tempsize = 0, stdscan_templen = 0;
+/* Number of slots added each time stdscan_tempstorage is grown */
+#define STDSCAN_TEMP_DELTA 256
+
+/* Point the scanner at str; the string is not copied */
+void stdscan_set(char *str)
+{
+ stdscan_bufptr = str;
+}
+
+/* Return the scanner's current position in the string being scanned */
+char *stdscan_get(void)
+{
+ return stdscan_bufptr;
+}
+
+/*
+ * Free the most recently stored temporary string. No check is made
+ * here, so the caller must make sure that at least one is stored.
+ */
+static void stdscan_pop(void)
+{
+ nasm_free(stdscan_tempstorage[--stdscan_templen]);
+}
+
+/*
+ * Free every temporary string stored so far. The array holding them
+ * is kept for reuse.
+ */
+void stdscan_reset(void)
+{
+ while (stdscan_templen > 0)
+ stdscan_pop();
+}
+
+/*
+ * Release all scanner temporaries and the array that holds them.
+ *
+ * Unimportant cleanup is done to avoid confusing people who are trying
+ * to debug real memory leaks
+ */
+void stdscan_cleanup(void)
+{
+    stdscan_reset();
+    nasm_free(stdscan_tempstorage);
+    /*
+     * Forget the released array; otherwise a later stdscan_copy()
+     * would see spare capacity and store into freed memory.
+     */
+    stdscan_tempstorage = NULL;
+    stdscan_tempsize = 0;
+}
+
+/*
+ * Make a NUL-terminated copy of the len bytes at p and record it in
+ * stdscan_tempstorage, so that stdscan_pop() or stdscan_reset() can
+ * free it later. Returns the copy.
+ */
+static char *stdscan_copy(char *p, int len)
+{
+    char *dup = nasm_malloc(len + 1);
+
+    memcpy(dup, p, len);
+    dup[len] = '\0';
+
+    /* Grow the table by a fixed number of slots whenever it is full */
+    if (stdscan_templen >= stdscan_tempsize) {
+        stdscan_tempsize += STDSCAN_TEMP_DELTA;
+        stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
+                                           stdscan_tempsize *
+                                           sizeof(char *));
+    }
+
+    stdscan_tempstorage[stdscan_templen] = dup;
+    stdscan_templen++;
+    return dup;
+}
+
+/*
+ * Settle the type of a token that was written inside braces, based
+ * on the flags in tv->t_flag, and return that type.
+ *
+ * A token without any TFLAG_BRC_ANY bit is reported as an error and
+ * becomes TOKEN_INVALID. A TFLAG_BRC_OPT token whose t_integer is an
+ * opmask register becomes TOKEN_OPMASK. Any other token keeps the
+ * type it already has.
+ */
+static int stdscan_handle_brace(struct tokenval *tv)
+{
+    if (!(tv->t_flag & TFLAG_BRC_ANY)) {
+        /* invalid token is put inside braces */
+        nasm_error(ERR_NONFATAL,
+                   "`%s' is not a valid decorator with braces", tv->t_charptr);
+        tv->t_type = TOKEN_INVALID;
+        return tv->t_type;
+    }
+
+    /* within braces, opmask register is now used as a mask */
+    if ((tv->t_flag & TFLAG_BRC_OPT) &&
+        is_reg_class(OPMASKREG, tv->t_integer))
+        tv->t_type = TOKEN_OPMASK;
+
+    return tv->t_type;
+}
+ /*
+ * stdscan() - fetch the next token starting at stdscan_bufptr.
+ *
+ * private_data is unused. The token is described in *tv; its type is
+ * stored in tv->t_type and is also the return value. Normally
+ * stdscan_bufptr is advanced past the text that was consumed; at end of
+ * string and at a semicolon comment TOKEN_EOS is returned and the
+ * pointer stays where the white-space skip left it.
+ *
+ * The branches are tried in this order: identifier or keyword, the
+ * dollar tokens TOKEN_HERE and TOKEN_BASE, number, quoted string,
+ * brace-enclosed decorator, comment, two-character operator, and last
+ * any other single character, whose value (as uint8_t) is the type.
+ */
+ int stdscan(void *private_data, struct tokenval *tv)
+{
+ char ourcopy[MAX_KEYWORD + 1], *r, *s; /* ourcopy: lower-cased copy used for keyword lookup */
+
+ (void)private_data; /* Don't warn that this parameter is unused */
+
+ stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+ if (!*stdscan_bufptr)
+ return tv->t_type = TOKEN_EOS; /* nothing left to scan */
+
+ /* we have a token; either an id, a number or a char.
+ * A dollar sign directly followed by an identifier-start character
+ * forces the word to be an identifier even if it spells a keyword. */
+ if (isidstart(*stdscan_bufptr) ||
+ (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
+ /* now we've got an identifier */
+ bool is_sym = false; /* set when the name was written with a leading dollar sign */
+ int token_type;
+
+ if (*stdscan_bufptr == '$') {
+ is_sym = true;
+ stdscan_bufptr++;
+ }
+
+ r = stdscan_bufptr++; /* r marks the first character of the name */
+ /* read the entire buffer to advance the buffer pointer but... */
+ while (isidchar(*stdscan_bufptr))
+ stdscan_bufptr++;
+
+ /* ... copy only up to IDLEN_MAX-1 characters */
+ tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r < IDLEN_MAX ?
+ stdscan_bufptr - r : IDLEN_MAX - 1);
+
+ if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
+ return tv->t_type = TOKEN_ID; /* bypass all other checks: escaped, or too long to be a keyword */
+
+ for (s = tv->t_charptr, r = ourcopy; *s; s++)
+ *r++ = nasm_tolower(*s);
+ *r = '\0'; /* fits in ourcopy: longer names returned above (assumes stdscan_copy() NUL-terminates at the requested length - confirm) */
+ /* right, so we have an identifier sitting in temp storage. now,
+ * is it actually a register or instruction name, or what? */
+ token_type = nasm_token_hash(ourcopy, tv);
+
+ if (unlikely(tv->t_flag & TFLAG_WARN)) { /* table entry carries TFLAG_WARN: accepted, but warn */
+ nasm_error(ERR_WARNING|ERR_PASS1|ERR_WARN_PTR,
+ "`%s' is not a NASM keyword", tv->t_charptr);
+ }
+
+ if (likely(!(tv->t_flag & TFLAG_BRC))) {
+ /* most of the tokens fall into this case */
+ return token_type;
+ } else {
+ return tv->t_type = TOKEN_ID; /* TFLAG_BRC token met outside braces: plain identifier */
+ }
+ } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
+ /*
+ * It's a $ sign with no following hex number; this must
+ * mean it's a Here token ($), evaluating to the current
+ * assembly location, or a Base token ($$), evaluating to
+ * the base of the current segment.
+ */
+ stdscan_bufptr++;
+ if (*stdscan_bufptr == '$') {
+ stdscan_bufptr++;
+ return tv->t_type = TOKEN_BASE;
+ }
+ return tv->t_type = TOKEN_HERE;
+ } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
+ bool rn_error;
+ bool is_hex = false;
+ bool is_float = false;
+ bool has_e = false;
+ char c;
+
+ r = stdscan_bufptr; /* r marks the start of the number text, prefix included */
+
+ if (*stdscan_bufptr == '$') {
+ stdscan_bufptr++;
+ is_hex = true; /* dollar prefix: hex number */
+ }
+
+ for (;;) { /* consume every character that can belong to the number */
+ c = *stdscan_bufptr++;
+
+ if (!is_hex && (c == 'e' || c == 'E')) {
+ has_e = true;
+ if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-') {
+ /*
+ * e can only be followed by +/- if it is either a
+ * prefixed hex number or a floating-point number
+ */
+ is_float = true;
+ stdscan_bufptr++;
+ }
+ } else if (c == 'H' || c == 'h' || c == 'X' || c == 'x') {
+ is_hex = true; /* any h or x marks the number as hex from here on */
+ } else if (c == 'P' || c == 'p') {
+ is_float = true; /* p always means floating-point; a sign may follow */
+ if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-')
+ stdscan_bufptr++;
+ } else if (isnumchar(c) || c == '_')
+ ; /* just advance */
+ else if (c == '.')
+ is_float = true;
+ else
+ break;
+ }
+ stdscan_bufptr--; /* Point to first character beyond number */
+
+ if (has_e && !is_hex) {
+ /* 1e13 is floating-point, but 1e13h is not */
+ is_float = true;
+ }
+
+ if (is_float) {
+ tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r); /* text only; no conversion is done here */
+ return tv->t_type = TOKEN_FLOAT;
+ } else {
+ r = stdscan_copy(r, stdscan_bufptr - r);
+ tv->t_integer = readnum(r, &rn_error);
+ stdscan_pop(); /* NOTE(review): presumably discards the temporary copy made above - confirm */
+ if (rn_error) {
+ /* some malformation occurred */
+ return tv->t_type = TOKEN_ERRNUM;
+ }
+ tv->t_charptr = NULL;
+ return tv->t_type = TOKEN_NUM;
+ }
+ } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"' ||
+ *stdscan_bufptr == '`') {
+ /* a quoted string; nasm_unquote() is handed the text in place, its
+ * return value is stored in t_inttwo, and it updates stdscan_bufptr,
+ * which must end up on the matching closing quote */
+ char start_quote = *stdscan_bufptr;
+ tv->t_charptr = stdscan_bufptr;
+ tv->t_inttwo = nasm_unquote(tv->t_charptr, &stdscan_bufptr);
+ if (*stdscan_bufptr != start_quote)
+ return tv->t_type = TOKEN_ERRSTR;
+ stdscan_bufptr++; /* Skip final quote */
+ return tv->t_type = TOKEN_STR;
+ } else if (*stdscan_bufptr == '{') {
+ /* now we've got a decorator */
+ int token_len;
+
+ stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr); /* NOTE(review): appears to be a no-op, the pointer is on the opening brace here */
+
+ r = ++stdscan_bufptr;
+ /*
+ * read the entire buffer to advance the buffer pointer
+ * {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens.
+ */
+ while (isbrcchar(*stdscan_bufptr))
+ stdscan_bufptr++;
+
+ token_len = stdscan_bufptr - r;
+
+ /* ... copy only up to DECOLEN_MAX-1 characters */
+ tv->t_charptr = stdscan_copy(r, token_len < DECOLEN_MAX ?
+ token_len : DECOLEN_MAX - 1);
+
+ stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+ /* if brace is not closed properly or token is too long */
+ if ((*stdscan_bufptr != '}') || (token_len > MAX_KEYWORD)) {
+ nasm_error(ERR_NONFATAL,
+ "invalid decorator token inside braces");
+ return tv->t_type = TOKEN_INVALID;
+ }
+
+ stdscan_bufptr++; /* skip closing brace */
+
+ for (s = tv->t_charptr, r = ourcopy; *s; s++)
+ *r++ = nasm_tolower(*s);
+ *r = '\0';
+
+ /* right, so we have a decorator sitting in temp storage. */
+ nasm_token_hash(ourcopy, tv); /* return value ignored; the result is read back from *tv */
+
+ /* handle tokens inside braces */
+ return stdscan_handle_brace(tv);
+ } else if (*stdscan_bufptr == ';') {
+ /* a comment has happened - stay (the pointer is not advanced) */
+ return tv->t_type = TOKEN_EOS;
+ } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_SHR;
+ } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_SHL;
+ } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_SDIV;
+ } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_SMOD;
+ } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_EQ;
+ } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_NE; /* same token as the != spelling below */
+ } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_NE;
+ } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_LE;
+ } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_GE;
+ } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_DBL_AND;
+ } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_DBL_XOR;
+ } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
+ stdscan_bufptr += 2;
+ return tv->t_type = TOKEN_DBL_OR;
+ } else /* just an ordinary char */
+ return tv->t_type = (uint8_t)(*stdscan_bufptr++);
+}
diff --git a/asm/stdscan.h b/asm/stdscan.h
new file mode 100644
index 00000000..8dbc2d03
--- /dev/null
+++ b/asm/stdscan.h
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * stdscan.h header file for stdscan.c
+ */
+
+#ifndef NASM_STDSCAN_H
+#define NASM_STDSCAN_H
+
+/* Standard scanner
+ *
+ * stdscan() reads one token from the current scan position into *tv and
+ * returns its type; its private_data argument is ignored.
+ *
+ * nasm_token_hash() is emitted by tokhash.pl: it looks the given string
+ * up in the generated keyword table, fills in *tv and returns the token
+ * type, or TOKEN_ID when the string is not a keyword.
+ *
+ * NOTE(review): the other functions are defined in stdscan.c; judging by
+ * their names they set, fetch, reset and release the scan buffer state -
+ * confirm against the definitions.
+ */
+void stdscan_set(char *str);
+char *stdscan_get(void);
+void stdscan_reset(void);
+int stdscan(void *private_data, struct tokenval *tv);
+int nasm_token_hash(const char *token, struct tokenval *tv);
+void stdscan_cleanup(void);
+
+#endif /* NASM_STDSCAN_H */
diff --git a/asm/strfunc.c b/asm/strfunc.c
new file mode 100644
index 00000000..236b9d2f
--- /dev/null
+++ b/asm/strfunc.c
@@ -0,0 +1,359 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ * See the file AUTHORS included with the NASM distribution for
+ * the specific copyright holders.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * strfunc.c
+ *
+ * String transformation functions
+ */
+
+#include "nasmlib.h"
+#include "nasm.h"
+
+/*
+ * utf8_to_16le - transcode a UTF-8 byte string to UTF-16LE.
+ *
+ * str, len: the input bytes; no terminator is required.
+ * op:       destination, written through WRITESHORT(); may be NULL, in
+ *           which case nothing is stored and only the size is computed.
+ *
+ * Returns the size of the output in bytes (two per 16-bit unit), or
+ * (size_t)-1 when the input is rejected: a non-continuation byte where a
+ * continuation byte is owed, a stray continuation byte or 0xfe/0xff in
+ * lead position, an overlong form, a surrogate (0xd800-0xdfff), a value
+ * above 0x10ffff, or input that ends in the middle of a sequence.
+ * Anything already stored through op is not undone on failure.
+ */
+static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
+{
+#define EMIT(x) do { if (op) { WRITESHORT(op,x); } units++; } while(0)
+
+    size_t units = 0;           /* 16-bit units produced so far */
+    int pending = 0;            /* continuation bytes still owed */
+    uint32_t cp = 0;            /* code point being assembled */
+    uint32_t lowest = 0;        /* smallest value legal for this length */
+
+    for (; len; len--) {
+        uint8_t byte = *str++;
+
+        if (pending) {
+            /* Inside a multi-byte sequence only 10xxxxxx is acceptable */
+            if ((byte & 0xc0) != 0x80)
+                return (size_t)-1;
+
+            cp = (cp << 6) | (byte & 0x3f);
+            if (--pending)
+                continue;
+
+            /* Sequence complete: validate, then emit one or two units */
+            if (cp < lowest || cp > 0x10ffff ||
+                (cp >= 0xd800 && cp <= 0xdfff))
+                return (size_t)-1;
+
+            if (cp > 0xffff) {
+                /* Outside the BMP: high surrogate, then low surrogate */
+                cp -= 0x10000;
+                EMIT(0xd800 | (cp >> 10));
+                EMIT(0xdc00 | (cp & 0x3ff));
+            } else {
+                EMIT(cp);
+            }
+            continue;
+        }
+
+        if (byte < 0x80) {
+            EMIT(byte);
+            continue;
+        }
+
+        if (byte < 0xc0 || byte >= 0xfe)
+            return (size_t)-1;  /* Invalid UTF-8 */
+
+        /* Lead byte: payload bits, bytes to follow, overlong threshold */
+        if (byte < 0xe0) {
+            cp = byte & 0x1f;
+            pending = 1;
+            lowest = 0x80;
+        } else if (byte < 0xf0) {
+            cp = byte & 0x0f;
+            pending = 2;
+            lowest = 0x800;
+        } else if (byte < 0xf8) {
+            cp = byte & 0x07;
+            pending = 3;
+            lowest = 0x10000;
+        } else if (byte < 0xfc) {
+            cp = byte & 0x03;
+            pending = 4;
+            lowest = 0x200000;
+        } else {
+            cp = byte & 0x01;
+            pending = 5;
+            lowest = 0x4000000;
+        }
+    }
+
+    /* A sequence left unfinished at end of input is an error */
+    if (pending)
+        return (size_t)-1;
+
+    return units << 1;
+
+#undef EMIT
+}
+
+/*
+ * utf8_to_16be - transcode a UTF-8 byte string to UTF-16BE.
+ *
+ * str, len: the input bytes; no terminator is required.
+ * op:       destination, written one byte at a time through WRITECHAR();
+ *           may be NULL, in which case nothing is stored and only the
+ *           size is computed.
+ *
+ * Returns the size of the output in bytes (two per 16-bit unit), or
+ * (size_t)-1 when the input is rejected: a non-continuation byte where a
+ * continuation byte is owed, a stray continuation byte or 0xfe/0xff in
+ * lead position, an overlong form, a surrogate (0xd800-0xdfff), a value
+ * above 0x10ffff, or input that ends in the middle of a sequence.
+ */
+static size_t utf8_to_16be(uint8_t *str, size_t len, char *op)
+{
+    /* Store one 16-bit unit, most significant byte first */
+#define EMIT(x)                                 \
+    do {                                        \
+        uint16_t _y = (x);                      \
+        if (op) {                               \
+            WRITECHAR(op, _y >> 8);             \
+            WRITECHAR(op, _y);                  \
+        }                                       \
+        outlen++;                               \
+    } while (0)
+
+    size_t outlen = 0;          /* 16-bit units produced so far */
+    int expect = 0;             /* continuation bytes still owed */
+    uint8_t c;
+    uint32_t v = 0, vmin = 0;   /* code point so far; overlong threshold */
+
+    while (len--) {
+        c = *str++;
+
+        if (expect) {
+            if ((c & 0xc0) != 0x80) {
+                /* Not a continuation byte */
+                return (size_t)-1;
+            } else {
+                v = (v << 6) | (c & 0x3f);
+                if (!--expect) {
+                    if (v < vmin || v > 0x10ffff ||
+                        (v >= 0xd800 && v <= 0xdfff)) {
+                        return (size_t)-1;
+                    } else if (v > 0xffff) {
+                        /*
+                         * Surrogate pair: the high (0xd800) unit always
+                         * comes first.  Big-endian only affects the order
+                         * of the two bytes inside each unit, which EMIT
+                         * takes care of; it does not swap the units.
+                         */
+                        v -= 0x10000;
+                        EMIT(0xd800 | (v >> 10));
+                        EMIT(0xdc00 | (v & 0x3ff));
+                    } else {
+                        EMIT(v);
+                    }
+                }
+                continue;
+            }
+        }
+
+        if (c < 0x80) {
+            EMIT(c);
+        } else if (c < 0xc0 || c >= 0xfe) {
+            /* Invalid UTF-8 */
+            return (size_t)-1;
+        } else if (c < 0xe0) {
+            v = c & 0x1f;
+            expect = 1;
+            vmin = 0x80;
+        } else if (c < 0xf0) {
+            v = c & 0x0f;
+            expect = 2;
+            vmin = 0x800;
+        } else if (c < 0xf8) {
+            v = c & 0x07;
+            expect = 3;
+            vmin = 0x10000;
+        } else if (c < 0xfc) {
+            v = c & 0x03;
+            expect = 4;
+            vmin = 0x200000;
+        } else {
+            v = c & 0x01;
+            expect = 5;
+            vmin = 0x4000000;
+        }
+    }
+
+    /* A sequence left unfinished at end of input is an error */
+    return expect ? (size_t)-1 : outlen << 1;
+
+#undef EMIT
+}
+
+/*
+ * utf8_to_32le - transcode a UTF-8 byte string to UTF-32LE.
+ *
+ * str, len: the input bytes; no terminator is required.
+ * op:       destination, written through WRITELONG(); may be NULL, in
+ *           which case nothing is stored and only the size is computed.
+ *
+ * Returns the size of the output in bytes (four per code point), or
+ * (size_t)-1 when the input is rejected: a non-continuation byte where a
+ * continuation byte is owed, a stray continuation byte or 0xfe/0xff in
+ * lead position, an overlong form, a surrogate (0xd800-0xdfff), or
+ * input that ends in the middle of a sequence.
+ *
+ * NOTE(review): no upper limit is applied, so values above 0x10ffff
+ * (reachable through the 4-, 5- and 6-byte forms) are passed through
+ * unchanged - confirm that this is intended.
+ */
+static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
+{
+#define EMIT(x) do { if (op) { WRITELONG(op,x); } count++; } while(0)
+
+    size_t count = 0;           /* code points produced so far */
+    int pending = 0;            /* continuation bytes still owed */
+    uint32_t cp = 0;            /* code point being assembled */
+    uint32_t lowest = 0;        /* smallest value legal for this length */
+
+    for (; len; len--) {
+        uint8_t byte = *str++;
+
+        if (pending) {
+            /* Inside a multi-byte sequence only 10xxxxxx is acceptable */
+            if ((byte & 0xc0) != 0x80)
+                return (size_t)-1;
+
+            cp = (cp << 6) | (byte & 0x3f);
+            if (--pending)
+                continue;
+
+            /* Sequence complete: reject overlong forms and surrogates */
+            if (cp < lowest || (cp >= 0xd800 && cp <= 0xdfff))
+                return (size_t)-1;
+
+            EMIT(cp);
+            continue;
+        }
+
+        if (byte < 0x80) {
+            EMIT(byte);
+            continue;
+        }
+
+        if (byte < 0xc0 || byte >= 0xfe)
+            return (size_t)-1;  /* Invalid UTF-8 */
+
+        /* Lead byte: payload bits, bytes to follow, overlong threshold */
+        if (byte < 0xe0) {
+            cp = byte & 0x1f;
+            pending = 1;
+            lowest = 0x80;
+        } else if (byte < 0xf0) {
+            cp = byte & 0x0f;
+            pending = 2;
+            lowest = 0x800;
+        } else if (byte < 0xf8) {
+            cp = byte & 0x07;
+            pending = 3;
+            lowest = 0x10000;
+        } else if (byte < 0xfc) {
+            cp = byte & 0x03;
+            pending = 4;
+            lowest = 0x200000;
+        } else {
+            cp = byte & 0x01;
+            pending = 5;
+            lowest = 0x4000000;
+        }
+    }
+
+    /* A sequence left unfinished at end of input is an error */
+    if (pending)
+        return (size_t)-1;
+
+    return count << 2;
+
+#undef EMIT
+}
+
+/*
+ * utf8_to_32be - transcode a UTF-8 byte string to UTF-32BE.
+ *
+ * str, len: the input bytes; no terminator is required.
+ * op:       destination, written one byte at a time through WRITECHAR();
+ *           may be NULL, in which case nothing is stored and only the
+ *           size is computed.
+ *
+ * Returns the size of the output in bytes (four per code point), or
+ * (size_t)-1 when the input is rejected: a non-continuation byte where a
+ * continuation byte is owed, a stray continuation byte or 0xfe/0xff in
+ * lead position, an overlong form, a surrogate (0xd800-0xdfff), or
+ * input that ends in the middle of a sequence.
+ *
+ * NOTE(review): no upper limit is applied, so values above 0x10ffff
+ * (reachable through the 4-, 5- and 6-byte forms) are passed through
+ * unchanged - confirm that this is intended.
+ */
+static size_t utf8_to_32be(uint8_t *str, size_t len, char *op)
+{
+    /* Store one 32-bit value, most significant byte first */
+#define EMIT(x)                                 \
+    do {                                        \
+        uint32_t _y = (x);                      \
+        if (op) {                               \
+            WRITECHAR(op,_y >> 24);             \
+            WRITECHAR(op,_y >> 16);             \
+            WRITECHAR(op,_y >> 8);              \
+            WRITECHAR(op,_y);                   \
+        }                                       \
+        count++;                                \
+    } while (0)
+
+    size_t count = 0;           /* code points produced so far */
+    int pending = 0;            /* continuation bytes still owed */
+    uint32_t cp = 0;            /* code point being assembled */
+    uint32_t lowest = 0;        /* smallest value legal for this length */
+
+    for (; len; len--) {
+        uint8_t byte = *str++;
+
+        if (pending) {
+            /* Inside a multi-byte sequence only 10xxxxxx is acceptable */
+            if ((byte & 0xc0) != 0x80)
+                return (size_t)-1;
+
+            cp = (cp << 6) | (byte & 0x3f);
+            if (--pending)
+                continue;
+
+            /* Sequence complete: reject overlong forms and surrogates */
+            if (cp < lowest || (cp >= 0xd800 && cp <= 0xdfff))
+                return (size_t)-1;
+
+            EMIT(cp);
+            continue;
+        }
+
+        if (byte < 0x80) {
+            EMIT(byte);
+            continue;
+        }
+
+        if (byte < 0xc0 || byte >= 0xfe)
+            return (size_t)-1;  /* Invalid UTF-8 */
+
+        /* Lead byte: payload bits, bytes to follow, overlong threshold */
+        if (byte < 0xe0) {
+            cp = byte & 0x1f;
+            pending = 1;
+            lowest = 0x80;
+        } else if (byte < 0xf0) {
+            cp = byte & 0x0f;
+            pending = 2;
+            lowest = 0x800;
+        } else if (byte < 0xf8) {
+            cp = byte & 0x07;
+            pending = 3;
+            lowest = 0x10000;
+        } else if (byte < 0xfc) {
+            cp = byte & 0x03;
+            pending = 4;
+            lowest = 0x200000;
+        } else {
+            cp = byte & 0x01;
+            pending = 5;
+            lowest = 0x4000000;
+        }
+    }
+
+    /* A sequence left unfinished at end of input is an error */
+    if (pending)
+        return (size_t)-1;
+
+    return count << 2;
+
+#undef EMIT
+}
+
+/* Common signature of the converters: (input, input length, output or NULL) */
+typedef size_t (*transform_func)(uint8_t *, size_t, char *);
+
+/*
+ * string_transform - run the converter selected by func over str.
+ *
+ * The converter is called twice: first with a NULL destination to learn
+ * the output size, then again to fill a buffer of that size plus one
+ * byte obtained from nasm_malloc().  The extra byte is set to '\0'.
+ *
+ * On success *out receives the buffer and the length reported by the
+ * second call is returned.  If the sizing call reports an error,
+ * (size_t)-1 is returned, nothing is allocated and *out is not touched.
+ *
+ * func is used directly as an index into the converter table and is not
+ * range-checked here.
+ */
+size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
+{
+    /* This should match enum strfunc in nasm.h */
+    static const transform_func converters[] = {
+        utf8_to_16le,
+        utf8_to_16le,
+        utf8_to_16be,
+        utf8_to_32le,
+        utf8_to_32le,
+        utf8_to_32be,
+    };
+    uint8_t *src = (uint8_t *)str;
+    transform_func convert = converters[func];
+    size_t need;
+    char *dst;
+
+    /* Sizing pass: no output is written */
+    need = convert(src, len, NULL);
+    if (need == (size_t)-1)
+        return (size_t)-1;
+
+    dst = nasm_malloc(need + 1);
+    dst[need] = '\0';           /* Forcibly null-terminate the buffer */
+    *out = dst;
+
+    return convert(src, len, dst);
+}
diff --git a/asm/tokens.dat b/asm/tokens.dat
new file mode 100644
index 00000000..528f2431
--- /dev/null
+++ b/asm/tokens.dat
@@ -0,0 +1,135 @@
+## --------------------------------------------------------------------------
+##
+## Copyright 1996-2016 The NASM Authors - All Rights Reserved
+## See the file AUTHORS included with the NASM distribution for
+## the specific copyright holders.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following
+## conditions are met:
+##
+## * Redistributions of source code must retain the above copyright
+## notice, this list of conditions and the following disclaimer.
+## * Redistributions in binary form must reproduce the above
+## copyright notice, this list of conditions and the following
+## disclaimer in the documentation and/or other materials provided
+## with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# Tokens other than instructions and registers
+#
+
+% TOKEN_PREFIX, 0, 0, P_*
+a16
+a32
+a64
+asp
+lock
+o16
+o32
+o64
+osp
+rep
+repe
+repne
+repnz
+repz
+times
+wait
+xacquire
+xrelease
+bnd
+nobnd
+
+% TOKEN_SPECIAL, 0, 0, S_*
+abs
+byte
+dword
+far
+long
+near
+nosplit
+oword
+qword
+rel
+short
+strict
+to
+tword
+word
+yword
+zword
+
+% TOKEN_ID, 0, TFLAG_WARN, 0
+ptr
+
+% TOKEN_FLOAT, 0, 0, 0
+__infinity__
+__nan__
+__qnan__
+__snan__
+
+% TOKEN_FLOATIZE, 0, 0, FLOAT_{__float*__}
+__float8__
+__float16__
+__float32__
+__float64__
+__float80m__
+__float80e__
+__float128l__
+__float128h__
+
+% TOKEN_STRFUNC, 0, 0, STRFUNC_{__*__}
+__utf16__
+__utf16le__
+__utf16be__
+__utf32__
+__utf32le__
+__utf32be__
+
+% TOKEN_IFUNC, 0, 0, IFUNC_{__*__}
+__ilog2e__
+__ilog2w__
+__ilog2f__
+__ilog2c__
+
+% TOKEN_*, 0, 0, 0
+seg
+wrt
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC | TFLAG_BRDCAST , BRC_1TO{1to*}
+1to2
+1to4
+1to8
+1to16
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC, BRC_{*-sae}
+rn-sae
+rd-sae
+ru-sae
+rz-sae
+
+% TOKEN_DECORATOR, 0, TFLAG_BRC, BRC_*
+sae
+z
+
+% TOKEN_PREFIX, 0, TFLAG_BRC, P_*
+evex
+vex3
+vex2
diff --git a/asm/tokhash.pl b/asm/tokhash.pl
new file mode 100755
index 00000000..07bc6abe
--- /dev/null
+++ b/asm/tokhash.pl
@@ -0,0 +1,284 @@
+#!/usr/bin/perl
+## --------------------------------------------------------------------------
+##
+## Copyright 1996-2014 The NASM Authors - All Rights Reserved
+## See the file AUTHORS included with the NASM distribution for
+## the specific copyright holders.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted provided that the following
+## conditions are met:
+##
+## * Redistributions of source code must retain the above copyright
+## notice, this list of conditions and the following disclaimer.
+## * Redistributions in binary form must reproduce the above
+## copyright notice, this list of conditions and the following
+## disclaimer in the documentation and/or other materials provided
+## with the distribution.
+##
+## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##
+## --------------------------------------------------------------------------
+
+#
+# Generate a perfect hash for token parsing
+#
+# Usage: tokhash.pl {h|c} insns.dat regs.dat tokens.dat
+#
+
+# Presumably the source of gen_perfect_hash() and verify_hash_table(),
+# which this script calls but does not define.
+require 'phash.ph';
+
+# Command line: output selector ('h' or 'c'), then the three data files
+my ($output, $insns_dat, $regs_dat, $tokens_dat) = @ARGV;
+
+%tokens = ();           # token string -> index of its entry in @tokendata
+@tokendata = ();        # one C initializer body per token, in definition order
+
+#
+# Condition-code suffixes, used to expand each "...cc" instruction name
+#
+@conditions = qw(a ae b be c e g ge l le
+                 na nae nb nbe nc ne ng nge nl
+                 nle no np ns nz o p pe po s z);
+
+#
+# Read insns.dat
+#
+# A line that starts with an upper-case mnemonic followed by white space
+# defines one token.  If the mnemonic ends in a literal lower-case "cc",
+# one token is defined per entry of @conditions instead.  Only the first
+# definition of a token is recorded; later repeats are skipped silently.
+#
+open(ID, "< ${insns_dat}") or die "$0: cannot open $insns_dat: $!\n";
+while (defined($line = <ID>)) {
+    next unless ($line =~ /^([A-Z0-9_]+)(|cc)\s/);
+
+    my ($stem, $ccmark) = ($1, $2);
+    my $opcode = $stem.$ccmark;         # suffix of the I_* enum name
+    my $lcname = lc($stem);             # token text as typed by the user
+
+    if ($ccmark eq '') {
+        # Single instruction token
+        unless (defined($tokens{$lcname})) {
+            $tokens{$lcname} = scalar @tokendata;
+            push(@tokendata, "\"${lcname}\", TOKEN_INSN, C_none, 0, I_${opcode}");
+        }
+        next;
+    }
+
+    # Conditional instruction: one token per condition code
+    foreach my $cond (@conditions) {
+        my $name = $lcname.$cond;
+        next if (defined($tokens{$name}));
+        $tokens{$name} = scalar @tokendata;
+        push(@tokendata, "\"${name}\", TOKEN_INSN, C_\U$cond\E, 0, I_${opcode}");
+    }
+}
+close(ID);
+
+#
+# Read regs.dat
+#
+# A matching line consists of a register name, two further fields, a
+# number and an optional token flag.  A name that contains a numeric
+# range "N-M" is expanded into one register per number (for instance a
+# name of the form foo0-3 would give foo0, foo1, foo2 and foo3); any
+# text after the range is kept as a suffix.  Unlike instructions, a
+# register name that is already defined is a fatal error.
+#
+open(RD, "< ${regs_dat}") or die "$0: cannot open $regs_dat: $!\n";
+while (defined($line = <RD>)) {
+    if ($line =~ /^([a-z0-9_-]+)\s*\S+\s*\S+\s*[0-9]+\s*(\S*)/) {
+        $reg = $1;
+        $reg_flag = $2;
+
+        if ($reg =~ /^(.*[^0-9])([0-9]+)\-([0-9]+)(|[^0-9].*)$/) {
+            # Dashed sequence: start with the first register of the range
+            $nregs = $3-$2+1;
+            $reg = $1.$2.$4;
+            $reg_nr = $2;
+            $reg_prefix = $1;
+            $reg_suffix = $4;
+        } else {
+            $nregs = 1;
+            # undef is a unary operator: "undef $a, $b" would clear only
+            # $a, so each variable gets its own undef.
+            undef $reg_prefix;
+            undef $reg_suffix;
+        }
+
+        while ($nregs--) {
+            if (defined($tokens{$reg})) {
+                die "Duplicate definition: $reg\n";
+            }
+            $tokens{$reg} = scalar @tokendata;
+            if ($reg_flag eq '') {
+                push(@tokendata, "\"${reg}\", TOKEN_REG, 0, 0, R_\U${reg}\E");
+            } else {
+                push(@tokendata, "\"${reg}\", TOKEN_REG, 0, ${reg_flag}, R_\U${reg}\E");
+            }
+
+            if (defined($reg_prefix)) {
+                # Build the name of the next register in the range
+                $reg_nr++;
+                $reg = sprintf("%s%u%s", $reg_prefix, $reg_nr, $reg_suffix);
+            } else {
+                # Not a dashed sequence
+                die if ($nregs);
+            }
+        }
+    }
+}
+close(RD);
+
+#
+# Read tokens.dat
+#
+# A line starting with "% " sets the pattern applied to the token lines
+# that follow it.  In a pattern, "*" stands for the upper-cased token.
+# A "{...}" group is instead matched against the token (with "*" acting
+# as a wildcard) and replaced by the upper-cased text the wildcard
+# matched; e.g. FLOAT_{__float*__} turns __float32__ into FLOAT_32.
+# A token that is already defined is a fatal error.
+#
+open(TD, "< ${tokens_dat}") or die "$0: cannot open $tokens_dat: $!\n";
+while (defined($line = <TD>)) {
+    if ($line =~ /^\%\s+(.*)$/) {
+        $pattern = $1;
+    } elsif ($line =~ /^([a-z0-9_-]+)/) {
+        $token = $1;
+
+        if (defined($tokens{$token})) {
+            die "Duplicate definition: $token\n";
+        }
+        $tokens{$token} = scalar @tokendata;
+
+        $data = $pattern;
+        if ($data =~ /^(.*)\{(.*)\}(.*)$/) {
+            # "my" binds tighter than the comma, so the list form is
+            # needed to make both variables lexical.
+            my ($head, $tail) = ($1, $3);
+            my $px = $2;
+
+            $px =~ s/\*/(.*)/g;
+            if ($token =~ /$px/i) {
+                $data = $head."\U$1".$tail;
+            } else {
+                die "$0: token $token doesn't match $px\n";
+            }
+        }
+
+        # Any "*" still left in the pattern becomes the upper-cased token
+        $data =~ s/\*/\U$token/g;
+
+        push(@tokendata, "\"$token\", $data");
+    }
+}
+close(TD);
+
+#
+# Write the requested file to standard output: 'h' selects tokens.h,
+# 'c' selects tokhash.c.  Anything else is a usage error.
+#
+if ($output eq 'h') {
+    #
+    # tokens.h
+    #
+    # The only thing defined is MAX_KEYWORD, the length of the longest
+    # token string collected above.
+    #
+
+    $max_len = 0;
+    foreach $token (keys(%tokens)) {
+        if (length($token) > $max_len) {
+            $max_len = length($token);
+        }
+    }
+
+    print "/*\n";
+    print " * This file is generated from insns.dat, regs.dat and tokens.dat\n";
+    print " * by tokhash.pl; do not edit.\n";
+    print " */\n";
+    print "\n";
+
+    print "#ifndef NASM_TOKENS_H\n";
+    print "#define NASM_TOKENS_H\n";
+    print "\n";
+    print "#define MAX_KEYWORD $max_len /* length of longest keyword */\n";
+    print "\n";
+    print "#endif /* NASM_TOKENS_H */\n";
+} elsif ($output eq 'c') {
+    #
+    # tokhash.c
+    #
+
+    @hashinfo = gen_perfect_hash(\%tokens);
+    if (!@hashinfo) {
+        die "$0: no hash found\n";
+    }
+
+    # Paranoia...
+    verify_hash_table(\%tokens, \@hashinfo);
+
+    # $n: size of each hash array, $sv: the two seed words, $g: the
+    # interleaved contents of the two hash arrays
+    ($n, $sv, $g) = @hashinfo;
+
+    # $n-1 is used as a bit mask below, so $n must be a power of two
+    die if ($n & ($n-1));
+
+    print "/*\n";
+    print " * This file is generated from insns.dat, regs.dat and tokens.dat\n";
+    print " * by tokhash.pl; do not edit.\n";
+    print " */\n";
+    print "\n";
+
+    print "#include \"compiler.h\"\n";
+    print "#include <string.h>\n";
+    print "#include \"nasm.h\"\n";
+    print "#include \"hashtbl.h\"\n";
+    print "#include \"insns.h\"\n";
+    print "#include \"stdscan.h\"\n";
+    print "\n";
+
+    # These somewhat odd sizes and ordering thereof are due to the
+    # relative ranges of the types; this makes it fit in 16 bytes on
+    # 64-bit machines and 12 bytes on 32-bit machines.
+    print "struct tokendata {\n";
+    print " const char *string;\n";
+    print " int16_t tokentype;\n";
+    print " int8_t aux;\n";
+    print " int8_t tokflag;\n";
+    print " int32_t num;\n";
+    print "};\n";
+    print "\n";
+
+    print "int nasm_token_hash(const char *token, struct tokenval *tv)\n";
+    print "{\n";
+
+    # Put a large value in unused slots. This makes it extremely unlikely
+    # that any combination that involves unused slot will pass the range test.
+    # This speeds up rejection of unrecognized tokens, i.e. identifiers.
+    print "#define UNUSED (65535/3)\n";
+
+    # Even entries of @$g form hash1 ...
+    print " static const int16_t hash1[$n] = {\n";
+    for ($i = 0; $i < $n; $i++) {
+        my $h = ${$g}[$i*2+0];
+        print " ", defined($h) ? $h : 'UNUSED', ",\n";
+    }
+    print " };\n";
+
+    # ... and odd entries form hash2
+    print " static const int16_t hash2[$n] = {\n";
+    for ($i = 0; $i < $n; $i++) {
+        my $h = ${$g}[$i*2+1];
+        print " ", defined($h) ? $h : 'UNUSED', ",\n";
+    }
+    print " };\n";
+
+    printf " static const struct tokendata tokendata[%d] = {\n", scalar(@tokendata);
+    foreach $d (@tokendata) {
+        print " { ", $d, " },\n";
+    }
+    print " };\n";
+
+    print " uint32_t k1, k2;\n";
+    print " uint64_t crc;\n";
+    # For correct overflow behavior, "ix" should be unsigned of the same
+    # width as the hash arrays.
+    print " uint16_t ix;\n";
+    print " const struct tokendata *data;\n";
+    print "\n";
+    print " tv->t_flag = 0;\n";
+    printf " crc = crc64(UINT64_C(0x%08x%08x), token);\n",
+        $$sv[0], $$sv[1];
+    print " k1 = (uint32_t)crc;\n";
+    print " k2 = (uint32_t)(crc >> 32);\n";
+    print "\n";
+    printf " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+    printf " if (ix >= %d)\n", scalar(@tokendata);
+    print " return tv->t_type = TOKEN_ID;\n";
+    print "\n";
+    print " data = &tokendata[ix];\n";
+
+    # The hash only nominates a candidate; the string compare decides
+    print " if (strcmp(data->string, token))\n";
+    print " return tv->t_type = TOKEN_ID;\n";
+    print "\n";
+    print " tv->t_integer = data->num;\n";
+    print " tv->t_inttwo = data->aux;\n";
+    print " tv->t_flag = data->tokflag;\n";
+    print " return tv->t_type = data->tokentype;\n";
+    print "}\n";
+} else {
+    # Fail loudly instead of silently producing an empty output file
+    die "$0: unknown output type '$output' (expected 'h' or 'c')\n";
+}