aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2018-02-22 14:53:46 -0800
committerH. Peter Anvin <hpa@zytor.com>2018-02-22 14:53:46 -0800
commit281f5bd92c3f2eb820e29d5d8893d718e6428372 (patch)
treeaeb41dd4ecdf24f108f680bf7717b3cfbed55aed
parent6686fc627ec2f805bc74599912c46d0ee1a4047c (diff)
parent4dbf3a96a4b17add396ea4592a7fde7cb8083d52 (diff)
downloadnasm-281f5bd92c3f2eb820e29d5d8893d718e6428372.tar.gz
nasm-281f5bd92c3f2eb820e29d5d8893d718e6428372.tar.xz
nasm-281f5bd92c3f2eb820e29d5d8893d718e6428372.zip
Merge branch 'master' of ssh://repo.or.cz/srv/git/nasm
-rw-r--r--aclocal.m427
-rw-r--r--asm/assemble.c4
-rw-r--r--asm/directiv.c122
-rw-r--r--asm/nasm.c129
-rw-r--r--configure.ac7
-rw-r--r--doc/changes.src8
-rw-r--r--doc/nasmdoc.src43
-rw-r--r--include/compiler.h24
-rw-r--r--include/iflag.h121
-rw-r--r--include/insns.h6
-rw-r--r--include/nasmlib.h4
-rw-r--r--output/outelf.c3
-rw-r--r--test/ret.asm56
-rw-r--r--test/vaesenc.asm22
-rw-r--r--x86/insns-iflags.ph212
-rw-r--r--x86/insns.dat20
16 files changed, 524 insertions, 284 deletions
diff --git a/aclocal.m4 b/aclocal.m4
index bd93193a..36d36cf8 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -172,3 +172,30 @@ AC_DEFUN(_PA_ADD_HEADER,
AC_DEFUN(PA_ADD_HEADERS,
[m4_map_args_w([$1],[_PA_ADD_HEADER(],[)])])
+
+dnl --------------------------------------------------------------------------
+dnl PA_CHECK_BAD_STDC_INLINE
+dnl
+dnl Some versions of gcc seem to apply -Wmissing-prototypes to C99
+dnl inline functions, which means we need to use GNU inline syntax
+dnl --------------------------------------------------------------------------
+AC_DEFUN(PA_CHECK_BAD_STDC_INLINE,
+[AC_MSG_CHECKING([if $CC supports C99 external inlines])
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
+AC_INCLUDES_DEFAULT
+
+/* Don't mistake GNU inlines for c99 */
+#ifdef __GNUC_GNU_INLINE__
+# error "Using gnu inline standard"
+#endif
+
+inline int foo(int x)
+{
+ return x+1;
+}
+ ])],
+ [AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_STDC_INLINE, 1,
+ [Define to 1 if your compiler supports C99 extern inline])],
+ [AC_MSG_RESULT([no])
+ PA_ADD_CFLAGS([-fgnu89-inline])])])
diff --git a/asm/assemble.c b/asm/assemble.c
index fc72065e..561bba55 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -1379,7 +1379,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
length++;
} else if ((ins->rex & REX_L) &&
!(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
- iflag_ffs(&cpu) >= IF_X86_64) {
+ iflag_cpu_level_ok(&cpu, IF_X86_64)) {
/* LOCK-as-REX.R */
assert_no_prefix(ins, PPS_LOCK);
lockcheck = false; /* Already errored, no need for warning */
diff --git a/asm/directiv.c b/asm/directiv.c
index 937f17af..7c741685 100644
--- a/asm/directiv.c
+++ b/asm/directiv.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -56,78 +56,90 @@
#include "labels.h"
#include "iflag.h"
-static iflag_t get_cpu(char *value)
+struct cpunames {
+ const char *name;
+ unsigned int level;
+ /* Eventually a table of features */
+};
+
+static iflag_t get_cpu(const char *value)
{
iflag_t r;
+ const struct cpunames *cpu;
+ static const struct cpunames cpunames[] = {
+ { "8086", IF_8086 },
+ { "186", IF_186 },
+ { "286", IF_286 },
+ { "386", IF_386 },
+ { "486", IF_486 },
+ { "586", IF_PENT },
+ { "pentium", IF_PENT },
+ { "pentiummmx", IF_PENT },
+ { "686", IF_P6 },
+ { "p6", IF_P6 },
+ { "ppro", IF_P6 },
+ { "pentiumpro", IF_P6 },
+ { "p2", IF_P6 }, /* +MMX */
+ { "pentiumii", IF_P6 },
+ { "p3", IF_KATMAI },
+ { "katmai", IF_KATMAI },
+ { "p4", IF_WILLAMETTE },
+ { "willamette", IF_WILLAMETTE },
+ { "prescott", IF_PRESCOTT },
+ { "x64", IF_X86_64 },
+ { "x86-64", IF_X86_64 },
+ { "ia64", IF_IA64 },
+ { "ia-64", IF_IA64 },
+ { "itanium", IF_IA64 },
+ { "itanic", IF_IA64 },
+ { "merced", IF_IA64 },
+ { "any", IF_PLEVEL },
+ { "default", IF_PLEVEL },
+ { "all", IF_PLEVEL },
+ { NULL, IF_PLEVEL } /* Error and final default entry */
+ };
+
+ for (cpu = cpunames; cpu->name; cpu++) {
+ if (!strcmp(value, cpu->name))
+ break;
+ }
- iflag_clear_all(&r);
-
- if (!strcmp(value, "8086"))
- iflag_set(&r, IF_8086);
- else if (!strcmp(value, "186"))
- iflag_set(&r, IF_186);
- else if (!strcmp(value, "286"))
- iflag_set(&r, IF_286);
- else if (!strcmp(value, "386"))
- iflag_set(&r, IF_386);
- else if (!strcmp(value, "486"))
- iflag_set(&r, IF_486);
- else if (!strcmp(value, "586") ||
- !nasm_stricmp(value, "pentium"))
- iflag_set(&r, IF_PENT);
- else if (!strcmp(value, "686") ||
- !nasm_stricmp(value, "ppro") ||
- !nasm_stricmp(value, "pentiumpro") ||
- !nasm_stricmp(value, "p2"))
- iflag_set(&r, IF_P6);
- else if (!nasm_stricmp(value, "p3") ||
- !nasm_stricmp(value, "katmai"))
- iflag_set(&r, IF_KATMAI);
- else if (!nasm_stricmp(value, "p4") || /* is this right? -- jrc */
- !nasm_stricmp(value, "willamette"))
- iflag_set(&r, IF_WILLAMETTE);
- else if (!nasm_stricmp(value, "prescott"))
- iflag_set(&r, IF_PRESCOTT);
- else if (!nasm_stricmp(value, "x64") ||
- !nasm_stricmp(value, "x86-64"))
- iflag_set(&r, IF_X86_64);
- else if (!nasm_stricmp(value, "ia64") ||
- !nasm_stricmp(value, "ia-64") ||
- !nasm_stricmp(value, "itanium")||
- !nasm_stricmp(value, "itanic") ||
- !nasm_stricmp(value, "merced"))
- iflag_set(&r, IF_IA64);
- else {
- iflag_set(&r, IF_PLEVEL);
+ if (!cpu->name) {
nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
- "unknown 'cpu' type");
+ "unknown 'cpu' type '%s'", value);
}
+
+ iflag_set_cpu(&r, cpu->level);
return r;
}
-static int get_bits(char *value)
+static int get_bits(const char *value)
{
- int i;
+ int i = atoi(value);
- if ((i = atoi(value)) == 16)
- return i; /* set for a 16-bit segment */
- else if (i == 32) {
- if (iflag_ffs(&cpu) < IF_386) {
+ switch (i) {
+ case 16:
+ break; /* Always safe */
+ case 32:
+ if (!iflag_cpu_level_ok(&cpu, IF_386)) {
nasm_error(ERR_NONFATAL,
- "cannot specify 32-bit segment on processor below a 386");
+ "cannot specify 32-bit segment on processor below a 386");
i = 16;
}
- } else if (i == 64) {
- if (iflag_ffs(&cpu) < IF_X86_64) {
+ break;
+ case 64:
+ if (!iflag_cpu_level_ok(&cpu, IF_X86_64)) {
nasm_error(ERR_NONFATAL,
- "cannot specify 64-bit segment on processor below an x86-64");
+ "cannot specify 64-bit segment on processor below an x86-64");
i = 16;
}
- } else {
+ break;
+ default:
nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
- "`%s' is not a valid segment size; must be 16, 32 or 64",
- value);
+ "`%s' is not a valid segment size; must be 16, 32 or 64",
+ value);
i = 16;
+ break;
}
return i;
}
diff --git a/asm/nasm.c b/asm/nasm.c
index 0ffb036c..666c3375 100644
--- a/asm/nasm.c
+++ b/asm/nasm.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -86,6 +86,11 @@ static void usage(void);
static bool using_debug_info, opt_verbose_info;
static const char *debug_format;
+#ifndef ABORT_ON_PANIC
+# define ABORT_ON_PANIC 0
+#endif
+static bool abort_on_panic = ABORT_ON_PANIC;
+
bool tasm_compatible_mode = false;
int pass0, passn;
static int pass1, pass2; /* XXX: Get rid of these, they are redundant */
@@ -323,8 +328,8 @@ int main(int argc, char **argv)
timestamp();
- iflag_set(&cpu, IF_PLEVEL);
- iflag_set(&cmd_cpu, IF_PLEVEL);
+ iflag_set_default_cpu(&cpu);
+ iflag_set_default_cpu(&cmd_cpu);
pass0 = 0;
want_usage = terminate_after_phase = false;
@@ -690,19 +695,25 @@ static char *quote_for_wmake(const char *str)
return os;
}
-struct textargs {
- const char *label;
- int value;
-};
-
enum text_options {
+ OPT_BOGUS,
+ OPT_VERSION,
+ OPT_ABORT_ON_PANIC,
OPT_PREFIX,
OPT_POSTFIX
};
+struct textargs {
+ const char *label;
+ enum text_options opt;
+ bool need_arg;
+};
static const struct textargs textopts[] = {
- {"prefix", OPT_PREFIX},
- {"postfix", OPT_POSTFIX},
- {NULL, 0}
+ {"v", OPT_VERSION, false},
+ {"version", OPT_VERSION, false},
+ {"abort-on-panic", OPT_ABORT_ON_PANIC, false},
+ {"prefix", OPT_PREFIX, true},
+ {"postfix", OPT_POSTFIX, true},
+ {NULL, OPT_BOGUS, false}
};
static void show_version(void)
@@ -1022,61 +1033,49 @@ static bool process_arg(char *p, char *q, int pass)
case '-':
{
- int s;
+ const struct textargs *tx;
if (p[2] == 0) { /* -- => stop processing options */
- stopoptions = 1;
+ stopoptions = true;
break;
}
- if (!nasm_stricmp(p, "--v"))
- show_version();
-
- if (!nasm_stricmp(p, "--version"))
- show_version();
+ for (tx = textopts; tx->label; tx++) {
+ if (!nasm_stricmp(p + 2, tx->label))
+ break;
+ }
- for (s = 0; textopts[s].label; s++) {
- if (!nasm_stricmp(p + 2, textopts[s].label)) {
+ if (tx->need_arg) {
+ if (!q) {
+ nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+ "option `--%s' requires an argument",
+ p + 2);
break;
}
+ advance = true;
}
- switch (s) {
+ switch (tx->opt) {
+ case OPT_VERSION:
+ show_version();
+ break;
+ case OPT_ABORT_ON_PANIC:
+ abort_on_panic = true;
+ break;
case OPT_PREFIX:
+ if (pass == 2)
+ strlcpy(lprefix, q, PREFIX_MAX);
+ break;
case OPT_POSTFIX:
- {
- if (!q) {
- nasm_error(ERR_NONFATAL | ERR_NOFILE |
- ERR_USAGE,
- "option `--%s' requires an argument",
- p + 2);
- break;
- } else {
- advance = 1, param = q;
- }
-
- switch (s) {
- case OPT_PREFIX:
- if (pass == 2)
- strlcpy(lprefix, param, PREFIX_MAX);
- break;
- case OPT_POSTFIX:
- if (pass == 2)
- strlcpy(lpostfix, param, POSTFIX_MAX);
- break;
- default:
- panic();
- break;
- }
- break;
- }
-
+ if (pass == 2)
+ strlcpy(lpostfix, q, POSTFIX_MAX);
+ break;
+ case OPT_BOGUS:
+ nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
+ "unrecognized option `--%s'", p + 2);
+ break;
default:
- {
- nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE,
- "unrecognised option `--%s'", p + 2);
- break;
- }
+ panic();
}
break;
}
@@ -1289,8 +1288,21 @@ static void assemble_file(const char *fname, StrList **depend_ptr)
uint64_t prev_offset_changed;
unsigned int stall_count = 0; /* Make sure we make forward progress... */
- if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386)
- nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+ switch (cmd_sb) {
+ case 16:
+ break;
+ case 32:
+ if (!iflag_cpu_level_ok(&cmd_cpu, IF_386))
+ nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+ break;
+ case 64:
+ if (!iflag_cpu_level_ok(&cmd_cpu, IF_X86_64))
+ nasm_fatal(0, "command line: 64-bit segment size requires a higher cpu");
+ break;
+ default:
+ panic();
+ break;
+ }
pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */
for (passn = 1; pass0 <= 2; passn++) {
@@ -1812,9 +1824,10 @@ static void nasm_verror_common(int severity, const char *fmt, va_list args)
break; /* placate silly compilers */
case ERR_PANIC:
fflush(NULL);
-#ifdef ABORT_ON_PANIC
- abort(); /* halt, catch fire, dump core/stop debugger */
-#endif
+
+ if (abort_on_panic)
+ abort(); /* halt, catch fire, dump core/stop debugger */
+
if (ofile) {
fclose(ofile);
remove(outname);
diff --git a/configure.ac b/configure.ac
index 4e27bb3e..44c9e179 100644
--- a/configure.ac
+++ b/configure.ac
@@ -288,6 +288,13 @@ PA_ARG_ENABLED([werror],
)
dnl
+dnl On some versions of gcc, -Werror=missing-prototypes causes problems
+dnl with C99-style external inlines. Test this *after* adding the -Werror
+dnl options.
+dnl
+PA_CHECK_BAD_STDC_INLINE
+
+dnl
dnl support ccache
dnl
PA_ARG_ENABLED([ccache], [compile with ccache], [CC="ccache $CC"], [])
diff --git a/doc/changes.src b/doc/changes.src
index 54a12e21..ddfe6b38 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -7,10 +7,18 @@
The NASM 2 series supports x86-64, and is the production version of NASM
since 2007.
+\S{cl-2.13.04} Version 2.13.04
+
+\b Added \c{-W}, \c{-D}, and \c{-Q} suffix aliases for \c{RET}
+ instructions so the operand sizes of these instructions can be
+ encoded without using \c{o16}, \c{o32} or \c{o64}.
+
\S{cl-2.13.03} Version 2.13.03
\b Added AVX and AVX512 \c{VAES*} and \c{VPCLMULQDQ} instructions.
+\b Fixed missing dwarf record in x32 ELF output format.
+
\S{cl-2.13.02} Version 2.13.02
\b Fix false positive in testing of numeric overflows.
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index f998cc60..7b331d35 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -1,6 +1,6 @@
\# --------------------------------------------------------------------------
\#
-\# Copyright 1996-2017 The NASM Authors - All Rights Reserved
+\# Copyright 1996-2018 The NASM Authors - All Rights Reserved
\# See the file AUTHORS included with the NASM distribution for
\# the specific copyright holders.
\#
@@ -7704,10 +7704,15 @@ platforms pass arguments in registers rather than on the stack.
Furthermore, 64-bit platforms use SSE2 by default for floating point.
Please see the ABI documentation for your platform.
-64-bit platforms differ in the sizes of the fundamental datatypes, not
-just from 32-bit platforms but from each other. If a specific size
-data type is desired, it is probably best to use the types defined in
-the Standard C header \c{<inttypes.h>}.
+64-bit platforms differ in the sizes of the C/C++ fundamental
+datatypes, not just from 32-bit platforms but from each other. If a
+specific size data type is desired, it is probably best to use the
+types defined in the standard C header \c{<inttypes.h>}.
+
+All known 64-bit platforms except some embedded platforms require that
+the stack is 16-byte aligned immediately before a \c{CALL} instruction.
+Since \c{CALL} pushes an 8-byte return address, the stack pointer
+(\c{RSP}) is an \e{odd} multiple of 8 bytes at the entry to a function.
In 64-bit mode, the default instruction size is still 32 bits. When
loading a value into a 32-bit register (but not an 8- or 16-bit
@@ -7755,12 +7760,30 @@ immediate as \c{DWORD}:
The length of these instructions are 10, 5 and 7 bytes, respectively.
+If optimization is enabled and NASM can determine at assembly time
+that a shorter instruction will suffice, the shorter instruction will
+be emitted unless of course \c{STRICT QWORD} or \c{STRICT DWORD} is
+specified (see \k{strict}):
+
+\c mov rax,1 ; Assembles as "mov eax,1" (5 bytes)
+\c mov rax,strict qword 1 ; Full 10-byte instruction
+\c mov rax,strict dword 1 ; 7-byte instruction
+\c mov rax,symbol ; 10 bytes, not known at assembly time
+\c lea rax,[rel symbol] ; 7 bytes, usually preferred by the ABI
+
+Note that \c{lea rax,[rel symbol]} is position-independent, whereas
+\c{mov rax,symbol} is not. Most ABIs prefer or even require
+position-independent code in 64-bit mode. However, the \c{MOV}
+instruction is able to reference a symbol anywhere in the 64-bit
+address space, whereas \c{LEA} is only able to access a symbol
+within 2 GB of the instruction itself (see below.)
+
The only instructions which take a full \I{64-bit displacement}64-bit
\e{displacement} is loading or storing, using \c{MOV}, \c{AL}, \c{AX},
\c{EAX} or \c{RAX} (but no other registers) to an absolute 64-bit address.
Since this is a relatively rarely used instruction (64-bit code generally uses
relative addressing), the programmer has to explicitly declare the
-displacement size as \c{QWORD}:
+displacement size as \c{ABS QWORD}:
\c default abs
\c
@@ -7797,9 +7820,11 @@ calls, and thus are available for use by the function without saving.
Integer return values are passed in \c{RAX} and \c{RDX}, in that order.
Floating point is done using SSE registers, except for \c{long
-double}. Floating-point arguments are passed in \c{XMM0} to \c{XMM7};
-return is \c{XMM0} and \c{XMM1}. \c{long double} are passed on the
-stack, and returned in \c{ST0} and \c{ST1}.
+double}, which is 80 bits (\c{TWORD}) on most platforms (Android is
+one exception; there \c{long double} is 64 bits and treated the same
+as \c{double}.) Floating-point arguments are passed in \c{XMM0} to
+\c{XMM7}; return is \c{XMM0} and \c{XMM1}. \c{long double} are passed
+on the stack, and returned in \c{ST0} and \c{ST1}.
All SSE and x87 registers are destroyed by function calls.
diff --git a/include/compiler.h b/include/compiler.h
index 6c7e20c5..aba773e7 100644
--- a/include/compiler.h
+++ b/include/compiler.h
@@ -214,15 +214,20 @@ size_t strnlen(const char *s, size_t maxlen);
/*
* Hack to support external-linkage inline functions
*/
-#ifdef __GNUC__
-# ifdef __GNUC_STDC_INLINE__
-# define HAVE_STDC_INLINE
-# else
-# define HAVE_GNU_INLINE
-# endif
-#elif defined(__STDC_VERSION__)
-# if __STDC_VERSION__ >= 199901L
-# define HAVE_STDC_INLINE
+#ifndef HAVE_STDC_INLINE
+# ifdef __GNUC__
+# ifdef __GNUC_STDC_INLINE__
+# define HAVE_STDC_INLINE
+# else
+# define HAVE_GNU_INLINE
+# endif
+# elif defined(__GNUC_GNU_INLINE__)
+/* Some other compiler implementing only GNU inline semantics? */
+# define HAVE_GNU_INLINE
+# elif defined(__STDC_VERSION__)
+# if __STDC_VERSION__ >= 199901L
+# define HAVE_STDC_INLINE
+# endif
# endif
#endif
@@ -230,6 +235,7 @@ size_t strnlen(const char *s, size_t maxlen);
# define extern_inline inline
#elif defined(HAVE_GNU_INLINE)
# define extern_inline extern inline
+# define inline_prototypes
#else
# define inline_prototypes
#endif
diff --git a/include/iflag.h b/include/iflag.h
index 289e4272..5280703e 100644
--- a/include/iflag.h
+++ b/include/iflag.h
@@ -1,30 +1,28 @@
#ifndef NASM_IFLAG_H
#define NASM_IFLAG_H
-#include <string.h>
-
#include "compiler.h"
#include "ilog2.h"
+
+#include <string.h>
+
#include "iflaggen.h"
#define IF_GENBIT(bit) (UINT32_C(1) << (bit))
-static inline unsigned int iflag_test(const iflag_t *f, unsigned int bit)
+static inline bool iflag_test(const iflag_t *f, unsigned int bit)
{
- unsigned int index = bit / 32;
- return f->field[index] & (UINT32_C(1) << (bit - (index * 32)));
+ return !!(f->field[bit >> 5] & IF_GENBIT(bit & 31));
}
static inline void iflag_set(iflag_t *f, unsigned int bit)
{
- unsigned int index = bit / 32;
- f->field[index] |= (UINT32_C(1) << (bit - (index * 32)));
+ f->field[bit >> 5] |= IF_GENBIT(bit & 31);
}
static inline void iflag_clear(iflag_t *f, unsigned int bit)
{
- unsigned int index = bit / 32;
- f->field[index] &= ~(UINT32_C(1) << (bit - (index * 32)));
+ f->field[bit >> 5] &= ~IF_GENBIT(bit & 31);
}
static inline void iflag_clear_all(iflag_t *f)
@@ -34,39 +32,21 @@ static inline void iflag_clear_all(iflag_t *f)
static inline void iflag_set_all(iflag_t *f)
{
- memset(f, 0xff, sizeof(*f));
+ memset(f, ~0, sizeof(*f));
}
+#define iflag_for_each_field(v) for ((v) = 0; (v) < IF_FIELD_COUNT; (v)++)
+
static inline int iflag_cmp(const iflag_t *a, const iflag_t *b)
{
int i;
- for (i = sizeof(a->field) / sizeof(a->field[0]) - 1; i >= 0; i--) {
+ /* This is intentionally a reverse loop! */
+ for (i = IF_FIELD_COUNT-1; i >= 0; i--) {
if (a->field[i] == b->field[i])
continue;
- return (a->field[i] > b->field[i]) ? 1 : -1;
- }
-
- return 0;
-}
-
-static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
-{
- if (a->field[3] < b->field[3])
- return -1;
- else if (a->field[3] > b->field[3])
- return 1;
- return 0;
-}
-
-static inline unsigned int iflag_ffs(const iflag_t *a)
-{
- unsigned int i;
-
- for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) {
- if (a->field[i])
- return ilog2_32(a->field[i]) + (i * 32);
+ return (int)(a->field[i] - b->field[i]);
}
return 0;
@@ -78,7 +58,7 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
unsigned int i; \
iflag_t res; \
\
- for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) \
+ iflag_for_each_field(i) \
res.field[i] = a->field[i] op b->field[i]; \
\
return res; \
@@ -86,13 +66,6 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
IF_GEN_HELPER(xor, ^)
-
-/* Use this helper to test instruction template flags */
-#define itemp_has(itemp, bit) iflag_test(&insns_flags[(itemp)->iflag_idx], bit)
-
-
-/* Maximum processor level at moment */
-#define IF_PLEVEL IF_IA64
/* Some helpers which are to work with predefined masks */
#define IF_SMASK \
(IF_GENBIT(IF_SB) |\
@@ -118,23 +91,67 @@ IF_GEN_HELPER(xor, ^)
#define itemp_arg(itemp) _itemp_arg((itemp)->iflag_idx)
#define itemp_armask(itemp) _itemp_armask((itemp)->iflag_idx)
+/*
+ * IF_8086 is the first CPU level flag and IF_PLEVEL the last
+ */
+#if IF_8086 & 31
+#error "IF_8086 must be on a uint32_t boundary"
+#endif
+#define IF_PLEVEL IF_IA64
+#define IF_CPU_FIELD (IF_8086 >> 5)
+#define IF_CPU_LEVEL_MASK ((IF_GENBIT(IF_PLEVEL & 31) << 1) - 1)
+
+/*
+ * IF_PRIV is the first instruction filtering flag
+ */
+#if IF_PRIV & 31
+#error "IF_PRIV must be on a uint32_t boundary"
+#endif
+#define IF_FEATURE_FIELD (IF_PRIV >> 5)
+
+static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
+{
+ return (int)(a->field[IF_CPU_FIELD] - b->field[IF_CPU_FIELD]);
+}
+
+static inline uint32_t _iflag_cpu_level(const iflag_t *a)
+{
+ return a->field[IF_CPU_FIELD] & IF_CPU_LEVEL_MASK;
+}
+
static inline int iflag_cmp_cpu_level(const iflag_t *a, const iflag_t *b)
{
- iflag_t v1 = *a;
- iflag_t v2 = *b;
+ uint32_t aa = _iflag_cpu_level(a);
+ uint32_t bb = _iflag_cpu_level(b);
- iflag_clear(&v1, IF_CYRIX);
- iflag_clear(&v1, IF_AMD);
+ return (int)(aa - bb);
+}
- iflag_clear(&v2, IF_CYRIX);
- iflag_clear(&v2, IF_AMD);
+/* Returns true if the CPU level is at least a certain value */
+static inline bool iflag_cpu_level_ok(const iflag_t *a, unsigned int bit)
+{
+ return _iflag_cpu_level(a) >= IF_GENBIT(bit & 31);
+}
- if (v1.field[3] < v2.field[3])
- return -1;
- else if (v1.field[3] > v2.field[3])
- return 1;
+static inline void iflag_set_all_features(iflag_t *a)
+{
+ size_t i;
- return 0;
+ for (i = IF_FEATURE_FIELD; i < IF_CPU_FIELD; i++)
+ a->field[i] = ~UINT32_C(0);
+}
+
+static inline void iflag_set_cpu(iflag_t *a, unsigned int cpu)
+{
+ a->field[0] = 0; /* Not applicable to the CPU type */
+ iflag_set_all_features(a); /* All feature masking bits set for now */
+ a->field[IF_CPU_FIELD] &= ~IF_CPU_LEVEL_MASK;
+ iflag_set(a, cpu);
+}
+
+static inline void iflag_set_default_cpu(iflag_t *a)
+{
+ iflag_set_cpu(a, IF_PLEVEL);
}
static inline iflag_t _iflag_pfmask(const iflag_t *a)
diff --git a/include/insns.h b/include/insns.h
index ac2d7924..00de2887 100644
--- a/include/insns.h
+++ b/include/insns.h
@@ -23,6 +23,12 @@ struct itemplate {
uint32_t iflag_idx; /* some flags referenced by index */
};
+/* Use this helper to test instruction template flags */
+static inline bool itemp_has(const struct itemplate *itemp, unsigned int bit)
+{
+ return iflag_test(&insns_flags[itemp->iflag_idx], bit);
+}
+
/* Disassembler table structure */
/*
diff --git a/include/nasmlib.h b/include/nasmlib.h
index ae0473a1..bb1becdf 100644
--- a/include/nasmlib.h
+++ b/include/nasmlib.h
@@ -189,11 +189,9 @@ int64_t readnum(char *str, bool *error);
int64_t readstrnum(char *str, int length, bool *warn);
/*
- * seg_init: Initialise the segment-number allocator.
* seg_alloc: allocate a hitherto unused segment number.
*/
-void pure_func seg_init(void);
-int32_t pure_func seg_alloc(void);
+int32_t seg_alloc(void);
/*
* Add/replace or remove an extension to the end of a filename
diff --git a/output/outelf.c b/output/outelf.c
index 3ca2c7c3..7ac2d54b 100644
--- a/output/outelf.c
+++ b/output/outelf.c
@@ -3273,6 +3273,9 @@ static void dwarf_generate(void)
if (is_elf32()) {
WRITELONG(pbuf,0); /* null beginning offset */
WRITELONG(pbuf,0); /* null ending offset */
+ } else if (is_elfx32()) {
+ WRITELONG(pbuf,0); /* null beginning offset */
+ WRITELONG(pbuf,0); /* null ending offset */
} else {
nasm_assert(is_elf64());
WRITEDLONG(pbuf,0); /* null beginning offset */
diff --git a/test/ret.asm b/test/ret.asm
new file mode 100644
index 00000000..a6aa5332
--- /dev/null
+++ b/test/ret.asm
@@ -0,0 +1,56 @@
+ ;; All the flavors of RET
+%ifndef ERROR
+ %define ERROR 0
+%endif
+
+
+ bits 16
+
+ ret
+ retn
+ retf
+ retw
+ retnw
+ retfw
+ retd
+ retnd
+ retfd
+%if ERROR
+ retq
+ retnq
+ retfq
+%endif
+
+ bits 32
+
+ ret
+ retn
+ retf
+ retw
+ retnw
+ retfw
+ retd
+ retnd
+ retfd
+%if ERROR
+ retq
+ retnq
+ retfq
+%endif
+
+ bits 64
+
+ ret
+ retn
+ retf ; Probably should have been RETFQ, but: legacy...
+ retw
+ retnw
+ retfw
+%if ERROR
+ retd
+ retnd
+%endif
+ retfd
+ retq
+ retnq
+ retfq
diff --git a/test/vaesenc.asm b/test/vaesenc.asm
new file mode 100644
index 00000000..9edca705
--- /dev/null
+++ b/test/vaesenc.asm
@@ -0,0 +1,22 @@
+;; BR 3392454, 3392460
+
+ bits 64
+ aesenc xmm0,xmm4
+ vaesenc zmm0,zmm0,zmm4
+ vpclmullqlqdq zmm1,zmm1,zmm5
+ vpclmulqdq zmm0, zmm1, zmm2, 0
+ vaesenclast zmm0, zmm1, zmm2
+
+ bits 32
+ aesenc xmm0,xmm4
+ vaesenc zmm0,zmm0,zmm4
+ vpclmullqlqdq zmm1,zmm1,zmm5
+ vpclmulqdq zmm0, zmm1, zmm2, 0
+ vaesenclast zmm0, zmm1, zmm2
+
+ bits 16
+ aesenc xmm0,xmm4
+ vaesenc zmm0,zmm0,zmm4
+ vpclmullqlqdq zmm1,zmm1,zmm5
+ vpclmulqdq zmm0, zmm1, zmm2, 0
+ vaesenclast zmm0, zmm1, zmm2
diff --git a/x86/insns-iflags.ph b/x86/insns-iflags.ph
index 989276f2..43bf70e8 100644
--- a/x86/insns-iflags.ph
+++ b/x86/insns-iflags.ph
@@ -64,109 +64,117 @@
# for a set of flags, so be careful moving bits (and
# don't forget to update C code generation then).
#
+sub dword_align($) {
+ my($n) = @_;
+
+ $$n = ($$n + 31) & ~31;
+ return $n;
+}
+
+my $f = 0;
my %insns_flag_bit = (
#
# dword bound, index 0 - specific flags
#
- "SM" => [ 0, "Size match"],
- "SM2" => [ 1, "Size match first two operands"],
- "SB" => [ 2, "Unsized operands can't be non-byte"],
- "SW" => [ 3, "Unsized operands can't be non-word"],
- "SD" => [ 4, "Unsized operands can't be non-dword"],
- "SQ" => [ 5, "Unsized operands can't be non-qword"],
- "SO" => [ 6, "Unsized operands can't be non-oword"],
- "SY" => [ 7, "Unsized operands can't be non-yword"],
- "SZ" => [ 8, "Unsized operands can't be non-zword"],
- "SIZE" => [ 9, "Unsized operands must match the bitsize"],
- "SX" => [ 10, "Unsized operands not allowed"],
- "AR0" => [ 11, "SB, SW, SD applies to argument 0"],
- "AR1" => [ 12, "SB, SW, SD applies to argument 1"],
- "AR2" => [ 13, "SB, SW, SD applies to argument 2"],
- "AR3" => [ 14, "SB, SW, SD applies to argument 3"],
- "AR4" => [ 15, "SB, SW, SD applies to argument 4"],
- "OPT" => [ 16, "Optimizing assembly only"],
+ "SM" => [$f++, "Size match"],
+ "SM2" => [$f++, "Size match first two operands"],
+ "SB" => [$f++, "Unsized operands can't be non-byte"],
+ "SW" => [$f++, "Unsized operands can't be non-word"],
+ "SD" => [$f++, "Unsized operands can't be non-dword"],
+ "SQ" => [$f++, "Unsized operands can't be non-qword"],
+ "SO" => [$f++, "Unsized operands can't be non-oword"],
+ "SY" => [$f++, "Unsized operands can't be non-yword"],
+ "SZ" => [$f++, "Unsized operands can't be non-zword"],
+ "SIZE" => [$f++, "Unsized operands must match the bitsize"],
+ "SX" => [$f++, "Unsized operands not allowed"],
+ "AR0" => [$f++, "SB, SW, SD applies to argument 0"],
+ "AR1" => [$f++, "SB, SW, SD applies to argument 1"],
+ "AR2" => [$f++, "SB, SW, SD applies to argument 2"],
+ "AR3" => [$f++, "SB, SW, SD applies to argument 3"],
+ "AR4" => [$f++, "SB, SW, SD applies to argument 4"],
+ "OPT" => [$f++, "Optimizing assembly only"],
#
- # dword bound, index 1 - instruction filtering flags
+ # dword bound - instruction filtering flags
#
- "PRIV" => [ 32, "Privileged instruction"],
- "SMM" => [ 33, "Only valid in SMM"],
- "PROT" => [ 34, "Protected mode only"],
- "LOCK" => [ 35, "Lockable if operand 0 is memory"],
- "NOLONG" => [ 36, "Not available in long mode"],
- "LONG" => [ 37, "Long mode"],
- "NOHLE" => [ 38, "HLE prefixes forbidden"],
- "MIB" => [ 39, "disassemble with split EA"],
- "BND" => [ 40, "BND (0xF2) prefix available"],
- "UNDOC" => [ 41, "Undocumented"],
- "HLE" => [ 42, "HLE prefixed"],
- "FPU" => [ 43, "FPU"],
- "MMX" => [ 44, "MMX"],
- "3DNOW" => [ 45, "3DNow!"],
- "SSE" => [ 46, "SSE (KNI, MMX2)"],
- "SSE2" => [ 47, "SSE2"],
- "SSE3" => [ 48, "SSE3 (PNI)"],
- "VMX" => [ 49, "VMX"],
- "SSSE3" => [ 50, "SSSE3"],
- "SSE4A" => [ 51, "AMD SSE4a"],
- "SSE41" => [ 52, "SSE4.1"],
- "SSE42" => [ 53, "SSE4.2"],
- "SSE5" => [ 54, "SSE5"],
- "AVX" => [ 55, "AVX (128b)"],
- "AVX2" => [ 56, "AVX2 (256b)"],
- "FMA" => [ 57, ""],
- "BMI1" => [ 58, ""],
- "BMI2" => [ 59, ""],
- "TBM" => [ 60, ""],
- "RTM" => [ 61, ""],
- "INVPCID" => [ 62, ""],
+ "PRIV" => [${dword_align(\$f)}++, "Privileged instruction"],
+ "SMM" => [$f++, "Only valid in SMM"],
+ "PROT" => [$f++, "Protected mode only"],
+ "LOCK" => [$f++, "Lockable if operand 0 is memory"],
+ "NOLONG" => [$f++, "Not available in long mode"],
+ "LONG" => [$f++, "Long mode"],
+ "NOHLE" => [$f++, "HLE prefixes forbidden"],
+ "MIB" => [$f++, "disassemble with split EA"],
+ "BND" => [$f++, "BND (0xF2) prefix available"],
+ "UNDOC" => [$f++, "Undocumented"],
+ "HLE" => [$f++, "HLE prefixed"],
+ "FPU" => [$f++, "FPU"],
+ "MMX" => [$f++, "MMX"],
+ "3DNOW" => [$f++, "3DNow!"],
+ "SSE" => [$f++, "SSE (KNI, MMX2)"],
+ "SSE2" => [$f++, "SSE2"],
+ "SSE3" => [$f++, "SSE3 (PNI)"],
+ "VMX" => [$f++, "VMX"],
+ "SSSE3" => [$f++, "SSSE3"],
+ "SSE4A" => [$f++, "AMD SSE4a"],
+ "SSE41" => [$f++, "SSE4.1"],
+ "SSE42" => [$f++, "SSE4.2"],
+ "SSE5" => [$f++, "SSE5"],
+ "AVX" => [$f++, "AVX (256-bit floating point)"],
+ "AVX2" => [$f++, "AVX2 (256-bit integer)"],
+ "FMA" => [$f++, ""],
+ "BMI1" => [$f++, ""],
+ "BMI2" => [$f++, ""],
+ "TBM" => [$f++, ""],
+ "RTM" => [$f++, ""],
+ "INVPCID" => [$f++, ""],
+ "AVX512" => [$f++, "AVX-512F (512-bit base architecture)"],
+ "AVX512CD" => [$f++, "AVX-512 Conflict Detection"],
+ "AVX512ER" => [$f++, "AVX-512 Exponential and Reciprocal"],
+ "AVX512PF" => [$f++, "AVX-512 Prefetch"],
+ "MPX" => [$f++, "MPX"],
+ "SHA" => [$f++, "SHA"],
+ "PREFETCHWT1" => [$f++, "PREFETCHWT1"],
+ "AVX512VL" => [$f++, "AVX-512 Vector Length Orthogonality"],
+ "AVX512DQ" => [$f++, "AVX-512 Dword and Qword"],
+ "AVX512BW" => [$f++, "AVX-512 Byte and Word"],
+ "AVX512IFMA" => [$f++, "AVX-512 IFMA instructions"],
+ "AVX512VBMI" => [$f++, "AVX-512 VBMI instructions"],
+ "AES" => [$f++, "AES instructions"],
+ "VAES" => [$f++, "AES AVX instructions"],
+ "VPCLMULQDQ" => [$f++, "Carry-Less Multiplication extention"],
- #
- # dword bound, index 2 - instruction filtering flags
- #
- "AVX512" => [ 64, "AVX-512F (512b)"],
- "AVX512CD" => [ 65, "AVX-512 Conflict Detection"],
- "AVX512ER" => [ 66, "AVX-512 Exponential and Reciprocal"],
- "AVX512PF" => [ 67, "AVX-512 Prefetch"],
- "MPX" => [ 68 ,"MPX"],
- "SHA" => [ 69 ,"SHA"],
- "PREFETCHWT1" => [ 70 ,"PREFETCHWT1"],
- "AVX512VL" => [ 71, "AVX-512 Vector Length Orthogonality"],
- "AVX512DQ" => [ 72, "AVX-512 Dword and Qword"],
- "AVX512BW" => [ 73, "AVX-512 Byte and Word"],
- "AVX512IFMA" => [ 74, "AVX-512 IFMA instructions"],
- "AVX512VBMI" => [ 75, "AVX-512 VBMI instructions"],
- "OBSOLETE" => [ 93, "Instruction removed from architecture"],
- "VEX" => [ 94, "VEX or XOP encoded instruction"],
- "EVEX" => [ 95, "EVEX encoded instruction"],
- "AES" => [ 96, "AES instructions"],
- "VAES" => [ 97, "AES AVX instructions"],
- "VPCLMULQDQ" => [ 98, "Carry-Less Multiplication extention"],
+ # Put these last
+ "OBSOLETE" => [$f++, "Instruction removed from architecture"],
+ "VEX" => [$f++, "VEX or XOP encoded instruction"],
+ "EVEX" => [$f++, "EVEX encoded instruction"],
#
- # dword bound, cpu type flags
+ # dword bound - cpu type flags
#
# The CYRIX and AMD flags should have the highest bit values; the
# disassembler selection algorithm depends on it.
#
- "8086" => [128, "8086"],
- "186" => [129, "186+"],
- "286" => [130, "286+"],
- "386" => [131, "386+"],
- "486" => [132, "486+"],
- "PENT" => [133, "Pentium"],
- "P6" => [134, "P6"],
- "KATMAI" => [135, "Katmai"],
- "WILLAMETTE" => [136, "Willamette"],
- "PRESCOTT" => [137, "Prescott"],
- "X86_64" => [138, "x86-64 (long or legacy mode)"],
- "NEHALEM" => [139, "Nehalem"],
- "WESTMERE" => [140, "Westmere"],
- "SANDYBRIDGE" => [141, "Sandy Bridge"],
- "FUTURE" => [142, "Future processor (not yet disclosed)"],
- "IA64" => [143, "IA64 (in x86 mode)"],
- "CYRIX" => [144, "Cyrix-specific"],
- "AMD" => [145, "AMD-specific"],
+ "8086" => [${dword_align(\$f)}++, "8086"],
+ "186" => [$f++, "186+"],
+ "286" => [$f++, "286+"],
+ "386" => [$f++, "386+"],
+ "486" => [$f++, "486+"],
+ "PENT" => [$f++, "Pentium"],
+ "P6" => [$f++, "P6"],
+ "KATMAI" => [$f++, "Katmai"],
+ "WILLAMETTE" => [$f++, "Willamette"],
+ "PRESCOTT" => [$f++, "Prescott"],
+ "X86_64" => [$f++, "x86-64 (long or legacy mode)"],
+ "NEHALEM" => [$f++, "Nehalem"],
+ "WESTMERE" => [$f++, "Westmere"],
+ "SANDYBRIDGE" => [$f++, "Sandy Bridge"],
+ "FUTURE" => [$f++, "Future processor (not yet disclosed)"],
+ "IA64" => [$f++, "IA64 (in x86 mode)"],
+
+ # Put these last
+ "CYRIX" => [$f++, "Cyrix-specific"],
+ "AMD" => [$f++, "AMD-specific"],
);
my %insns_flag_hash = ();
@@ -176,9 +184,9 @@ my $iflag_words;
sub get_flag_words() {
my $max = -1;
- foreach my $key (keys(%insns_flag_bit)) {
- if (${$insns_flag_bit{$key}}[0] > $max) {
- $max = ${$insns_flag_bit{$key}}[0];
+ foreach my $vp (values(%insns_flag_bit)) {
+ if ($vp->[0] > $max) {
+ $max = $vp->[0];
}
}
@@ -218,14 +226,28 @@ sub write_iflaggen_h() {
print N "#ifndef NASM_IFLAGGEN_H\n";
print N "#define NASM_IFLAGGEN_H 1\n\n";
- foreach my $key (sort { $insns_flag_bit{$a}[0] <=> $insns_flag_bit{$b}[0] } keys(%insns_flag_bit)) {
+ my @flagnames = keys(%insns_flag_bit);
+ @flagnames = sort {
+ $insns_flag_bit{$a}->[0] <=> $insns_flag_bit{$b}->[0]
+ } @flagnames;
+ my $next = 0;
+ foreach my $key (@flagnames) {
+ my $v = $insns_flag_bit{$key};
+ if ($v->[0] > $next) {
+ printf N "%-31s /* %-64s */\n", '',
+ ($next != $v->[0]-1) ?
+ sprintf("%d...%d unused", $next, $v->[0]-1) :
+ sprintf("%d unused", $next);
+ }
print N sprintf("#define IF_%-16s %3d /* %-64s */\n",
- $key, $insns_flag_bit{$key}[0], $insns_flag_bit{$key}[1]);
+ $key, $v->[0], $v->[1]);
+ $next = $v->[0] + 1;
}
print N "\n";
+ printf N "#define IF_FIELD_COUNT %d\n", $iflag_words;
print N "typedef struct {\n";
- printf N " uint32_t field[%d];\n", $iflag_words;
+ print N " uint32_t field[IF_FIELD_COUNT];\n";
print N "} iflag_t;\n";
print N "\n";
diff --git a/x86/insns.dat b/x86/insns.dat
index 99746302..1bda0e3c 100644
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -1,6 +1,6 @@
;; --------------------------------------------------------------------------
;;
-;; Copyright 1996-2017 The NASM Authors - All Rights Reserved
+;; Copyright 1996-2018 The NASM Authors - All Rights Reserved
;; See the file AUTHORS included with the NASM distribution for
;; the specific copyright holders.
;;
@@ -1129,6 +1129,24 @@ RETF void [ cb] 8086
RETF imm [i: ca iw] 8086,SW
RETN void [ c3] 8086,BND
RETN imm [i: c2 iw] 8086,SW,BND
+RETW void [ o16 c3] 8086,BND
+RETW imm [i: o16 c2 iw] 8086,SW,BND
+RETFW void [ o16 cb] 8086
+RETFW imm [i: o16 ca iw] 8086,SW
+RETNW void [ o16 c3] 8086,BND
+RETNW imm [i: o16 c2 iw] 8086,SW,BND
+RETD void [ o32 c3] 8086,BND,NOLONG
+RETD imm [i: o32 c2 iw] 8086,SW,BND,NOLONG
+RETFD void [ o32 cb] 8086
+RETFD imm [i: o32 ca iw] 8086,SW
+RETND void [ o32 c3] 8086,BND,NOLONG
+RETND imm [i: o32 c2 iw] 8086,SW,BND,NOLONG
+RETQ void [ o64nw c3] X64,BND
+RETQ imm [i: o64nw c2 iw] X64,SW,BND
+RETFQ void [ o64 cb] X64
+RETFQ imm [i: o64 ca iw] X64,SW
+RETNQ void [ o64nw c3] X64,BND
+RETNQ imm [i: o64nw c2 iw] X64,SW,BND
ROL rm8,unity [m-: d0 /0] 8086
ROL rm8,reg_cl [m-: d2 /0] 8086