aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author H. Peter Anvin (Intel) <hpa@zytor.com> 2020-07-30 16:56:52 -0700
committer H. Peter Anvin (Intel) <hpa@zytor.com> 2020-07-30 16:56:52 -0700
commit bae6b070ffdcf250fb84abc75768daa5ed2391f5 (patch)
tree e292cc435d2cb100528aebf18c8508783ce7d6e0
parent 6299a3114ce0f3acd55d07de201a8ca2f0a83059 (diff)
download nasm-bae6b070ffdcf250fb84abc75768daa5ed2391f5.tar.gz
nasm-bae6b070ffdcf250fb84abc75768daa5ed2391f5.tar.xz
nasm-bae6b070ffdcf250fb84abc75768daa5ed2391f5.zip
BR 3392705: AVX512: reinstate the SSE-like opcodes for VPCMPEQ/GT
The VPCMP instructions are controlled by an immediate byte, but there is also a set of SSE-derived legacy opcodes for VPCMPEQ and VPCMPGT. For the specific cases of VPCMPEQ and VPCMPGT, prefer those opcodes since they are one byte shorter.

Reported-by: ig <glucksmann@avast.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
-rw-r--r-- test/vpcmp.asm 27
-rw-r--r-- x86/insns.dat 27
2 files changed, 54 insertions, 0 deletions
diff --git a/test/vpcmp.asm b/test/vpcmp.asm
new file mode 100644
index 00000000..16377cb0
--- /dev/null
+++ b/test/vpcmp.asm
@@ -0,0 +1,27 @@
+ bits 64
+ vpcmpeqb k2{k2},zmm0,zmm1
+ vpcmpgtb k2{k2},zmm0,zmm1
+ vpcmpeqw k2{k2},zmm0,zmm1
+ vpcmpgtw k2{k2},zmm0,zmm1
+ vpcmpeqd k2{k2},zmm0,zmm1
+ vpcmpgtd k2{k2},zmm0,zmm1
+ vpcmpeqq k2{k2},zmm0,zmm1
+ vpcmpgtq k2{k2},zmm0,zmm1
+
+ vpcmpb k2{k2},zmm0,zmm1,0
+ vpcmpb k2{k2},zmm0,zmm1,6
+ vpcmpw k2{k2},zmm0,zmm1,0
+ vpcmpw k2{k2},zmm0,zmm1,6
+ vpcmpd k2{k2},zmm0,zmm1,0
+ vpcmpd k2{k2},zmm0,zmm1,6
+ vpcmpq k2{k2},zmm0,zmm1,0
+ vpcmpq k2{k2},zmm0,zmm1,6
+
+ vpcmpneqb k2{k2},zmm0,zmm1
+ vpcmpleb k2{k2},zmm0,zmm1
+ vpcmpneqw k2{k2},zmm0,zmm1
+ vpcmplew k2{k2},zmm0,zmm1
+ vpcmpneqd k2{k2},zmm0,zmm1
+ vpcmpled k2{k2},zmm0,zmm1
+ vpcmpneqq k2{k2},zmm0,zmm1
+ vpcmpleq k2{k2},zmm0,zmm1
diff --git a/x86/insns.dat b/x86/insns.dat
index e24c2479..18b573a6 100644
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -4811,6 +4811,33 @@ VPBROADCASTW ymmreg|mask|z,reg64 [rm: evex.256.66.0f38.w0 7b
VPBROADCASTW zmmreg|mask|z,reg16 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
VPBROADCASTW zmmreg|mask|z,reg32 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
VPBROADCASTW zmmreg|mask|z,reg64 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
+; VPCMPEQx and VPCMPGTx come in two flavors: SSE-like, and VPCMP with immediate. They are both
+; valid, but prefer the SSE version as it is one byte shorter.
+VPCMPEQB kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 74 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQB kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 74 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQB kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 74 /r ] AVX512BW,FUTURE
+VPCMPEQD kreg|mask,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.66.0f.w0 76 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQD kreg|mask,ymmreg,ymmrm256|b32 [rvm:fv: evex.nds.256.66.0f.w0 76 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQD kreg|mask,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.66.0f.w0 76 /r ] AVX512,FUTURE
+VPCMPEQQ kreg|mask,xmmreg,xmmrm128|b64 [rvm:fv: evex.nds.128.66.0f38.w1 29 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQQ kreg|mask,ymmreg,ymmrm256|b64 [rvm:fv: evex.nds.256.66.0f38.w1 29 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQQ kreg|mask,zmmreg,zmmrm512|b64 [rvm:fv: evex.nds.512.66.0f38.w1 29 /r ] AVX512,FUTURE
+VPCMPEQW kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 75 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQW kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 75 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQW kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 75 /r ] AVX512BW,FUTURE
+VPCMPGTB kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 64 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTB kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 64 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTB kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 64 /r ] AVX512BW,FUTURE
+VPCMPGTD kreg|mask,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.66.0f.w0 66 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTD kreg|mask,ymmreg,ymmrm256|b32 [rvm:fv: evex.nds.256.66.0f.w0 66 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTD kreg|mask,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.66.0f.w0 66 /r ] AVX512,FUTURE
+VPCMPGTQ kreg|mask,xmmreg,xmmrm128|b64 [rvm:fv: evex.nds.128.66.0f38.w1 37 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTQ kreg|mask,ymmreg,ymmrm256|b64 [rvm:fv: evex.nds.256.66.0f38.w1 37 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTQ kreg|mask,zmmreg,zmmrm512|b64 [rvm:fv: evex.nds.512.66.0f38.w1 37 /r ] AVX512,FUTURE
+VPCMPGTW kreg|mask,xmmreg,xmmrm128 [rvm:fvm: evex.nds.128.66.0f.wig 65 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTW kreg|mask,ymmreg,ymmrm256 [rvm:fvm: evex.nds.256.66.0f.wig 65 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTW kreg|mask,zmmreg,zmmrm512 [rvm:fvm: evex.nds.512.66.0f.wig 65 /r ] AVX512BW,FUTURE
+; The systematic VPCMP with immediate instructions
VPCMPEQB kreg|mask,xmmreg,xmmrm128 [rvmi:fvm: evex.nds.128.66.0f3a.w0 3f /r 00 ] AVX512VL,AVX512BW,FUTURE
VPCMPEQB kreg|mask,ymmreg,ymmrm256 [rvmi:fvm: evex.nds.256.66.0f3a.w0 3f /r 00 ] AVX512VL,AVX512BW,FUTURE
VPCMPEQB kreg|mask,zmmreg,zmmrm512 [rvmi:fvm: evex.nds.512.66.0f3a.w0 3f /r 00 ] AVX512BW,FUTURE