From 9804c67d2192887c3603eafd295766b061da0f38 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 10 Mar 2018 03:12:00 +0000 Subject: [PATCH] [X86] Rewrite printMasking code in X86InstComments to use TSFlags to determine whether the instruction is masked. This should have been NFC, but it looks like we were missing PUNPCKHQDQ/PUNPCKLQDQ instructions in there. llvm-svn: 327200 --- .../Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 2 +- .../lib/Target/X86/InstPrinter/X86InstComments.cpp | 203 ++------------------- llvm/lib/Target/X86/InstPrinter/X86InstComments.h | 2 + .../Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 2 +- .../CodeGen/X86/avx512-intrinsics-fast-isel.ll | 16 +- llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 6 +- .../CodeGen/X86/avx512-shuffles/partial_permute.ll | 16 +- .../CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 8 +- 8 files changed, 43 insertions(+), 212 deletions(-) diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index d12706e..5860285 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -48,7 +48,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // If verbose assembly is enabled, we can print some informative comments. 
if (CommentStream) HasCustomInstComment = - EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + EmitAnyX86InstComments(MI, *CommentStream, MII, getRegisterName); unsigned Flags = MI->getFlags(); if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK)) diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index a46f22f..2939597 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -13,10 +13,12 @@ //===----------------------------------------------------------------------===// #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "Utils/X86ShuffleDecode.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -206,195 +208,21 @@ static MVT getZeroExtensionResultType(const MCInst *MI) { /// Wraps the destination register name with AVX512 mask/maskz filtering. 
static void printMasking(raw_ostream &OS, const MCInst *MI, + const MCInstrInfo &MCII, const char *(*getRegName)(unsigned)) { - bool MaskWithZero = false; - const char *MaskRegName = nullptr; + const MCInstrDesc &Desc = MCII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; - switch (MI->getOpcode()) { - default: + if (!(TSFlags & X86II::EVEX_K)) return; - CASE_MASKZ_MOVDUP(MOVDDUP, m) - CASE_MASKZ_MOVDUP(MOVDDUP, r) - CASE_MASKZ_MOVDUP(MOVSHDUP, m) - CASE_MASKZ_MOVDUP(MOVSHDUP, r) - CASE_MASKZ_MOVDUP(MOVSLDUP, m) - CASE_MASKZ_MOVDUP(MOVSLDUP, r) - CASE_MASKZ_PMOVZX(PMOVZXBD, m) - CASE_MASKZ_PMOVZX(PMOVZXBD, r) - CASE_MASKZ_PMOVZX(PMOVZXBQ, m) - CASE_MASKZ_PMOVZX(PMOVZXBQ, r) - CASE_MASKZ_PMOVZX(PMOVZXBW, m) - CASE_MASKZ_PMOVZX(PMOVZXBW, r) - CASE_MASKZ_PMOVZX(PMOVZXDQ, m) - CASE_MASKZ_PMOVZX(PMOVZXDQ, r) - CASE_MASKZ_PMOVZX(PMOVZXWD, m) - CASE_MASKZ_PMOVZX(PMOVZXWD, r) - CASE_MASKZ_PMOVZX(PMOVZXWQ, m) - CASE_MASKZ_PMOVZX(PMOVZXWQ, r) - CASE_MASKZ_UNPCK(PUNPCKHBW, m) - CASE_MASKZ_UNPCK(PUNPCKHBW, r) - CASE_MASKZ_UNPCK(PUNPCKHWD, m) - CASE_MASKZ_UNPCK(PUNPCKHWD, r) - CASE_MASKZ_UNPCK(PUNPCKHDQ, m) - CASE_MASKZ_UNPCK(PUNPCKHDQ, r) - CASE_MASKZ_UNPCK(PUNPCKLBW, m) - CASE_MASKZ_UNPCK(PUNPCKLBW, r) - CASE_MASKZ_UNPCK(PUNPCKLWD, m) - CASE_MASKZ_UNPCK(PUNPCKLWD, r) - CASE_MASKZ_UNPCK(PUNPCKLDQ, m) - CASE_MASKZ_UNPCK(PUNPCKLDQ, r) - CASE_MASKZ_UNPCK(UNPCKHPD, m) - CASE_MASKZ_UNPCK(UNPCKHPD, r) - CASE_MASKZ_UNPCK(UNPCKHPS, m) - CASE_MASKZ_UNPCK(UNPCKHPS, r) - CASE_MASKZ_UNPCK(UNPCKLPD, m) - CASE_MASKZ_UNPCK(UNPCKLPD, r) - CASE_MASKZ_UNPCK(UNPCKLPS, m) - CASE_MASKZ_UNPCK(UNPCKLPS, r) - CASE_MASKZ_SHUF(PALIGNR, r) - CASE_MASKZ_SHUF(PALIGNR, m) - CASE_MASKZ_SHUF(ALIGNQ, r) - CASE_MASKZ_SHUF(ALIGNQ, m) - CASE_MASKZ_SHUF(ALIGND, r) - CASE_MASKZ_SHUF(ALIGND, m) - CASE_MASKZ_SHUF(SHUFPD, m) - CASE_MASKZ_SHUF(SHUFPD, r) - CASE_MASKZ_SHUF(SHUFPS, m) - CASE_MASKZ_SHUF(SHUFPS, r) - CASE_MASKZ_VPERMILPI(PERMILPD, m) - CASE_MASKZ_VPERMILPI(PERMILPD, r) - 
CASE_MASKZ_VPERMILPI(PERMILPS, m) - CASE_MASKZ_VPERMILPI(PERMILPS, r) - CASE_MASKZ_VPERMILPI(PSHUFD, m) - CASE_MASKZ_VPERMILPI(PSHUFD, r) - CASE_MASKZ_VPERMILPI(PSHUFHW, m) - CASE_MASKZ_VPERMILPI(PSHUFHW, r) - CASE_MASKZ_VPERMILPI(PSHUFLW, m) - CASE_MASKZ_VPERMILPI(PSHUFLW, r) - CASE_MASKZ_VPERM(PERMPD, m) - CASE_MASKZ_VPERM(PERMPD, r) - CASE_MASKZ_VPERM(PERMQ, m) - CASE_MASKZ_VPERM(PERMQ, r) - CASE_MASKZ_VSHUF(64X2, m) - CASE_MASKZ_VSHUF(64X2, r) - CASE_MASKZ_VSHUF(32X4, m) - CASE_MASKZ_VSHUF(32X4, r) - CASE_MASKZ_INS_COMMON(BROADCASTF64X2, Z128, rm) - CASE_MASKZ_INS_COMMON(BROADCASTI64X2, Z128, rm) - CASE_MASKZ_INS_COMMON(BROADCASTF64X2, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTI64X2, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTF64X4, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTI64X4, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTF32X4, Z256, rm) - CASE_MASKZ_INS_COMMON(BROADCASTI32X4, Z256, rm) - CASE_MASKZ_INS_COMMON(BROADCASTF32X4, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTI32X4, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTF32X8, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTI32X8, , rm) - CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z128, r) - CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z128, m) - CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z256, r) - CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z256, r) - CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z256, m) - CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z256, m) - CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z, r) - CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z, r) - CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z, m) - CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z, m) - MaskWithZero = true; - MaskRegName = getRegName(MI->getOperand(1).getReg()); - break; - CASE_MASK_MOVDUP(MOVDDUP, m) - CASE_MASK_MOVDUP(MOVDDUP, r) - CASE_MASK_MOVDUP(MOVSHDUP, m) - CASE_MASK_MOVDUP(MOVSHDUP, r) - CASE_MASK_MOVDUP(MOVSLDUP, m) - CASE_MASK_MOVDUP(MOVSLDUP, r) - CASE_MASK_PMOVZX(PMOVZXBD, m) - CASE_MASK_PMOVZX(PMOVZXBD, r) - CASE_MASK_PMOVZX(PMOVZXBQ, m) - CASE_MASK_PMOVZX(PMOVZXBQ, r) - CASE_MASK_PMOVZX(PMOVZXBW, m) - 
CASE_MASK_PMOVZX(PMOVZXBW, r) - CASE_MASK_PMOVZX(PMOVZXDQ, m) - CASE_MASK_PMOVZX(PMOVZXDQ, r) - CASE_MASK_PMOVZX(PMOVZXWD, m) - CASE_MASK_PMOVZX(PMOVZXWD, r) - CASE_MASK_PMOVZX(PMOVZXWQ, m) - CASE_MASK_PMOVZX(PMOVZXWQ, r) - CASE_MASK_UNPCK(PUNPCKHBW, m) - CASE_MASK_UNPCK(PUNPCKHBW, r) - CASE_MASK_UNPCK(PUNPCKHWD, m) - CASE_MASK_UNPCK(PUNPCKHWD, r) - CASE_MASK_UNPCK(PUNPCKHDQ, m) - CASE_MASK_UNPCK(PUNPCKHDQ, r) - CASE_MASK_UNPCK(PUNPCKLBW, m) - CASE_MASK_UNPCK(PUNPCKLBW, r) - CASE_MASK_UNPCK(PUNPCKLWD, m) - CASE_MASK_UNPCK(PUNPCKLWD, r) - CASE_MASK_UNPCK(PUNPCKLDQ, m) - CASE_MASK_UNPCK(PUNPCKLDQ, r) - CASE_MASK_UNPCK(UNPCKHPD, m) - CASE_MASK_UNPCK(UNPCKHPD, r) - CASE_MASK_UNPCK(UNPCKHPS, m) - CASE_MASK_UNPCK(UNPCKHPS, r) - CASE_MASK_UNPCK(UNPCKLPD, m) - CASE_MASK_UNPCK(UNPCKLPD, r) - CASE_MASK_UNPCK(UNPCKLPS, m) - CASE_MASK_UNPCK(UNPCKLPS, r) - CASE_MASK_SHUF(PALIGNR, r) - CASE_MASK_SHUF(PALIGNR, m) - CASE_MASK_SHUF(ALIGNQ, r) - CASE_MASK_SHUF(ALIGNQ, m) - CASE_MASK_SHUF(ALIGND, r) - CASE_MASK_SHUF(ALIGND, m) - CASE_MASK_SHUF(SHUFPD, m) - CASE_MASK_SHUF(SHUFPD, r) - CASE_MASK_SHUF(SHUFPS, m) - CASE_MASK_SHUF(SHUFPS, r) - CASE_MASK_VPERMILPI(PERMILPD, m) - CASE_MASK_VPERMILPI(PERMILPD, r) - CASE_MASK_VPERMILPI(PERMILPS, m) - CASE_MASK_VPERMILPI(PERMILPS, r) - CASE_MASK_VPERMILPI(PSHUFD, m) - CASE_MASK_VPERMILPI(PSHUFD, r) - CASE_MASK_VPERMILPI(PSHUFHW, m) - CASE_MASK_VPERMILPI(PSHUFHW, r) - CASE_MASK_VPERMILPI(PSHUFLW, m) - CASE_MASK_VPERMILPI(PSHUFLW, r) - CASE_MASK_VPERM(PERMPD, m) - CASE_MASK_VPERM(PERMPD, r) - CASE_MASK_VPERM(PERMQ, m) - CASE_MASK_VPERM(PERMQ, r) - CASE_MASK_VSHUF(64X2, m) - CASE_MASK_VSHUF(64X2, r) - CASE_MASK_VSHUF(32X4, m) - CASE_MASK_VSHUF(32X4, r) - CASE_MASK_INS_COMMON(BROADCASTF64X2, Z128, rm) - CASE_MASK_INS_COMMON(BROADCASTI64X2, Z128, rm) - CASE_MASK_INS_COMMON(BROADCASTF64X2, , rm) - CASE_MASK_INS_COMMON(BROADCASTI64X2, , rm) - CASE_MASK_INS_COMMON(BROADCASTF64X4, , rm) - CASE_MASK_INS_COMMON(BROADCASTI64X4, , rm) - 
CASE_MASK_INS_COMMON(BROADCASTF32X4, Z256, rm) - CASE_MASK_INS_COMMON(BROADCASTI32X4, Z256, rm) - CASE_MASK_INS_COMMON(BROADCASTF32X4, , rm) - CASE_MASK_INS_COMMON(BROADCASTI32X4, , rm) - CASE_MASK_INS_COMMON(BROADCASTF32X8, , rm) - CASE_MASK_INS_COMMON(BROADCASTI32X8, , rm) - CASE_MASK_INS_COMMON(BROADCASTI32X2, Z128, r) - CASE_MASK_INS_COMMON(BROADCASTI32X2, Z128, m) - CASE_MASK_INS_COMMON(BROADCASTF32X2, Z256, r) - CASE_MASK_INS_COMMON(BROADCASTI32X2, Z256, r) - CASE_MASK_INS_COMMON(BROADCASTF32X2, Z256, m) - CASE_MASK_INS_COMMON(BROADCASTI32X2, Z256, m) - CASE_MASK_INS_COMMON(BROADCASTF32X2, Z, r) - CASE_MASK_INS_COMMON(BROADCASTI32X2, Z, r) - CASE_MASK_INS_COMMON(BROADCASTF32X2, Z, m) - CASE_MASK_INS_COMMON(BROADCASTI32X2, Z, m) - MaskRegName = getRegName(MI->getOperand(2).getReg()); - break; - } + + bool MaskWithZero = (TSFlags & X86II::EVEX_Z); + unsigned MaskOp = Desc.getNumDefs(); + + if (Desc.getOperandConstraint(MaskOp, MCOI::TIED_TO) != -1) + ++MaskOp; + + const char *MaskRegName = getRegName(MI->getOperand(MaskOp).getReg()); // MASK: zmmX {%kY} OS << " {%" << MaskRegName << "}"; @@ -412,6 +240,7 @@ static void printMasking(raw_ostream &OS, const MCInst *MI, /// newline terminated strings to the specified string if desired. This /// information is shown in disassembly dumps when verbose assembly is enabled. bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const MCInstrInfo &MCII, const char *(*getRegName)(unsigned)) { // If this is a shuffle operation, the switch should fill in this state. 
SmallVector ShuffleMask; @@ -1156,7 +985,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, if (!DestName) DestName = Src1Name; if (DestName) { OS << DestName; - printMasking(OS, MI, getRegName); + printMasking(OS, MI, MCII, getRegName); } else OS << "mem"; diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.h b/llvm/lib/Target/X86/InstPrinter/X86InstComments.h index 629c02c..b128279 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.h +++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.h @@ -25,8 +25,10 @@ namespace llvm { }; class MCInst; + class MCInstrInfo; class raw_ostream; bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const MCInstrInfo &MCII, const char *(*getRegName)(unsigned)); } diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 79a8e30..ccfe140 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -56,7 +56,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // If verbose assembly is enabled, we can print some informative comments. 
if (CommentStream) - EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + EmitAnyX86InstComments(MI, *CommentStream, MII, getRegisterName); } void X86IntelInstPrinter::printSSEAVXCC(const MCInst *MI, unsigned Op, diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index 9b0e301..2758353 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -1383,13 +1383,13 @@ define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64 ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7] +; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_mask_unpackhi_epi64: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7] +; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7] ; X64-NEXT: retq %arg1 = bitcast i8 %a1 to <8 x i1> %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> @@ -1402,13 +1402,13 @@ define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i6 ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_maskz_unpackhi_epi64: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 
{%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; X64-NEXT: retq %arg0 = bitcast i8 %a0 to <8 x i1> %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> @@ -1597,13 +1597,13 @@ define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64 ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] +; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_mask_unpacklo_epi64: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] +; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] ; X64-NEXT: retq %arg1 = bitcast i8 %a1 to <8 x i1> %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> @@ -1616,13 +1616,13 @@ define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i6 ; X32: # %bb.0: ; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: kmovw %eax, %k1 -; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_maskz_unpacklo_epi64: ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; X64-NEXT: retq %arg0 = bitcast i8 %a0 to <8 x i1> %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll 
index 000dc1c..8cff13a 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -725,9 +725,9 @@ define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i6 ; CHECK: ## %bb.0: ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) @@ -745,7 +745,7 @@ define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i6 ; CHECK: ## %bb.0: ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index c4eeda3..62c83f3 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ 
-1904,7 +1904,7 @@ define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mask0(<4 x i64> %vec, <2 x i64 ; CHECK: # %bb.0: ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3 ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm0[0] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm1 {%k1} = xmm3[0],xmm0[0] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1919,7 +1919,7 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mask0(<4 x i64> %vec, <2 x i ; CHECK: # %bb.0: ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm0[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32> @@ -1974,7 +1974,7 @@ define <2 x i64> @test_masked_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp, <2 x ; CHECK-NEXT: vmovdqa (%rdi), %ymm2 ; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3 ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm3[1] +; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 {%k1} = xmm2[1],xmm3[1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1990,7 +1990,7 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp, <2 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1] +; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 {%k1} {z} = xmm1[1],xmm2[1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -2311,7 +2311,7 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3 ; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] ; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm1 
= xmm3[0],xmm0[0] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm1 {%k1} = xmm3[0],xmm0[0] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -2327,7 +2327,7 @@ define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm0[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> @@ -2681,7 +2681,7 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 x ; CHECK-NEXT: vextracti32x4 $2, %zmm2, %xmm3 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm2[0] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 {%k1} = xmm3[0],xmm2[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp @@ -2698,7 +2698,7 @@ define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp, <2 ; CHECK-NEXT: vextracti32x4 $2, %zmm1, %xmm2 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 {%k1} {z} = xmm2[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 81c36a4..41809ce 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -1484,7 +1484,7 @@ define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i6 ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1] -; CHECK-NEXT: ## xmm2 = xmm0[1],xmm1[1] +; CHECK-NEXT: ## xmm2 {%k1} = xmm0[1],xmm1[1] ; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) @@ -1502,7 +1502,7 @@ define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i6 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1] -; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0] +; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0] ; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) @@ -1520,7 +1520,7 @@ define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i6 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1] -; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) @@ -1538,7 +1538,7 @@ define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i6 ; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1] -; CHECK-NEXT: ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) -- 2.7.4