[llvm-exegesis] `ExegesisX86Target::setRegTo()`: support mask (K) regs
author     Roman Lebedev <lebedev.ri@gmail.com>
           Sun, 11 Dec 2022 01:49:18 +0000 (04:49 +0300)
committer  Roman Lebedev <lebedev.ri@gmail.com>
           Sun, 11 Dec 2022 02:03:00 +0000 (05:03 +0300)
This only supports the obvious case, where the requested width
is supported by the available ISA set, and there is
an appropriate, or wider, `KMOV?` instruction.

This doesn't deal with 32/64-bit masks when the BW instruction set is unavailable.

This was the missing functionality that was
causing crashes in fd52305fdc7572534867247c8fb66093faf52e5c.
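
For readers skimming the diff, here is a minimal standalone sketch of the
width/feature dispatch the new code path implements. It is not part of the
patch: the `kmovLoadFor` helper and its string results are hypothetical,
purely for illustration of which `KMOV?km` load is picked for which
width/feature combination.

  #include <iostream>
  #include <optional>
  #include <string>

  // Sketch of the new setRegTo() dispatch for mask (K) registers: pick a
  // KMOV?km load based on the value width and the available AVX512 features.
  // kmovLoadFor() is a made-up name, not an LLVM API.
  std::optional<std::string> kmovLoadFor(unsigned BitWidth, bool HasAVX512F,
                                         bool HasDQI, bool HasBWI) {
    switch (BitWidth) {
    case 8:
      if (HasDQI)
        return "KMOVBkm"; // 8-bit mask load needs AVX512DQ.
      [[fallthrough]];    // Otherwise widen the value to 16 bits.
    case 16:
      if (HasAVX512F)
        return "KMOVWkm"; // 16-bit mask load only needs AVX512F.
      break;
    case 32:
      if (HasBWI)
        return "KMOVDkm"; // 32-bit mask load needs AVX512BW.
      break;
    case 64:
      if (HasBWI)
        return "KMOVQkm"; // 64-bit mask load needs AVX512BW.
      break;
    }
    return std::nullopt; // Unsupported combination: no instructions emitted.
  }

  int main() {
    // Example: an 8-bit mask value on an AVX512F-only target widens to KMOVWkm.
    std::cout << kmovLoadFor(8, /*HasAVX512F=*/true, /*HasDQI=*/false,
                             /*HasBWI=*/false)
                     .value_or("<unsupported>")
              << "\n";
  }

In the actual patch the value is spilled to the stack and loaded with the
chosen `KMOV?km` instruction via `ConstantInliner`, which is what the
`IsStackAllocate`/`IsMovValueToStack`/`IsMovValueFromStack` matchers in the
new tests check.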

llvm/tools/llvm-exegesis/lib/X86/Target.cpp
llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp

index 8ab6a67..3770431 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -880,6 +880,35 @@ std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
     return {loadImmediate(Reg, 32, Value)};
   if (X86::GR64RegClass.contains(Reg))
     return {loadImmediate(Reg, 64, Value)};
+  if (X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) ||
+      X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg)) {
+    switch (Value.getBitWidth()) {
+    case 8:
+      if (STI.getFeatureBits()[X86::FeatureDQI]) {
+        ConstantInliner CI(Value);
+        return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVBkm);
+      }
+      [[fallthrough]];
+    case 16:
+      if (STI.getFeatureBits()[X86::FeatureAVX512]) {
+        ConstantInliner CI(Value.zextOrTrunc(16));
+        return CI.loadAndFinalize(Reg, 16, X86::KMOVWkm);
+      }
+      break;
+    case 32:
+      if (STI.getFeatureBits()[X86::FeatureBWI]) {
+        ConstantInliner CI(Value);
+        return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVDkm);
+      }
+      break;
+    case 64:
+      if (STI.getFeatureBits()[X86::FeatureBWI]) {
+        ConstantInliner CI(Value);
+        return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVQkm);
+      }
+      break;
+    }
+  }
   ConstantInliner CI(Value);
   if (X86::VR64RegClass.contains(Reg))
     return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
index a76dddc..cc3882a 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
@@ -55,6 +55,7 @@ using testing::ElementsAre;
 using testing::ElementsAreArray;
 using testing::Eq;
 using testing::Gt;
+using testing::IsEmpty;
 using testing::Matcher;
 using testing::NotNull;
 using testing::Property;
@@ -142,6 +143,21 @@ public:
   X86Core2Avx512TargetTest() : X86TargetTest("+avx512vl") {}
 };
 
+class X86Core2Avx512DQTargetTest : public X86TargetTest {
+public:
+  X86Core2Avx512DQTargetTest() : X86TargetTest("+avx512dq") {}
+};
+
+class X86Core2Avx512BWTargetTest : public X86TargetTest {
+public:
+  X86Core2Avx512BWTargetTest() : X86TargetTest("+avx512bw") {}
+};
+
+class X86Core2Avx512DQBWTargetTest : public X86TargetTest {
+public:
+  X86Core2Avx512DQBWTargetTest() : X86TargetTest("+avx512dq,+avx512bw") {}
+};
+
 TEST_F(X86Core2TargetTest, NoHighByteRegs) {
   EXPECT_TRUE(State.getRATC().reservedRegisters().test(X86::AH));
 }
@@ -291,6 +307,174 @@ TEST_F(X86Core2Avx512TargetTest, SetRegToVR512Value) {
                         IsStackDeallocate(64)}));
 }
 
+TEST_F(X86Core2Avx512TargetTest, SetRegToK0_16Bits) {
+  const uint16_t Value = 0xABCDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 16;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(2),
+                          IsMovValueToStack(X86::MOV16mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVWkm, Reg),
+                          IsStackDeallocate(2)));
+}
+
+TEST_F(X86Core2Avx512DQTargetTest, SetRegToK0_16Bits) {
+  const uint16_t Value = 0xABCDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 16;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(2),
+                          IsMovValueToStack(X86::MOV16mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVWkm, Reg),
+                          IsStackDeallocate(2)));
+}
+
+TEST_F(X86Core2Avx512BWTargetTest, SetRegToK0_16Bits) {
+  const uint16_t Value = 0xABCDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 16;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV16mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVWkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512DQBWTargetTest, SetRegToK0_16Bits) {
+  const uint16_t Value = 0xABCDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 16;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV16mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVWkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512TargetTest, SetRegToK0_8Bits) {
+  const uint8_t Value = 0xABU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 8;
+  EXPECT_THAT(
+      setRegTo(Reg, APInt(RegBitWidth, Value)),
+      ElementsAre(IsStackAllocate(2),
+                  IsMovValueToStack(
+                      X86::MOV16mi,
+                      APInt(RegBitWidth, Value).zext(16).getZExtValue(), 0),
+                  IsMovValueFromStack(X86::KMOVWkm, Reg),
+                  IsStackDeallocate(2)));
+}
+
+TEST_F(X86Core2Avx512DQTargetTest, SetRegToK0_8Bits) {
+  const uint8_t Value = 0xABU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 8;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV8mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVBkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512BWTargetTest, SetRegToK0_8Bits) {
+  const uint8_t Value = 0xABU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 8;
+  EXPECT_THAT(
+      setRegTo(Reg, APInt(RegBitWidth, Value)),
+      ElementsAre(IsStackAllocate(2),
+                  IsMovValueToStack(
+                      X86::MOV16mi,
+                      APInt(RegBitWidth, Value).zext(16).getZExtValue(), 0),
+                  IsMovValueFromStack(X86::KMOVWkm, Reg),
+                  IsStackDeallocate(2)));
+}
+
+TEST_F(X86Core2Avx512DQBWTargetTest, SetRegToK0_8Bits) {
+  const uint8_t Value = 0xABU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 8;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV8mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVBkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512TargetTest, SetRegToK0_32Bits) {
+  const uint32_t Value = 0xABCDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 32;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)), IsEmpty());
+}
+
+TEST_F(X86Core2Avx512DQTargetTest, SetRegToK0_32Bits) {
+  const uint32_t Value = 0xABCDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 32;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)), IsEmpty());
+}
+
+TEST_F(X86Core2Avx512BWTargetTest, SetRegToK0_32Bits) {
+  const uint32_t Value = 0xABCDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 32;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV32mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVDkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512DQBWTargetTest, SetRegToK0_32Bits) {
+  const uint32_t Value = 0xABCDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 32;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV32mi, Value, 0),
+                          IsMovValueFromStack(X86::KMOVDkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512TargetTest, SetRegToK0_64Bits) {
+  const uint64_t Value = 0xABCDABCDCABDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 64;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)), IsEmpty());
+}
+
+TEST_F(X86Core2Avx512DQTargetTest, SetRegToK0_64Bits) {
+  const uint64_t Value = 0xABCDABCDCABDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 64;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)), IsEmpty());
+}
+
+TEST_F(X86Core2Avx512BWTargetTest, SetRegToK0_64Bits) {
+  const uint64_t Value = 0xABCDABCDCABDCABDUL;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 64;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV32mi, 0xCABDCABDUL, 0),
+                          IsMovValueToStack(X86::MOV32mi, 0xABCDABCDUL, 4),
+                          IsMovValueFromStack(X86::KMOVQkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
+TEST_F(X86Core2Avx512DQBWTargetTest, SetRegToK0_64Bits) {
+  const uint64_t Value = 0xABCDABCDCABDCABDU;
+  const unsigned Reg = X86::K0;
+  const unsigned RegBitWidth = 64;
+  EXPECT_THAT(setRegTo(Reg, APInt(RegBitWidth, Value)),
+              ElementsAre(IsStackAllocate(RegBitWidth / 8),
+                          IsMovValueToStack(X86::MOV32mi, 0xCABDCABDUL, 0),
+                          IsMovValueToStack(X86::MOV32mi, 0xABCDABCDUL, 4),
+                          IsMovValueFromStack(X86::KMOVQkm, Reg),
+                          IsStackDeallocate(RegBitWidth / 8)));
+}
+
 // Note: We always put 80 bits on the stack independently of the size of the
 // value. This uses a bit more space but makes the code simpler.