[RISCV] Add vendor-defined XTheadMAC (multiply-accumulate) extension
authorManolis Tsamis <manolis.tsamis@vrull.eu>
Tue, 14 Feb 2023 18:43:00 +0000 (19:43 +0100)
committerPhilipp Tomsich <philipp.tomsich@vrull.eu>
Tue, 14 Feb 2023 19:25:47 +0000 (20:25 +0100)
The vendor-defined XTHeadMac extension (no comparable standard extension
exists at the time of writing) adds multiply-accumulate instructions.

It is supported by the C9xx cores (e.g., found in the wild in the
Allwinner D1) by Alibaba T-Head.

The current (as of this commit) public documentation for this
extension is available at:
  https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf

Support for these instructions has already landed in GNU Binutils:
  https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=4041e11db3ec3611921d10150572a92689aa3154

Co-authored-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D143847

15 files changed:
llvm/docs/RISCVUsage.rst
llvm/docs/ReleaseNotes.rst
llvm/lib/Support/RISCVISAInfo.cpp
llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
llvm/lib/Target/RISCV/RISCVFeatures.td
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
llvm/test/CodeGen/RISCV/attributes.ll
llvm/test/CodeGen/RISCV/xtheadmac.ll [new file with mode: 0644]
llvm/test/MC/RISCV/rv32xtheadmac-invalid.s [new file with mode: 0644]
llvm/test/MC/RISCV/rv32xtheadmac-valid.s [new file with mode: 0644]
llvm/test/MC/RISCV/rv64xtheadmac-valid.s [new file with mode: 0644]
llvm/test/MC/RISCV/xtheadmac-invalid.s [new file with mode: 0644]

index 5fd4fb3..8242707 100644 (file)
@@ -185,6 +185,9 @@ The current vendor extensions supported are:
 ``XTHeadBs``
   LLVM implements `the THeadBs (single-bit operations) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
 
+``XTHeadMac``
+  LLVM implements `the XTHeadMac (multiply-accumulate) vendor-defined instructions specified in <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.2/xthead-2023-01-30-2.2.2.pdf>`_  by T-HEAD of Alibaba.  Instructions are prefixed with `th.` as described in the specification.
+
 ``XTHeadVdot``
   LLVM implements `version 1.0.0 of the THeadV-family custom instructions specification <https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.2.0/xthead-2022-12-04-2.2.0.pdf>`_ by T-HEAD of Alibaba.  All instructions are prefixed with `th.` as described in the specification, and the riscv-toolchain-convention document linked above.
 
index de60ed9..c0bc8e0 100644 (file)
@@ -111,6 +111,7 @@ Changes to the RISC-V Backend
 * Adds support for the vendor-defined XTHeadBa (address-generation) extension.
 * Adds support for the vendor-defined XTHeadBb (basic bit-manipulation) extension.
 * Adds support for the vendor-defined XTHeadBs (single-bit) extension.
+* Adds support for the vendor-defined XTHeadMac (multiply-accumulate instructions) extension.
 
 Changes to the WebAssembly Backend
 ----------------------------------
index 70095d0..92ac074 100644 (file)
@@ -112,6 +112,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
     {"xtheadba", RISCVExtensionVersion{1, 0}},
     {"xtheadbb", RISCVExtensionVersion{1, 0}},
     {"xtheadbs", RISCVExtensionVersion{1, 0}},
+    {"xtheadmac", RISCVExtensionVersion{1, 0}},
     {"xtheadvdot", RISCVExtensionVersion{1, 0}},
     {"xventanacondops", RISCVExtensionVersion{1, 0}},
 };
index 647607a..b379976 100644 (file)
@@ -492,6 +492,13 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       if (Result != MCDisassembler::Fail)
         return Result;
     }
+    if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadMac]) {
+      LLVM_DEBUG(dbgs() << "Trying XTHeadMac custom opcode table:\n");
+      Result = decodeInstruction(DecoderTableTHeadMac32, MI, Insn, Address,
+                                 this, STI);
+      if (Result != MCDisassembler::Fail)
+        return Result;
+    }
     if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadVdot]) {
       LLVM_DEBUG(dbgs() << "Trying XTHeadVdot custom opcode table:\n");
       Result =
index e6cd334..3284d8e 100644 (file)
@@ -484,6 +484,13 @@ def HasVendorXTHeadBs : Predicate<"Subtarget->hasVendorXTHeadBs()">,
                                   AssemblerPredicate<(all_of FeatureVendorXTHeadBs),
                                   "'xtheadbs' (T-Head single-bit instructions)">;
 
+def FeatureVendorXTHeadMac // Subtarget feature named "xtheadmac"; sets HasVendorXTHeadMac to true.
+    : SubtargetFeature<"xtheadmac", "HasVendorXTHeadMac", "true",
+                       "'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
+def HasVendorXTHeadMac : Predicate<"Subtarget->hasVendorXTHeadMac()">, // Predicate form of the feature, also usable as an assembler predicate.
+                                   AssemblerPredicate<(all_of FeatureVendorXTHeadMac),
+                                   "'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
+
 def FeatureVendorXTHeadVdot
     : SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true",
                        "'xtheadvdot' (T-Head Vector Extensions for Dot)",
index e9ef19a..cd9831e 100644 (file)
@@ -2165,15 +2165,29 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
   return true;
 }
 
-bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
+bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+      cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
     Val = N.getOperand(0);
     return true;
   }
+
+  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
+    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
+      return N;
+
+    SDValue N0 = N.getOperand(0);
+    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+        N.getConstantOperandVal(1) == ShiftAmt &&
+        N0.getConstantOperandVal(1) == ShiftAmt)
+      return N0.getOperand(0);
+
+    return N;
+  };
+
   MVT VT = N.getSimpleValueType();
-  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
-    Val = N;
+  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
+    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
     return true;
   }
 
@@ -2681,6 +2695,10 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
   case RISCV::MULW:
   case RISCV::SLLIW:
   case RISCV::PACKW:
+  case RISCV::TH_MULAW:
+  case RISCV::TH_MULAH:
+  case RISCV::TH_MULSW:
+  case RISCV::TH_MULSH:
     // Result is already sign extended just remove the sext.w.
     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
     ReplaceUses(N, N0.getNode());
index c800ea9..8195bce 100644 (file)
@@ -60,7 +60,10 @@ public:
     return selectShiftMask(N, 32, ShAmt);
   }
 
-  bool selectSExti32(SDValue N, SDValue &Val);
+  bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val); // Width passed at run time; defined out of line.
+  template <unsigned Bits> bool selectSExtBits(SDValue N, SDValue &Val) {
+    return selectSExtBits(N, Bits, Val); // Forward the compile-time width to the run-time overload.
+  }
   bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val);
   template <unsigned Bits> bool selectZExtBits(SDValue N, SDValue &Val) {
     return selectZExtBits(N, Bits, Val);
index 81c2537..dd11ad7 100644 (file)
@@ -1195,7 +1195,8 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
 def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
   return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
 }]>;
-def sexti32 : ComplexPattern<i64, 1, "selectSExti32">;
+def sexti16 : ComplexPattern<XLenVT, 1, "selectSExtBits<16>">; // One operand, matched by selectSExtBits with Bits = 16; typed XLenVT.
+def sexti32 : ComplexPattern<i64, 1, "selectSExtBits<32>">; // One operand, matched by selectSExtBits with Bits = 32; typed i64.
 def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
   return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
 }]>;
index f85af11..679eaed 100644 (file)
@@ -86,6 +86,17 @@ class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
                     (ins GPR:$rs1, uimm5:$shamt),
                    opcodestr, "$rd, $rs1, $shamt">;
 
+
+// R-type multiply-accumulate instruction format for XTHeadMac.
+// $rd is both a source (the accumulator) and the destination: the result
+// operand $rd_wb is tied to $rd through the constraint below.
+//
+// isCommutable is deliberately NOT set. The generic commute logic picks the
+// first two source operands, which here are the tied accumulator $rd and
+// $rs1; swapping those changes the computed value. Only $rs1/$rs2 may be
+// exchanged, and that requires target-specific commute-index handling that
+// this format alone cannot express.
+let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "THeadMac",
+    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
+    : RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
+             (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+             opcodestr, "$rd, $rs1, $rs2"> {
+  let Constraints = "$rd_wb = $rd";
+}
+
+
 //===----------------------------------------------------------------------===//
 // Combination of instruction classes.
 // Use these multiclasses to define instructions more easily.
@@ -128,6 +139,21 @@ def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
              Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
 } // Predicates = [HasVendorXTHeadBs]
 
+let Predicates = [HasVendorXTHeadMac] in {
+def TH_MULA  : THMulAccumulate_rr<0b0010000, "th.mula">;
+def TH_MULS  : THMulAccumulate_rr<0b0010001, "th.muls">;
+}  // Predicates = [HasVendorXTHeadMac]
+
+let Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1 in {
+def TH_MULAH : THMulAccumulate_rr<0b0010100, "th.mulah">;
+def TH_MULSH : THMulAccumulate_rr<0b0010101, "th.mulsh">;
+} // Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1
+
+let Predicates = [HasVendorXTHeadMac, IsRV64], IsSignExtendingOpW = 1 in {
+def TH_MULAW : THMulAccumulate_rr<0b0010010, "th.mulaw">;
+def TH_MULSW : THMulAccumulate_rr<0b0010011, "th.mulsw">;
+} // Predicates = [HasVendorXTHeadMac, IsRV64], IsSignExtendingOpW = 1
+
 let Predicates = [HasVendorXTHeadVdot],
     Constraints = "@earlyclobber $vd",
     RVVConstraint = WidenV in {
@@ -327,6 +353,34 @@ def : Pat<(seteq (and GPR:$rs1, SingleBitSetMask:$mask), 0),
           (TH_TST (XORI GPR:$rs1, -1), SingleBitSetMask:$mask)>;
 } // Predicates = [HasVendorXTHeadBs]
 
+let Predicates = [HasVendorXTHeadMac] in {
+def : Pat<(add GPR:$rd, (mul GPR:$rs1, GPR:$rs2)), (TH_MULA GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sub GPR:$rd, (mul GPR:$rs1, GPR:$rs2)), (TH_MULS GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasVendorXTHeadMac]
+
+let Predicates = [HasVendorXTHeadMac, IsRV64] in {
+// mulaw, mulsw are available only in RV64.
+def : Pat<(binop_allwusers<add> GPR:$rd, (mul GPR:$rs1, GPR:$rs2)),
+          (TH_MULAW GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(binop_allwusers<sub> GPR:$rd, (mul GPR:$rs1, GPR:$rs2)),
+          (TH_MULSW GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+// mulah, mulsh produce a sign-extended result.
+def : Pat<(binop_allwusers<add> GPR:$rd, (mul
+            (sexti16 GPR:$rs1),
+            (sexti16 GPR:$rs2))),
+          (TH_MULAH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(binop_allwusers<sub> GPR:$rd, (mul
+            (sexti16 GPR:$rs1),
+            (sexti16 GPR:$rs2))),
+          (TH_MULSH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasVendorXTHeadMac, IsRV64]
+
+let Predicates = [HasVendorXTHeadMac, IsRV32] in {
+def : Pat<(add GPR:$rd, (mul (sexti16 GPR:$rs1), (sexti16 GPR:$rs2))),
+         (TH_MULAH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sub GPR:$rd, (mul (sexti16 GPR:$rs1), (sexti16 GPR:$rs2))),
+         (TH_MULSH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasVendorXTHeadMac, IsRV32]
 
 defm PseudoTHVdotVMAQA      : VPseudoVMAQA_VV_VX;
 defm PseudoTHVdotVMAQAU     : VPseudoVMAQA_VV_VX;
index 68638ff..dfdb2dd 100644 (file)
@@ -41,6 +41,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+svnapot %s -o - | FileCheck --check-prefixes=CHECK,RV32SVNAPOT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SVPBMT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV32XTHEADMAC %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zca %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCB %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcd %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCD %s
@@ -91,6 +92,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadba %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADMAC %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefixes=CHECK,RV64ZTSO %s
 ; RV32SVNAPOT: .attribute 5, "rv32i2p0_svnapot1p0"
 ; RV32SVPBMT: .attribute 5, "rv32i2p0_svpbmt1p0"
 ; RV32SVINVAL: .attribute 5, "rv32i2p0_svinval1p0"
+; RV32XTHEADMAC: .attribute 5, "rv32i2p0_xtheadmac1p0"
 ; RV32ZCA: .attribute 5, "rv32i2p0_zca1p0"
 ; RV32ZCB: .attribute 5, "rv32i2p0_zca1p0_zcb1p0"
 ; RV32ZCD: .attribute 5, "rv32i2p0_zcd1p0"
 ; RV64XTHEADBA: .attribute 5, "rv64i2p0_xtheadba1p0"
 ; RV64XTHEADBB: .attribute 5, "rv64i2p0_xtheadbb1p0"
 ; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
+; RV64XTHEADMAC: .attribute 5, "rv64i2p0_xtheadmac1p0"
 ; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
 ; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
 ; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0"
diff --git a/llvm/test/CodeGen/RISCV/xtheadmac.ll b/llvm/test/CodeGen/RISCV/xtheadmac.ll
new file mode 100644 (file)
index 0000000..9e849f2
--- /dev/null
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32XTHEADMAC
+; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64XTHEADMAC
+
+define i32 @mula_i32(i32 %a, i32 %b, i32 %c) {
+; RV32XTHEADMAC-LABEL: mula_i32:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    th.mula a0, a1, a2
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: mula_i32:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mulaw a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = mul i32 %b, %c
+  %e = add i32 %a, %d
+  ret i32 %e
+}
+
+define i32 @muls_i32(i32 %a, i32 %b, i32 %c) {
+; RV32XTHEADMAC-LABEL: muls_i32:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    th.muls a0, a1, a2
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: muls_i32:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mulsw a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = mul i32 %b, %c
+  %e = sub i32 %a, %d
+  ret i32 %e
+}
+
+define i64 @mula_i64(i64 %a, i64 %b, i64 %c) {
+; RV32XTHEADMAC-LABEL: mula_i64:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    mulhu a6, a2, a4
+; RV32XTHEADMAC-NEXT:    th.mula a6, a2, a5
+; RV32XTHEADMAC-NEXT:    th.mula a6, a3, a4
+; RV32XTHEADMAC-NEXT:    th.mula a2, a0, a4
+; RV32XTHEADMAC-NEXT:    sltu a0, a2, a0
+; RV32XTHEADMAC-NEXT:    add a0, a1, a0
+; RV32XTHEADMAC-NEXT:    add a1, a0, a6
+; RV32XTHEADMAC-NEXT:    mv a0, a2
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: mula_i64:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mula a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = mul i64 %b, %c
+  %f = add i64 %a, %d
+  ret i64 %f
+}
+
+define i64 @mulaw_i64(i32 %a, i32 %b, i32 %c) {
+; RV32XTHEADMAC-LABEL: mulaw_i64:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    th.mula a0, a1, a2
+; RV32XTHEADMAC-NEXT:    srai a1, a0, 31
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: mulaw_i64:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mulaw a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = mul i32 %b, %c
+  %e = add i32 %a, %d
+  %f = sext i32 %e to i64
+  ret i64 %f
+}
+
+define i64 @mulah_i64(i32 %a, i16 %b, i16 %c) {
+; RV32XTHEADMAC-LABEL: mulah_i64:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    th.mulah a0, a1, a2
+; RV32XTHEADMAC-NEXT:    srai a1, a0, 31
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: mulah_i64:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mulah a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = sext i16 %b to i32
+  %e = sext i16 %c to i32
+  %f = mul i32 %d, %e
+  %g = add i32 %a, %f
+  %h = sext i32 %g to i64
+  ret i64 %h
+}
+
+define i64 @muls_i64(i64 %a, i64 %b, i64 %c) {
+; RV32XTHEADMAC-LABEL: muls_i64:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    mulhu a6, a2, a4
+; RV32XTHEADMAC-NEXT:    th.mula a6, a2, a5
+; RV32XTHEADMAC-NEXT:    th.mula a6, a3, a4
+; RV32XTHEADMAC-NEXT:    mul a3, a2, a4
+; RV32XTHEADMAC-NEXT:    th.muls a2, a0, a4
+; RV32XTHEADMAC-NEXT:    sltu a0, a0, a3
+; RV32XTHEADMAC-NEXT:    sub a1, a1, a0
+; RV32XTHEADMAC-NEXT:    sub a1, a1, a6
+; RV32XTHEADMAC-NEXT:    mv a0, a2
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: muls_i64:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.muls a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = mul i64 %b, %c
+  %f = sub i64 %a, %d
+  ret i64 %f
+}
+
+define i64 @mulsw_i64(i32 %a, i32 %b, i32 %c) {
+; RV32XTHEADMAC-LABEL: mulsw_i64:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    th.muls a0, a1, a2
+; RV32XTHEADMAC-NEXT:    srai a1, a0, 31
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: mulsw_i64:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mulsw a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = mul i32 %b, %c
+  %e = sub i32 %a, %d
+  %f = sext i32 %e to i64
+  ret i64 %f
+}
+
+define i64 @mulsh_i64(i32 %a, i16 %b, i16 %c) {
+; RV32XTHEADMAC-LABEL: mulsh_i64:
+; RV32XTHEADMAC:       # %bb.0:
+; RV32XTHEADMAC-NEXT:    th.mulsh a0, a1, a2
+; RV32XTHEADMAC-NEXT:    srai a1, a0, 31
+; RV32XTHEADMAC-NEXT:    ret
+;
+; RV64XTHEADMAC-LABEL: mulsh_i64:
+; RV64XTHEADMAC:       # %bb.0:
+; RV64XTHEADMAC-NEXT:    th.mulsh a0, a1, a2
+; RV64XTHEADMAC-NEXT:    ret
+  %d = sext i16 %b to i32
+  %e = sext i16 %c to i32
+  %f = mul i32 %d, %e
+  %g = sub i32 %a, %f
+  %h = sext i32 %g to i64
+  ret i64 %h
+}
diff --git a/llvm/test/MC/RISCV/rv32xtheadmac-invalid.s b/llvm/test/MC/RISCV/rv32xtheadmac-invalid.s
new file mode 100644 (file)
index 0000000..1a81133
--- /dev/null
@@ -0,0 +1,4 @@
+# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmac < %s 2>&1 | FileCheck %s
+
+th.mulaw  t0, t1, t2     # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
+th.mulsw  t0, t1, t2     # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
diff --git a/llvm/test/MC/RISCV/rv32xtheadmac-valid.s b/llvm/test/MC/RISCV/rv32xtheadmac-valid.s
new file mode 100644 (file)
index 0000000..75bf8b0
--- /dev/null
@@ -0,0 +1,21 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadmac -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadmac < %s \
+# RUN:     | llvm-objdump --mattr=+xtheadmac -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.mula a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x20]
+th.mula  a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.mulah a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x28]
+th.mulah a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.muls a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x22]
+th.muls         a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.mulsh a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x2a]
+th.mulsh a0, a1, a2
diff --git a/llvm/test/MC/RISCV/rv64xtheadmac-valid.s b/llvm/test/MC/RISCV/rv64xtheadmac-valid.s
new file mode 100644 (file)
index 0000000..e7e5414
--- /dev/null
@@ -0,0 +1,29 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadmac -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadmac < %s \
+# RUN:     | llvm-objdump --mattr=+xtheadmac -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: th.mula a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x20]
+th.mula         a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.mulah a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x28]
+th.mulah a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.mulaw a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x24]
+th.mulaw a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.muls a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x22]
+th.muls         a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.mulsh a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x2a]
+th.mulsh a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: th.mulsw a0, a1, a2
+# CHECK-ASM: encoding: [0x0b,0x95,0xc5,0x26]
+th.mulsw a0, a1, a2
diff --git a/llvm/test/MC/RISCV/xtheadmac-invalid.s b/llvm/test/MC/RISCV/xtheadmac-invalid.s
new file mode 100644 (file)
index 0000000..48bedbe
--- /dev/null
@@ -0,0 +1,7 @@
+# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmac < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadmac < %s 2>&1 | FileCheck %s
+
+th.mula  t0, t1, 0(t2)  # CHECK: :[[@LINE]]:18: error: invalid operand for instruction
+th.muls  t0, 12, t2     # CHECK: :[[@LINE]]:14: error: invalid operand for instruction
+th.mulsh t0, t1, 34     # CHECK: :[[@LINE]]:18: error: invalid operand for instruction
+th.mulsw t0, -56, t2    # CHECK: :[[@LINE]]:14: error: invalid operand for instruction