[CodeGen][SVE] Don't drop scalable flag in DAGCombiner::visitEXTRACT_SUBVECTOR

author Sander de Smalen <sander.desmalen@arm.com>

Thu, 2 Jul 2020 09:06:41 +0000 (10:06 +0100)

committer Sander de Smalen <sander.desmalen@arm.com>

Thu, 2 Jul 2020 09:16:43 +0000 (10:16 +0100)
author Sander de Smalen <sander.desmalen@arm.com>
Thu, 2 Jul 2020 09:06:41 +0000 (10:06 +0100)
committer Sander de Smalen <sander.desmalen@arm.com>
Thu, 2 Jul 2020 09:16:43 +0000 (10:16 +0100)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

index f671739..4d950d4 100644 (file)
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2387,7 +2387,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
    // Lower the actual args into this basic block.
    SmallVector<ArrayRef<Register>, 8> VRegArgs;
    for (const Argument &Arg: F.args()) {
-    if (DL->getTypeStoreSize(Arg.getType()) == 0)
+    if (DL->getTypeStoreSize(Arg.getType()).isZero())
        continue; // Don't handle zero sized types.
      ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
      VRegArgs.push_back(VRegs);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 18133a1..bd7d94e 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19218,13 +19218,13 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
        V.getOperand(0).getValueType().isVector()) {
      SDValue SrcOp = V.getOperand(0);
      EVT SrcVT = SrcOp.getValueType();
-    unsigned SrcNumElts = SrcVT.getVectorNumElements();
-    unsigned DestNumElts = V.getValueType().getVectorNumElements();
+    unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
+    unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
      if ((SrcNumElts % DestNumElts) == 0) {
        unsigned SrcDestRatio = SrcNumElts / DestNumElts;
-      unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+      ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
        EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
-                                      NewExtNumElts);
+                                      NewExtEC);
        if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
          SDLoc DL(N);
          SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
@@ -19235,14 +19235,14 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
      }
      if ((DestNumElts % SrcNumElts) == 0) {
        unsigned DestSrcRatio = DestNumElts / SrcNumElts;
-      if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
-        unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
+      if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
+        ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
          EVT ScalarVT = SrcVT.getScalarType();
          if ((ExtIdx % DestSrcRatio) == 0) {
            SDLoc DL(N);
            unsigned IndexValScaled = ExtIdx / DestSrcRatio;
            EVT NewExtVT =
-              EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtNumElts);
+              EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
            if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
              SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
              SDValue NewExtract =
@@ -19250,7 +19250,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
                              V.getOperand(0), NewIndex);
              return DAG.getBitcast(NVT, NewExtract);
            }
-          if (NewExtNumElts == 1 &&
+          if (NewExtEC == 1 &&
                TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
              SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
              SDValue NewExtract =
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

index 9a976f4..3662d00 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13853,9 +13853,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
        SDValue Idx = N->getOperand(3);
  
        uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
-      if (IdxConst > Src1->getNumOperands() - 1)
-        report_fatal_error("index larger than expected");
-
        EVT ResVT = N->getValueType(0);
        uint64_t NumLanes = ResVT.getVectorElementCount().Min;
        SDValue Val =
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll

new file mode 100644 (file)

index 0000000..29ad127
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
+
+; Test that DAGCombiner doesn't drop the scalable flag when it tries to fold:
+;   extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+define <vscale x 16 x i8> @extract_nxv16i8_nxv4i64(<vscale x 4 x i64> %z0_z1) {
+; CHECK-LABEL: extract_nxv16i8_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %z0_z1_bc = bitcast <vscale x 4 x i64> %z0_z1 to <vscale x 32 x i8>
+  %ext = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8> %z0_z1_bc, i32 1)
+  ret <vscale x 16 x i8> %ext
+}
+
+; As above, but with the bitcast widening the elements (nxv32i8 -> nxv4i64).
+define <vscale x 2 x i64> @extract_nxv2i64_nxv32i8(<vscale x 32 x i8> %z0_z1) {
+; CHECK-LABEL: extract_nxv2i64_nxv32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %z0_z1_bc = bitcast <vscale x 32 x i8> %z0_z1 to <vscale x 4 x i64>
+  %ext = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64> %z0_z1_bc, i32 1)
+  ret <vscale x 2 x i64> %ext
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
author	Sander de Smalen <sander.desmalen@arm.com>
	Thu, 2 Jul 2020 09:06:41 +0000 (10:06 +0100)
committer	Sander de Smalen <sander.desmalen@arm.com>
	Thu, 2 Jul 2020 09:16:43 +0000 (10:16 +0100)
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/sve-extract-subvector.ll	[new file with mode: 0644]	patch \| blob