From 10d10102a44318972a985385f6156e72afbfb513 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Wed, 25 Sep 2019 10:16:48 +0000
Subject: [PATCH] [ARM] Ensure we do not attempt to create lsll #0

During legalisation we can end up with some pretty strange nodes, such as
shifts of 0. We need to make sure we do not try to turn these into long
shifts, which would end up as invalid assembly instructions: a long shift
with a zero immediate actually encodes a shift by 32.

Differential Revision: https://reviews.llvm.org/D67664

llvm-svn: 372839
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  2 +-
 llvm/lib/Target/ARM/ARMInstrMVE.td      |  6 +++---
 llvm/lib/Target/ARM/ARMInstrThumb2.td   |  3 ++-
 llvm/test/CodeGen/Thumb2/lsll0.ll       | 48 +++++++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/lsll0.ll
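As background for the diff below: the MVE long shifts take an immediate in
the range [1,32] (see the long_shift operand in ARMInstrThumb2.td), and 32
is encoded as 0, so an instruction written as "lsll #0" is really a shift
by 32. The standalone C++ sketch below -- not LLVM code, with invented
encode/decode helper names -- models that encoding and the wrong result a
stray "lsll #0" would produce:

    #include <cassert>
    #include <cstdint>

    // Hypothetical helpers modelling the long-shift immediate encoding:
    // shifts of 1..31 are stored as-is and 32 wraps to 0.
    static uint32_t encodeLongShiftImm(uint32_t Shift) {
      assert(Shift >= 1 && Shift <= 32 && "operand must be in the range [1,32]");
      return Shift % 32; // 32 -> 0
    }

    static uint32_t decodeLongShiftImm(uint32_t Field) {
      return Field == 0 ? 32 : Field; // 0 decodes as a shift by 32
    }

    int main() {
      // The legal range round-trips through the encoding.
      for (uint32_t S = 1; S <= 32; ++S)
        assert(decodeLongShiftImm(encodeLongShiftImm(S)) == S);

      // A shift by 0 must leave a 64-bit value unchanged...
      uint64_t X = 0x00000000deadbeefULL;
      assert((X << 0) == X);

      // ...but an emitted "lsll #0" would execute as a shift by 32, which
      // is why Expand64BitShift now sends a constant amount of 0 down the
      // default expansion path instead of creating a long shift.
      assert((X << decodeLongShiftImm(0)) != X);
      return 0;
    }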
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1e02395..1866f79 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6011,7 +6011,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
   // If the shift amount is greater than 32 or has a greater bitwidth than 64
   // then do the default optimisation
   if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
-      (Con && Con->getZExtValue() >= 32))
+      (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
     return SDValue();
 
   // Extract the lower 32 bits of the shift amount if it's not an i32
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 74de3a4..3ba23d9 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -453,16 +453,16 @@ def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo,
                                             tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src,
                                             tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
 def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo,
                                             tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src,
-                                            tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+                                            tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
 def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo,
                                             tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src,
                                             tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
 def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo,
                                             tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src,
-                                            tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+                                            tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
 def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo,
                                             tGPROdd:$RdaHi, (ARMlsrl tGPREven:$RdaLo_src,
-                                            tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+                                            tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
 def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
 def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 98de738..b165b85 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -45,7 +45,8 @@ def mve_shift_imm : AsmOperandClass {
   let RenderMethod = "addImmOperands";
   let DiagnosticString = "operand must be an immediate in the range [1,32]";
 }
-def long_shift : Operand<i32> {
+def long_shift : Operand<i32>,
+                 ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
   let ParserMatchClass = mve_shift_imm;
   let DecoderMethod = "DecodeLongShiftOperand";
 }
diff --git a/llvm/test/CodeGen/Thumb2/lsll0.ll b/llvm/test/CodeGen/Thumb2/lsll0.ll
new file mode 100644
index 0000000..f11e669
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/lsll0.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define void @_Z4loopPxS_iS_i(i64* %d) {
+; CHECK-LABEL: _Z4loopPxS_iS_i:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    sxth r1, r1
+; CHECK-NEXT:    sxth r2, r2
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    sxth r1, r1
+; CHECK-NEXT:    sxth r2, r2
+; CHECK-NEXT:    asr.w r12, r1, #31
+; CHECK-NEXT:    asrs r3, r2, #31
+; CHECK-NEXT:    strd r2, r3, [r0]
+; CHECK-NEXT:    strd r1, r12, [r0, #8]
+; CHECK-NEXT:    bx lr
+entry:
+  %wide.load = load <2 x i64>, <2 x i64>* undef, align 8
+  %0 = trunc <2 x i64> %wide.load to <2 x i32>
+  %1 = shl <2 x i32> %0, <i32 16, i32 16>
+  %2 = ashr exact <2 x i32> %1, <i32 16, i32 16>
+  %3 = sub <2 x i32> %2, %0
+  %4 = and <2 x i32> %3, <i32 0, i32 0>
+  %5 = shl <2 x i32> %2, %4
+  %6 = extractelement <2 x i32> %5, i32 0
+  %7 = zext i32 %6 to i64
+  %8 = select i1 false, i64 %7, i64 undef
+  %9 = trunc i64 %8 to i16
+  %10 = sub i16 0, %9
+  %11 = sext i16 %10 to i64
+  %12 = getelementptr inbounds i64, i64* %d, i64 undef
+  store i64 %11, i64* %12, align 8
+  %13 = extractelement <2 x i32> %5, i32 1
+  %14 = zext i32 %13 to i64
+  %15 = select i1 false, i64 %14, i64 undef
+  %16 = trunc i64 %15 to i16
+  %17 = sub i16 0, %16
+  %18 = sext i16 %17 to i64
+  %19 = or i32 0, 1
+  %20 = sext i32 %19 to i64
+  %21 = getelementptr inbounds i64, i64* %d, i64 %20
+  store i64 %18, i64* %21, align 8
+  ret void
+}
-- 
2.7.4