defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
+
+// X << 1 ==> X + X
+// Select a vector ADD of the operand with itself instead of a vector
+// shift-left by an immediate of 1 (AArch64vshl with shift amount (i32 1)).
+// The target ADD instruction record is looked up by name concatenation:
+// "ADD" # <ValueType name>, e.g. ty = v16i8 resolves to ADDv16i8, so an
+// instantiation is only valid for types that have a matching ADDvNiM record.
+class SHLToADDPat<ValueType ty, RegisterClass regtype>
+ : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
+ (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;
+
+// 128-bit vector types live in FPR128.
+def : SHLToADDPat<v16i8, FPR128>;
+def : SHLToADDPat<v8i16, FPR128>;
+def : SHLToADDPat<v4i32, FPR128>;
+def : SHLToADDPat<v2i64, FPR128>;
+// 64-bit vector types live in FPR64.
+def : SHLToADDPat<v8i8, FPR64>;
+def : SHLToADDPat<v4i16, FPR64>;
+def : SHLToADDPat<v2i32, FPR64>;
+
defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
; CHECK-LABEL: testLeftBad8x8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8b v2, #165
-; CHECK-NEXT: shl.8b v1, v1, #1
+; CHECK-NEXT: add.8b v1, v1, v1
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: orr.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-LABEL: testLeftBad16x8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.16b v2, #165
-; CHECK-NEXT: shl.16b v1, v1, #1
+; CHECK-NEXT: add.16b v1, v1, v1
; CHECK-NEXT: and.16b v0, v0, v2
; CHECK-NEXT: orr.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-LABEL: neon.ushl8_noext_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: shl.8h v0, v0, #1
+; CHECK-NEXT: add.8h v0, v0, v0
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI160_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI160_0]
-; CHECK-NEXT: shl.4s v0, v0, #1
+; CHECK-NEXT: add.4s v0, v0, v0
; CHECK-NEXT: ret
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
; CHECK-LABEL: neon.sshl16b_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: shl.16b v0, v0, #1
+; CHECK-NEXT: add.16b v0, v0, v0
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-LABEL: neon.sshl4s_no_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: shl.4s v0, v0, #1
+; CHECK-NEXT: add.4s v0, v0, v0
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI179_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI179_0]
-; CHECK-NEXT: shl.2d v0, v0, #1
+; CHECK-NEXT: add.2d v0, v0, v0
; CHECK-NEXT: ret
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: shl.8b v0, v0, #1
+; CHECK-NEXT: add.8b v0, v0, v0
; CHECK-NEXT: orr.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: shl.4h v0, v0, #1
+; CHECK-NEXT: add.4h v0, v0, v0
; CHECK-NEXT: orr.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: shl.2s v0, v0, #1
+; CHECK-NEXT: add.2s v0, v0, v0
; CHECK-NEXT: orr.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: shl.16b v0, v0, #1
+; CHECK-NEXT: add.16b v0, v0, v0
; CHECK-NEXT: orr.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: shl.8h v0, v0, #1
+; CHECK-NEXT: add.8h v0, v0, v0
; CHECK-NEXT: orr.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: shl.4s v0, v0, #1
+; CHECK-NEXT: add.4s v0, v0, v0
; CHECK-NEXT: orr.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: shl.2d v0, v0, #1
+; CHECK-NEXT: add.2d v0, v0, v0
; CHECK-NEXT: orr.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
; NOSHA3-LABEL: rax1:
; NOSHA3: // %bb.0:
; NOSHA3-NEXT: ushr v2.2d, v1.2d, #63
-; NOSHA3-NEXT: shl v1.2d, v1.2d, #1
+; NOSHA3-NEXT: add v1.2d, v1.2d, v1.2d
; NOSHA3-NEXT: orr v1.16b, v1.16b, v2.16b
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
; NOSHA3-NEXT: ret
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s
+
+; A vector shl by a splat of 1 should be selected as "add vN, vN, vN" (the
+; SHLToADDPat patterns) for every 64-bit and 128-bit integer vector type.
+; When the shifted value is a sign/zero extension, the combined
+; sshll/ushll #1 widening shift must still win over the add form.
+
+define <16 x i8> @shl_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: shl_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %add.i
+}
+
+define <8 x i16> @shl_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: shl_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @shl_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: shl_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @shl_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: shl_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.2d, v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <2 x i64> %a, <i64 1, i64 1>
+ ret <2 x i64> %add.i
+}
+
+define <8 x i8> @shl_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: shl_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.8b, v0.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <8 x i8> %add.i
+}
+
+define <4 x i16> @shl_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: shl_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.4h, v0.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <4 x i16> %a, <i16 1, i16 1, i16 1, i16 1>
+ ret <4 x i16> %add.i
+}
+
+define <2 x i32> @shl_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: shl_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %add.i = shl <2 x i32> %a, <i32 1, i32 1>
+ ret <2 x i32> %add.i
+}
+
+; Negative-ish tests: shl-by-1 of an extended value folds into the widening
+; shift instruction instead of becoming an add.
+
+define <8 x i16> @sshll_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: sshll_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.8h, v0.8b, #1
+; CHECK-NEXT: ret
+ %1 = sext <8 x i8> %a to <8 x i16>
+ %tmp = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp
+}
+
+define <4 x i32> @sshll_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: sshll_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.4s, v0.4h, #1
+; CHECK-NEXT: ret
+ %1 = sext <4 x i16> %a to <4 x i32>
+ %tmp = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp
+}
+
+define <2 x i64> @sshll_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: sshll_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.2d, v0.2s, #1
+; CHECK-NEXT: ret
+ %1 = sext <2 x i32> %a to <2 x i64>
+ %tmp = shl <2 x i64> %1, <i64 1, i64 1>
+ ret <2 x i64> %tmp
+}
+
+define <8 x i16> @ushll_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: ushll_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #1
+; CHECK-NEXT: ret
+ %1 = zext <8 x i8> %a to <8 x i16>
+ %tmp = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp
+}
+
+define <4 x i32> @ushll_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: ushll_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.4s, v0.4h, #1
+; CHECK-NEXT: ret
+ %1 = zext <4 x i16> %a to <4 x i32>
+ %tmp = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp
+}
+
+define <2 x i64> @ushll_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: ushll_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #1
+; CHECK-NEXT: ret
+ %1 = zext <2 x i32> %a to <2 x i64>
+ %tmp = shl <2 x i64> %1, <i64 1, i64 1>
+ ret <2 x i64> %tmp
+}
; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: adrp x8, .LCPI4_3
-; CHECK-NEXT: shl v3.4h, v0.4h, #1
+; CHECK-NEXT: add v3.4h, v0.4h, v0.4h
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ushl v1.4h, v3.4h, v2.4h
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #7
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.16b, v0.16b, #1
+; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-NEXT: ret
%t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%t1 = shl <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #28
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.16b, v0.16b, #1
+; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-NEXT: ret
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = shl <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #224
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.16b, v0.16b, #1
+; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-NEXT: ret
%t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
%t1 = shl <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.8h, #127
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.8h, v0.8h, #1
+; CHECK-NEXT: add v0.8h, v0.8h, v0.8h
; CHECK-NEXT: ret
%t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%t1 = shl <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.8h, #254, lsl #8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.8h, v0.8h, #1
+; CHECK-NEXT: add v0.8h, v0.8h, v0.8h
; CHECK-NEXT: ret
%t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
%t1 = shl <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #127, msl #8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.4s, v0.4s, #1
+; CHECK-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-NEXT: ret
%t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%t1 = shl <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.4s, #1, msl #16
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.4s, v0.4s, #1
+; CHECK-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-NEXT: ret
%t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
%t1 = shl <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: mov w8, #2147483647
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.2d, v0.2d, #1
+; CHECK-NEXT: add v0.2d, v0.2d, v0.2d
; CHECK-NEXT: ret
%t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
%t1 = shl <2 x i64> %t0, <i64 1, i64 1>
; CHECK-NEXT: mov x8, #-8589934592
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: shl v0.2d, v0.2d, #1
+; CHECK-NEXT: add v0.2d, v0.2d, v0.2d
; CHECK-NEXT: ret
%t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
%t1 = shl <2 x i64> %t0, <i64 1, i64 1>