From f76fe454268d661cb31018d9e46a6df467bb22b9 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Tue, 2 Apr 2019 15:36:30 +0000
Subject: [PATCH] [SystemZ] Improve instruction selection of 64 bit shifts and rotates.

For shift and rotate instructions that only use the last 6 bits of the shift
amount, a shift amount of (x*64-s) can be substituted with (-s). This saves
one instruction and a register:

  lhi %r1, 64
  sr %r1, %r3
  sllg %r2, %r2, 0(%r1)
  =>
  lcr %r1, %r3
  sllg %r2, %r2, 0(%r1)
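
For example, IR of the following shape (mirroring the f1 case in the new
test below; the function name is illustrative) is now selected to the
shorter sequence:

  define i64 @shl_by_64_minus_amt(i64 %in, i64 %sh) {  ; illustrative name
    %sub = sub i64 64, %sh    ; 64 (or any multiple of 64) minus the amount
    %shl = shl i64 %in, %sub  ; selected as lcr + sllg instead of lhi + sr + sllg
    ret i64 %shl
  }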

Review: Ulrich Weigand
llvm-svn: 357481
---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   | 16 ++++
 llvm/lib/Target/SystemZ/SystemZOperators.td   |  4 +
 .../CodeGen/SystemZ/rot-shift-64-sub-amt.ll   | 82 +++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 llvm/test/CodeGen/SystemZ/rot-shift-64-sub-amt.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 63d7ae99f7ac..100812623897 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2182,6 +2182,22 @@ let AddedComplexity = 4 in {
             (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 }
 
+// Substitute (x*64-s) with (-s), since shift/rotate instructions only
+// use the last 6 bits of the second operand register (making it modulo 64).
+let AddedComplexity = 4 in {
+  def : Pat<(shl GR64:$val, (sub imm32mod64, GR32:$shift)),
+            (SLLG GR64:$val, (LCR GR32:$shift), 0)>;
+
+  def : Pat<(sra GR64:$val, (sub imm32mod64, GR32:$shift)),
+            (SRAG GR64:$val, (LCR GR32:$shift), 0)>;
+
+  def : Pat<(srl GR64:$val, (sub imm32mod64, GR32:$shift)),
+            (SRLG GR64:$val, (LCR GR32:$shift), 0)>;
+
+  def : Pat<(rotl GR64:$val, (sub imm32mod64, GR32:$shift)),
+            (RLLG GR64:$val, (LCR GR32:$shift), 0)>;
+}
+
 // Peepholes for turning scalar operations into block operations.
 defm : BlockLoadStore;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 032d08d1cac5..29a55bf6970b 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -708,6 +708,10 @@ class shiftop
              [(operator node:$val, node:$count),
               (operator node:$val, (and node:$count, imm32bottom6set))]>;
 
+def imm32mod64 : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() % 64 == 0);
+}]>;
+
 // Load a scalar and replicate it in all elements of a vector.
 class z_replicate_load
   : PatFrag<(ops node:$addr),
diff --git a/llvm/test/CodeGen/SystemZ/rot-shift-64-sub-amt.ll b/llvm/test/CodeGen/SystemZ/rot-shift-64-sub-amt.ll
new file mode 100644
index 000000000000..c29f6ab996c6
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/rot-shift-64-sub-amt.ll
@@ -0,0 +1,82 @@
+; Test that the case of (64 - shift) used by a shift/rotate instruction is
+; implemented with an lcr. This should also work for any multiple of 64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define i64 @f1(i64 %in, i64 %sh) {
+; CHECK-LABEL: f1:
+; CHECK: lcr %r1, %r3
+; CHECK: sllg %r2, %r2, 0(%r1)
+  %sub = sub i64 64, %sh
+  %shl = shl i64 %in, %sub
+  ret i64 %shl
+}
+
+define i64 @f2(i64 %in, i64 %sh) {
+; CHECK-LABEL: f2:
+; CHECK: lcr %r1, %r3
+; CHECK: srag %r2, %r2, 0(%r1)
+  %sub = sub i64 64, %sh
+  %shl = ashr i64 %in, %sub
+  ret i64 %shl
+}
+
+define i64 @f3(i64 %in, i64 %sh) {
+; CHECK-LABEL: f3:
+; CHECK: lcr %r1, %r3
+; CHECK: srlg %r2, %r2, 0(%r1)
+  %sub = sub i64 64, %sh
+  %shl = lshr i64 %in, %sub
+  ret i64 %shl
+}
+
+define i64 @f4(i64 %in, i64 %sh) {
+; CHECK-LABEL: f4:
+; CHECK: lcr %r1, %r3
+; CHECK: rllg %r2, %r2, 0(%r1)
+  %shr = lshr i64 %in, %sh
+  %sub = sub i64 64, %sh
+  %shl = shl i64 %in, %sub
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
+
+define i64 @f5(i64 %in, i64 %sh) {
+; CHECK-LABEL: f5:
+; CHECK: lcr %r1, %r3
+; CHECK: sllg %r2, %r2, 0(%r1)
+  %sub = sub i64 128, %sh
+  %shl = shl i64 %in, %sub
+  ret i64 %shl
+}
+
+define i64 @f6(i64 %in, i64 %sh) {
+; CHECK-LABEL: f6:
+; CHECK: lcr %r1, %r3
+; CHECK: srag %r2, %r2, 0(%r1)
+  %sub = sub i64 256, %sh
+  %shl = ashr i64 %in, %sub
+  ret i64 %shl
+}
+
+define i64 @f7(i64 %in, i64 %sh) {
+; CHECK-LABEL: f7:
+; CHECK: lcr %r1, %r3
+; CHECK: srlg %r2, %r2, 0(%r1)
+  %sub = sub i64 512, %sh
+  %shl = lshr i64 %in, %sub
+  ret i64 %shl
+}
+
+define i64 @f8(i64 %in, i64 %sh) {
+; CHECK-LABEL: f8:
+; CHECK: lcr %r1, %r3
+; CHECK: srlg %r0, %r2, 0(%r3)
+; CHECK: sllg %r2, %r2, 0(%r1)
+; CHECK: ogr %r2, %r0
+  %shr = lshr i64 %in, %sh
+  %sub = sub i64 1024, %sh
+  %shl = shl i64 %in, %sub
+  %or = or i64 %shl, %shr
+  ret i64 %or
+}
-- 
2.34.1