From 5dc7b67c6263fe7f2ced7cb4957e967ab770fab8 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 11 Nov 2016 12:43:51 +0000 Subject: [PATCH] [SystemZ] Use LLGT(R) instructions This adds support for the 31-to-64-bit zero extension instructions LLGT and LLGTR and uses them for code generation where appropriate. Since this operation can also be performed via RISBG, we have to update SystemZDAGToDAGISel::tryRISBGZero so that we prefer LLGT over RISBG in case both are possible. The patch includes some simplification to the tryRISBGZero code; this is not intended to cause any (further) functional change in codegen. llvm-svn: 286585 --- llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 76 +++++++------- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 8 ++ llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 4 +- llvm/lib/Target/SystemZ/SystemZScheduleZ196.td | 4 +- llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td | 4 +- llvm/test/CodeGen/SystemZ/int-conv-12.ll | 133 ++++++++++++++++++++++++ llvm/test/MC/Disassembler/SystemZ/insns.txt | 39 +++++++ llvm/test/MC/SystemZ/insn-bad.s | 8 ++ llvm/test/MC/SystemZ/insn-good.s | 30 ++++++ 9 files changed, 260 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/int-conv-12.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 154ae474..6d027b8 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -935,49 +935,45 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { Count += 1; if (Count == 0) return false; - if (Count == 1) { - // Prefer to use normal shift instructions over RISBG, since they can handle - // all cases and are sometimes shorter. - if (N->getOpcode() != ISD::AND) - return false; - // Prefer register extensions like LLC over RISBG. Also prefer to start - // out with normal ANDs if one instruction would be enough. 
We can convert - // these ANDs into an RISBG later if a three-address instruction is useful. - if (VT == MVT::i32 || - RISBG.Mask == 0xff || - RISBG.Mask == 0xffff || - SystemZ::isImmLF(~RISBG.Mask) || - SystemZ::isImmHF(~RISBG.Mask)) { - // Force the new mask into the DAG, since it may include known-one bits. - auto *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode()); - if (MaskN->getZExtValue() != RISBG.Mask) { - SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT); - N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); - SelectCode(N); - return true; - } - return false; - } - } + // Prefer to use normal shift instructions over RISBG, since they can handle + // all cases and are sometimes shorter. + if (Count == 1 && N->getOpcode() != ISD::AND) + return false; - // If the RISBG operands require no rotation and just masks the bottom - // 8/16 bits, attempt to convert this to a LLC zero extension. - if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) { - unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR); - if (VT == MVT::i32) { - if (Subtarget->hasHighWord()) - OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux); - else - OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR); + // Prefer register extensions like LLC over RISBG. Also prefer to start + // out with normal ANDs if one instruction would be enough. We can convert + // these ANDs into an RISBG later if a three-address instruction is useful. + if (RISBG.Rotate == 0) { + bool PreferAnd = false; + // Prefer AND for any 32-bit and-immediate operation. + if (VT == MVT::i32) + PreferAnd = true; + // As well as for any 64-bit operation that can be implemented via LLC(R), + // LLH(R), LLGT(R), or one of the and-immediate instructions. 
+ else if (RISBG.Mask == 0xff || + RISBG.Mask == 0xffff || + RISBG.Mask == 0x7fffffff || + SystemZ::isImmLF(~RISBG.Mask) || + SystemZ::isImmHF(~RISBG.Mask)) + PreferAnd = true; + if (PreferAnd) { + // Replace the current node with an AND. Note that the current node + // might already be that same AND, in which case it is already CSE'd + // with it, and we must not call ReplaceNode. + SDValue In = convertTo(DL, VT, RISBG.Input); + SDValue Mask = CurDAG->getConstant(RISBG.Mask, DL, VT); + SDValue New = CurDAG->getNode(ISD::AND, DL, VT, In, Mask); + if (N != New.getNode()) { + insertDAGNode(CurDAG, N, Mask); + insertDAGNode(CurDAG, N, New); + ReplaceNode(N, New.getNode()); + N = New.getNode(); + } + // Now, select the machine opcode to implement this operation. + SelectCode(N); + return true; } - - SDValue In = convertTo(DL, VT, RISBG.Input); - SDValue New = convertTo( - DL, VT, SDValue(CurDAG->getMachineNode(OpCode, DL, VT, In), 0)); - ReplaceUses(N, New.getNode()); - CurDAG->RemoveDeadNode(N); - return true; } unsigned Opcode = SystemZ::RISBG; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index a80b0d2..d030ff0 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -575,6 +575,14 @@ def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>; def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>; def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>; +// 31-to-64-bit zero extensions. 
+def LLGTR : UnaryRRE<"llgtr", 0xB917, null_frag, GR64, GR64>; +def LLGT : UnaryRXY<"llgt", 0xE317, null_frag, GR64, 4>; +def : Pat<(and GR64:$src, 0x7fffffff), + (LLGTR GR64:$src)>; +def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0x7fffffff), + (LLGT bdxaddr20only:$src)>; + //===----------------------------------------------------------------------===// // Truncations //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index 7bf15b4..3be98d1 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -237,12 +237,12 @@ def : InstRW<[FXa, LSU, Lat5], (instregex "LG(H|F)RL$")>; def : InstRW<[FXa], (instregex "LLCR(Mux)?$")>; def : InstRW<[FXa], (instregex "LLHR(Mux)?$")>; -def : InstRW<[FXa], (instregex "LLG(C|H|F)R$")>; +def : InstRW<[FXa], (instregex "LLG(C|H|F|T)R$")>; def : InstRW<[LSU], (instregex "LLC(Mux)?$")>; def : InstRW<[LSU], (instregex "LLH(Mux)?$")>; def : InstRW<[FXa, LSU, Lat5], (instregex "LL(C|H)H$")>; def : InstRW<[LSU], (instregex "LLHRL$")>; -def : InstRW<[LSU], (instregex "LLG(C|H|F|HRL|FRL)$")>; +def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; //===----------------------------------------------------------------------===// // Truncations diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 0657505..4f28c51 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -212,12 +212,12 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "LG(H|F)RL$")>; def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>; def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>; -def : InstRW<[FXU], (instregex "LLG(C|F|H)R$")>; +def : InstRW<[FXU], (instregex "LLG(C|F|H|T)R$")>; def : InstRW<[LSU], (instregex "LLC(Mux)?$")>; def : InstRW<[LSU], (instregex 
"LLH(Mux)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>; def : InstRW<[LSU], (instregex "LLHRL$")>; -def : InstRW<[LSU], (instregex "LLG(C|F|H|FRL|HRL)$")>; +def : InstRW<[LSU], (instregex "LLG(C|F|H|T|FRL|HRL)$")>; //===----------------------------------------------------------------------===// // Truncations diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index e302d46..3f3391b 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -214,12 +214,12 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "LG(H|F)RL$")>; def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>; def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>; -def : InstRW<[FXU], (instregex "LLG(C|H|F)R$")>; +def : InstRW<[FXU], (instregex "LLG(C|H|F|T)R$")>; def : InstRW<[LSU], (instregex "LLC(Mux)?$")>; def : InstRW<[LSU], (instregex "LLH(Mux)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>; def : InstRW<[LSU], (instregex "LLHRL$")>; -def : InstRW<[LSU], (instregex "LLG(C|H|F|HRL|FRL)$")>; +def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; //===----------------------------------------------------------------------===// // Truncations diff --git a/llvm/test/CodeGen/SystemZ/int-conv-12.ll b/llvm/test/CodeGen/SystemZ/int-conv-12.ll new file mode 100644 index 0000000..bedd295 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-conv-12.ll @@ -0,0 +1,133 @@ +; Test 31-to-64 bit zero extensions. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test register extension, starting with an i64. +define i64 @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK: llgtr %r2, %r2 +; CHECK: br %r14 + %ext = and i64 %a, 2147483647 + ret i64 %ext +} + +; Test register extension, starting with an i32. +define i64 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: llgtr %r2, %r2 +; CHECK: br %r14 + %and = and i32 %a, 2147483647 + %ext = zext i32 %and to i64 + ret i64 %ext +} + +; ... 
and the other way around. +define i64 @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: llgtr %r2, %r2 +; CHECK: br %r14 + %ext = zext i32 %a to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; Check LLGT with no displacement. +define i64 @f4(i32 *%src) { +; CHECK-LABEL: f4: +; CHECK: llgt %r2, 0(%r2) +; CHECK: br %r14 + %word = load i32, i32 *%src + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; ... and the other way around. +define i64 @f5(i32 *%src) { +; CHECK-LABEL: f5: +; CHECK: llgt %r2, 0(%r2) +; CHECK: br %r14 + %word = load i32, i32 *%src + %and = and i32 %word, 2147483647 + %ext = zext i32 %and to i64 + ret i64 %ext +} + +; Check the high end of the LLGT range. +define i64 @f6(i32 *%src) { +; CHECK-LABEL: f6: +; CHECK: llgt %r2, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %word = load i32 , i32 *%ptr + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f7(i32 *%src) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: llgt %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %word = load i32 , i32 *%ptr + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; Check the high end of the negative LLGT range. +define i64 @f8(i32 *%src) { +; CHECK-LABEL: f8: +; CHECK: llgt %r2, -4(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %word = load i32 , i32 *%ptr + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; Check the low end of the LLGT range. 
+define i64 @f9(i32 *%src) { +; CHECK-LABEL: f9: +; CHECK: llgt %r2, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %word = load i32 , i32 *%ptr + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define i64 @f10(i32 *%src) { +; CHECK-LABEL: f10: +; CHECK: agfi %r2, -524292 +; CHECK: llgt %r2, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %word = load i32 , i32 *%ptr + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + +; Check that LLGT allows an index. +define i64 @f11(i64 %src, i64 %index) { +; CHECK-LABEL: f11: +; CHECK: llgt %r2, 524287(%r3,%r2) +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i32 * + %word = load i32 , i32 *%ptr + %ext = zext i32 %word to i64 + %and = and i64 %ext, 2147483647 + ret i64 %and +} + diff --git a/llvm/test/MC/Disassembler/SystemZ/insns.txt b/llvm/test/MC/Disassembler/SystemZ/insns.txt index cb16327..26eb456 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns.txt @@ -5356,6 +5356,45 @@ # CHECK: llgf %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x16 +# CHECK: llgtr %r0, %r15 +0xb9 0x17 0x00 0x0f + +# CHECK: llgtr %r7, %r8 +0xb9 0x17 0x00 0x78 + +# CHECK: llgtr %r15, %r0 +0xb9 0x17 0x00 0xf0 + +# CHECK: llgt %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0x17 + +# CHECK: llgt %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0x17 + +# CHECK: llgt %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0x17 + +# CHECK: llgt %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0x17 + +# CHECK: llgt %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0x17 + +# CHECK: llgt %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0x17 + +# CHECK: llgt %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0x17 + +# CHECK: llgt %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0x17 + +# CHECK: llgt %r0, 
524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0x17 + +# CHECK: llgt %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0x17 + # CHECK: llghr %r0, %r15 0xb9 0x85 0x00 0x0f diff --git a/llvm/test/MC/SystemZ/insn-bad.s b/llvm/test/MC/SystemZ/insn-bad.s index 49b4a0b..5a2e63a 100644 --- a/llvm/test/MC/SystemZ/insn-bad.s +++ b/llvm/test/MC/SystemZ/insn-bad.s @@ -2081,6 +2081,14 @@ llgc %r0, 524288 #CHECK: error: invalid operand +#CHECK: llgt %r0, -524289 +#CHECK: error: invalid operand +#CHECK: llgt %r0, 524288 + + llgt %r0, -524289 + llgt %r0, 524288 + +#CHECK: error: invalid operand #CHECK: llgf %r0, -524289 #CHECK: error: invalid operand #CHECK: llgf %r0, 524288 diff --git a/llvm/test/MC/SystemZ/insn-good.s b/llvm/test/MC/SystemZ/insn-good.s index 854cf1e..39274b4 100644 --- a/llvm/test/MC/SystemZ/insn-good.s +++ b/llvm/test/MC/SystemZ/insn-good.s @@ -6741,6 +6741,36 @@ llgcr %r7, %r8 llgcr %r15, %r0 +#CHECK: llgt %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x17] +#CHECK: llgt %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x17] +#CHECK: llgt %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x17] +#CHECK: llgt %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x17] +#CHECK: llgt %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x17] +#CHECK: llgt %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x17] +#CHECK: llgt %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x17] +#CHECK: llgt %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x17] +#CHECK: llgt %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x17] +#CHECK: llgt %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x17] + + llgt %r0, -524288 + llgt %r0, -1 + llgt %r0, 0 + llgt %r0, 1 + llgt %r0, 524287 + llgt %r0, 0(%r1) + llgt %r0, 0(%r15) + llgt %r0, 524287(%r1,%r15) + llgt %r0, 524287(%r15,%r1) + llgt %r15, 0 + +#CHECK: llgtr %r0, %r15 # encoding: [0xb9,0x17,0x00,0x0f] +#CHECK: llgtr %r7, %r8 # encoding: [0xb9,0x17,0x00,0x78] +#CHECK: llgtr %r15, %r0 # encoding: [0xb9,0x17,0x00,0xf0] + + llgtr %r0, %r15 + llgtr 
%r7, %r8 + llgtr %r15, %r0 + #CHECK: llgf %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x16] #CHECK: llgf %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x16] #CHECK: llgf %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x16] -- 2.7.4