From a68e6f5660b725dc695c0b4dfbb0a4f2de332284 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 25 Jul 2013 08:57:02 +0000 Subject: [PATCH] [SystemZ] Add STOC and STOCG These instructions are allowed to trap even if the condition is false, so for now they are only used for "*ptr = (cond ? x : *ptr)"-style constructs. llvm-svn: 187111 --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 62 +++++--- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 3 +- llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 48 +++++- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 47 +++--- llvm/lib/Target/SystemZ/SystemZProcessors.td | 11 +- llvm/lib/Target/SystemZ/SystemZSubtarget.cpp | 2 +- llvm/lib/Target/SystemZ/SystemZSubtarget.h | 4 + llvm/test/CodeGen/SystemZ/cond-store-03.ll | 2 +- llvm/test/CodeGen/SystemZ/cond-store-04.ll | 2 +- llvm/test/CodeGen/SystemZ/cond-store-07.ll | 186 ++++++++++++++++++++++++ llvm/test/CodeGen/SystemZ/cond-store-08.ll | 124 ++++++++++++++++ llvm/test/MC/Disassembler/SystemZ/insns.txt | 96 ++++++++++++ llvm/test/MC/SystemZ/insn-bad-z196.s | 34 +++++ llvm/test/MC/SystemZ/insn-good-z196.s | 96 ++++++++++++ 14 files changed, 664 insertions(+), 53 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/cond-store-07.ll create mode 100644 llvm/test/CodeGen/SystemZ/cond-store-08.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index e6e6d02..e70f775 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1773,22 +1773,36 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. // StoreOpcode is the store to use and Invert says whether the store should -// happen when the condition is false rather than true. +// happen when the condition is false rather than true. If a STORE ON +// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
MachineBasicBlock * SystemZTargetLowering::emitCondStore(MachineInstr *MI, MachineBasicBlock *MBB, - unsigned StoreOpcode, bool Invert) const { + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const { const SystemZInstrInfo *TII = TM.getInstrInfo(); - MachineOperand Base = MI->getOperand(0); - int64_t Disp = MI->getOperand(1).getImm(); - unsigned IndexReg = MI->getOperand(2).getReg(); - unsigned SrcReg = MI->getOperand(3).getReg(); + unsigned SrcReg = MI->getOperand(0).getReg(); + MachineOperand Base = MI->getOperand(1); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned IndexReg = MI->getOperand(3).getReg(); unsigned CCMask = MI->getOperand(4).getImm(); DebugLoc DL = MI->getDebugLoc(); StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); + // Use STOCOpcode if possible. We could use different store patterns in + // order to avoid matching the index register, but the performance trade-offs + // might be more complicated in that case. + if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (Invert) + CCMask = CCMask ^ SystemZ::CCMASK_ANY; + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp).addImm(CCMask); + MI->eraseFromParent(); + return MBB; + } + // Get the condition needed to branch around the store. 
if (!Invert) CCMask = CCMask ^ SystemZ::CCMASK_ANY; @@ -2249,41 +2263,41 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitSelect(MI, MBB); case SystemZ::CondStore8_32: - return emitCondStore(MI, MBB, SystemZ::STC32, false); + return emitCondStore(MI, MBB, SystemZ::STC32, 0, false); case SystemZ::CondStore8_32Inv: - return emitCondStore(MI, MBB, SystemZ::STC32, true); + return emitCondStore(MI, MBB, SystemZ::STC32, 0, true); case SystemZ::CondStore16_32: - return emitCondStore(MI, MBB, SystemZ::STH32, false); + return emitCondStore(MI, MBB, SystemZ::STH32, 0, false); case SystemZ::CondStore16_32Inv: - return emitCondStore(MI, MBB, SystemZ::STH32, true); + return emitCondStore(MI, MBB, SystemZ::STH32, 0, true); case SystemZ::CondStore32_32: - return emitCondStore(MI, MBB, SystemZ::ST32, false); + return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, false); case SystemZ::CondStore32_32Inv: - return emitCondStore(MI, MBB, SystemZ::ST32, true); + return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, true); case SystemZ::CondStore8: - return emitCondStore(MI, MBB, SystemZ::STC, false); + return emitCondStore(MI, MBB, SystemZ::STC, 0, false); case SystemZ::CondStore8Inv: - return emitCondStore(MI, MBB, SystemZ::STC, true); + return emitCondStore(MI, MBB, SystemZ::STC, 0, true); case SystemZ::CondStore16: - return emitCondStore(MI, MBB, SystemZ::STH, false); + return emitCondStore(MI, MBB, SystemZ::STH, 0, false); case SystemZ::CondStore16Inv: - return emitCondStore(MI, MBB, SystemZ::STH, true); + return emitCondStore(MI, MBB, SystemZ::STH, 0, true); case SystemZ::CondStore32: - return emitCondStore(MI, MBB, SystemZ::ST, false); + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); case SystemZ::CondStore32Inv: - return emitCondStore(MI, MBB, SystemZ::ST, true); + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); case SystemZ::CondStore64: - return emitCondStore(MI, MBB, SystemZ::STG, 
false); + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); case SystemZ::CondStore64Inv: - return emitCondStore(MI, MBB, SystemZ::STG, true); + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); case SystemZ::CondStoreF32: - return emitCondStore(MI, MBB, SystemZ::STE, false); + return emitCondStore(MI, MBB, SystemZ::STE, 0, false); case SystemZ::CondStoreF32Inv: - return emitCondStore(MI, MBB, SystemZ::STE, true); + return emitCondStore(MI, MBB, SystemZ::STE, 0, true); case SystemZ::CondStoreF64: - return emitCondStore(MI, MBB, SystemZ::STD, false); + return emitCondStore(MI, MBB, SystemZ::STD, 0, false); case SystemZ::CondStoreF64Inv: - return emitCondStore(MI, MBB, SystemZ::STD, true); + return emitCondStore(MI, MBB, SystemZ::STD, 0, true); case SystemZ::AEXT128_64: return emitExt128(MI, MBB, false, SystemZ::subreg_low); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 88e1fa7..ce876a9 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -211,7 +211,8 @@ private: MachineBasicBlock *BB) const; MachineBasicBlock *emitCondStore(MachineInstr *MI, MachineBasicBlock *BB, - unsigned StoreOpcode, bool Invert) const; + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 9257a6a0..b92c350 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -451,9 +451,11 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> // Instruction definitions with semantics //===----------------------------------------------------------------------===// // -// These classes have the form <Category><Format>, where <Format> is one +// These classes have the
form [Cond]<Category><Format>, where <Format> is one // of the formats defined above and where <Category> describes the inputs -// and outputs. <Category> can be one of: +// and outputs. "Cond" is used if the instruction is conditional, +// in which case the 4-bit condition-code mask is added as a final operand. +// <Category> can be one of: // // Inherent: // One register output operand and no input operands. @@ -618,6 +620,40 @@ multiclass StoreSIPair siOpcode, bits<16> siyOpcode, } } +class CondStoreRSY opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +// Like CondStoreRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondStoreRSY opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +// Like CondStoreRSY, but with a fixed CC mask.
+class FixedCondStoreRSY opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let R3 = ccmask; +} + class UnaryRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR multiclass CondStores { let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { - def "" : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc), + def "" : Pseudo<(outs), (ins cls:$new, mode:$addr, uimm8zx4:$cc), [(store (z_select_ccmask cls:$new, (load mode:$addr), - imm:$cc), mode:$addr)]>; - def Inv : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc), + uimm8zx4:$cc), mode:$addr)]>; + def Inv : Pseudo<(outs), (ins cls:$new, mode:$addr, uimm8zx4:$cc), [(store (z_select_ccmask (load mode:$addr), cls:$new, - imm:$cc), mode:$addr)]>; + uimm8zx4:$cc), mode:$addr)]>; } } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 56b7a1f..bda34df 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -107,26 +107,28 @@ defm AsmC : CompareBranches; // (integer or floating-point) multiclass CondExtendedMnemonic ccmask, string name> { let R1 = ccmask in { - def "" : InstRI<0xA74, (outs), (ins brtarget16:$I2), - "j"##name##"\t$I2", []>; - def L : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg"##name##"\t$I2", []>; } + def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>; + def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>; } -defm AsmJO : CondExtendedMnemonic<1, "o">; -defm AsmJH : CondExtendedMnemonic<2, "h">; -defm AsmJNLE : CondExtendedMnemonic<3, "nle">; -defm AsmJL : CondExtendedMnemonic<4, "l">; -defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; 
-defm AsmJLH : CondExtendedMnemonic<6, "lh">; -defm AsmJNE : CondExtendedMnemonic<7, "ne">; -defm AsmJE : CondExtendedMnemonic<8, "e">; -defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; -defm AsmJHE : CondExtendedMnemonic<10, "he">; -defm AsmJNL : CondExtendedMnemonic<11, "nl">; -defm AsmJLE : CondExtendedMnemonic<12, "le">; -defm AsmJNH : CondExtendedMnemonic<13, "nh">; -defm AsmJNO : CondExtendedMnemonic<14, "no">; +defm AsmO : CondExtendedMnemonic<1, "o">; +defm AsmH : CondExtendedMnemonic<2, "h">; +defm AsmNLE : CondExtendedMnemonic<3, "nle">; +defm AsmL : CondExtendedMnemonic<4, "l">; +defm AsmNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmLH : CondExtendedMnemonic<6, "lh">; +defm AsmNE : CondExtendedMnemonic<7, "ne">; +defm AsmE : CondExtendedMnemonic<8, "e">; +defm AsmNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmHE : CondExtendedMnemonic<10, "he">; +defm AsmNL : CondExtendedMnemonic<11, "nl">; +defm AsmLE : CondExtendedMnemonic<12, "le">; +defm AsmNH : CondExtendedMnemonic<13, "nh">; +defm AsmNO : CondExtendedMnemonic<14, "no">; // Define AsmParser mnemonics for each integer condition-code mask. // This is like the list above, except that condition 3 is not possible @@ -274,6 +276,17 @@ let isCodeGenOnly = 1 in def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; +// Store on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def STOC32 : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def STOC : CondStoreRSY<"stoc", 0xEBF3, GR64, 4>; + def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} +let Uses = [CC] in { + def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def AsmSTOCG : AsmCondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} + // 8-bit immediate stores to 8-bit fields. 
defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td index 5668ae3..96fa6a4 100644 --- a/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -21,6 +21,13 @@ def FeatureDistinctOps : SystemZFeature< "Assume that the distinct-operands facility is installed" >; +def FeatureLoadStoreOnCond : SystemZFeature< + "load-store-on-cond", "LoadStoreOnCond", + "Assume that the load/store-on-condition facility is installed" +>; + def : Processor<"z10", NoItineraries, []>; -def : Processor<"z196", NoItineraries, [FeatureDistinctOps]>; -def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps]>; +def : Processor<"z196", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond]>; +def : Processor<"zEC12", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond]>; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index f37ea21..43ac1ea 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -21,7 +21,7 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), - TargetTriple(TT) { + HasLoadStoreOnCond(false), TargetTriple(TT) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "z10"; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index 4a86287..9d5dfc8a 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -28,6 +28,7 @@ class StringRef; class SystemZSubtarget : public SystemZGenSubtargetInfo { protected: bool HasDistinctOps; + bool HasLoadStoreOnCond; private: Triple TargetTriple; @@ -42,6 +43,9 @@ public: // Return true if the target has the distinct-operands 
facility. bool hasDistinctOps() const { return HasDistinctOps; } + // Return true if the target has the load/store-on-condition facility. + bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/llvm/test/CodeGen/SystemZ/cond-store-03.ll b/llvm/test/CodeGen/SystemZ/cond-store-03.ll index ba076b0..e2aeecf 100644 --- a/llvm/test/CodeGen/SystemZ/cond-store-03.ll +++ b/llvm/test/CodeGen/SystemZ/cond-store-03.ll @@ -1,6 +1,6 @@ ; Test 32-bit conditional stores that are presented as selects. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare void @foo(i32 *) diff --git a/llvm/test/CodeGen/SystemZ/cond-store-04.ll b/llvm/test/CodeGen/SystemZ/cond-store-04.ll index f00c94c..1830f27 100644 --- a/llvm/test/CodeGen/SystemZ/cond-store-04.ll +++ b/llvm/test/CodeGen/SystemZ/cond-store-04.ll @@ -1,6 +1,6 @@ ; Test 64-bit conditional stores that are presented as selects. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare void @foo(i64 *) diff --git a/llvm/test/CodeGen/SystemZ/cond-store-07.ll b/llvm/test/CodeGen/SystemZ/cond-store-07.ll new file mode 100644 index 0000000..291360b --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-store-07.ll @@ -0,0 +1,186 @@ +; Test STOCs that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare void @foo(i32 *) + +; Test the simple case, with the loaded value first. 
+define void @f1(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK: clfi %r4, 42 +; CHECK: stocl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %alt, i32 %orig + store i32 %res, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. +define void @f3(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK: clfi %r4, 42 +; CHECK: stocl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 64 bits, with the +; loaded value first.
+define void @f5(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK: clfi %r4, 42 +; CHECK: stocl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Check the high end of the aligned STOC range. +define void @f7(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, 524284(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up. Other sequences besides this one would be OK. +define void @f8(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f8: +; CHECK: agfi %r2, 524288 +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the low end of the STOC range. 
+define void @f9(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f9: +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word down, with the same comments as f8. +define void @f10(i32 *%base, i32 %alt, i32 %limit) { +; CHECK-LABEL: f10: +; CHECK: agfi %r2, -524292 +; CHECK: clfi %r4, 42 +; CHECK: stocnl %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Try a frame index base. +define void @f11(i32 %alt, i32 %limit) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: stocnl {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + call void @foo(i32 *%ptr) + ret void +} + +; Test that conditionally-executed stores do not use STOC, since STOC +; is allowed to trap even when the condition is false. +define void @f12(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f12: +; CHECK-NOT: stoc +; CHECK: br %r14 +entry: + %cmp = icmp ule i32 %a, %b + br i1 %cmp, label %store, label %exit + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/cond-store-08.ll b/llvm/test/CodeGen/SystemZ/cond-store-08.ll new file mode 100644 index 0000000..d67281c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-store-08.ll @@ -0,0 +1,124 @@ +; Test STOCGs that are presented as selects. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare void @foo(i64 *) + +; Test with the loaded value first. +define void @f1(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f1: +; CHECK: clfi %r4, 42 +; CHECK: stocgnl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK-LABEL: f2: +; CHECK: clfi %r4, 42 +; CHECK: stocgl %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %alt, i64 %orig + store i64 %res, i64 *%ptr + ret void +} + +; Check the high end of the aligned STOCG range. +define void @f3(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f3: +; CHECK: clfi %r4, 42 +; CHECK: stocgnl %r3, 524280(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword up. Other sequences besides this one would be OK. +define void @f4(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f4: +; CHECK: agfi %r2, 524288 +; CHECK: clfi %r4, 42 +; CHECK: stocgnl %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the low end of the STOCG range. 
+define void @f5(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f5: +; CHECK: clfi %r4, 42 +; CHECK: stocgnl %r3, -524288(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword down, with the same comments as f4. +define void @f6(i64 *%base, i64 %alt, i32 %limit) { +; CHECK-LABEL: f6: +; CHECK: agfi %r2, -524296 +; CHECK: clfi %r4, 42 +; CHECK: stocgnl %r3, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Try a frame index base. +define void @f7(i64 %alt, i32 %limit) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, foo@PLT +; CHECK: stocgnl {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i64 + call void @foo(i64 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + call void @foo(i64 *%ptr) + ret void +} + +; Test that conditionally-executed stores do not use STOC, since STOC +; is allowed to trap even when the condition is false. 
+define void @f8(i64 %a, i64 %b, i64 *%dest) { +; CHECK-LABEL: f8: +; CHECK-NOT: stocg %r3, 0(%r4) +; CHECK: br %r14 +entry: + %cmp = icmp ule i64 %a, %b + br i1 %cmp, label %store, label %exit + +store: + store i64 %b, i64 *%dest + br label %exit + +exit: + ret void +} diff --git a/llvm/test/MC/Disassembler/SystemZ/insns.txt b/llvm/test/MC/Disassembler/SystemZ/insns.txt index 29ff69e..7728a51 100644 --- a/llvm/test/MC/Disassembler/SystemZ/insns.txt +++ b/llvm/test/MC/Disassembler/SystemZ/insns.txt @@ -6106,6 +6106,102 @@ # CHECK: st %r15, 0 0x50 0xf0 0x00 0x00 +# CHECK: stoc %r1, 2(%r3), 0 +0xeb 0x10 0x30 0x02 0x00 0xf3 + +# CHECK: stoco %r1, 2(%r3) +0xeb 0x11 0x30 0x02 0x00 0xf3 + +# CHECK: stoch %r1, 2(%r3) +0xeb 0x12 0x30 0x02 0x00 0xf3 + +# CHECK: stocnle %r1, 2(%r3) +0xeb 0x13 0x30 0x02 0x00 0xf3 + +# CHECK: stocl %r1, 2(%r3) +0xeb 0x14 0x30 0x02 0x00 0xf3 + +# CHECK: stocnhe %r1, 2(%r3) +0xeb 0x15 0x30 0x02 0x00 0xf3 + +# CHECK: stoclh %r1, 2(%r3) +0xeb 0x16 0x30 0x02 0x00 0xf3 + +# CHECK: stocne %r1, 2(%r3) +0xeb 0x17 0x30 0x02 0x00 0xf3 + +# CHECK: stoce %r1, 2(%r3) +0xeb 0x18 0x30 0x02 0x00 0xf3 + +# CHECK: stocnlh %r1, 2(%r3) +0xeb 0x19 0x30 0x02 0x00 0xf3 + +# CHECK: stoche %r1, 2(%r3) +0xeb 0x1a 0x30 0x02 0x00 0xf3 + +# CHECK: stocnl %r1, 2(%r3) +0xeb 0x1b 0x30 0x02 0x00 0xf3 + +# CHECK: stocle %r1, 2(%r3) +0xeb 0x1c 0x30 0x02 0x00 0xf3 + +# CHECK: stocnh %r1, 2(%r3) +0xeb 0x1d 0x30 0x02 0x00 0xf3 + +# CHECK: stocno %r1, 2(%r3) +0xeb 0x1e 0x30 0x02 0x00 0xf3 + +# CHECK: stoc %r1, 2(%r3), 15 +0xeb 0x1f 0x30 0x02 0x00 0xf3 + +# CHECK: stocg %r1, 2(%r3), 0 +0xeb 0x10 0x30 0x02 0x00 0xe3 + +# CHECK: stocgo %r1, 2(%r3) +0xeb 0x11 0x30 0x02 0x00 0xe3 + +# CHECK: stocgh %r1, 2(%r3) +0xeb 0x12 0x30 0x02 0x00 0xe3 + +# CHECK: stocgnle %r1, 2(%r3) +0xeb 0x13 0x30 0x02 0x00 0xe3 + +# CHECK: stocgl %r1, 2(%r3) +0xeb 0x14 0x30 0x02 0x00 0xe3 + +# CHECK: stocgnhe %r1, 2(%r3) +0xeb 0x15 0x30 0x02 0x00 0xe3 + +# CHECK: stocglh %r1, 2(%r3) +0xeb 0x16 0x30 0x02 0x00 
0xe3 + +# CHECK: stocgne %r1, 2(%r3) +0xeb 0x17 0x30 0x02 0x00 0xe3 + +# CHECK: stocge %r1, 2(%r3) +0xeb 0x18 0x30 0x02 0x00 0xe3 + +# CHECK: stocgnlh %r1, 2(%r3) +0xeb 0x19 0x30 0x02 0x00 0xe3 + +# CHECK: stocghe %r1, 2(%r3) +0xeb 0x1a 0x30 0x02 0x00 0xe3 + +# CHECK: stocgnl %r1, 2(%r3) +0xeb 0x1b 0x30 0x02 0x00 0xe3 + +# CHECK: stocgle %r1, 2(%r3) +0xeb 0x1c 0x30 0x02 0x00 0xe3 + +# CHECK: stocgnh %r1, 2(%r3) +0xeb 0x1d 0x30 0x02 0x00 0xe3 + +# CHECK: stocgno %r1, 2(%r3) +0xeb 0x1e 0x30 0x02 0x00 0xe3 + +# CHECK: stocg %r1, 2(%r3), 15 +0xeb 0x1f 0x30 0x02 0x00 0xe3 + # CHECK: s %r0, 0 0x5b 0x00 0x00 0x00 diff --git a/llvm/test/MC/SystemZ/insn-bad-z196.s b/llvm/test/MC/SystemZ/insn-bad-z196.s index b4dc529..f62ea74 100644 --- a/llvm/test/MC/SystemZ/insn-bad-z196.s +++ b/llvm/test/MC/SystemZ/insn-bad-z196.s @@ -65,3 +65,37 @@ srlk %r0,%r0,524288 srlk %r0,%r0,0(%r0) srlk %r0,%r0,0(%r1,%r2) + +#CHECK: error: invalid operand +#CHECK: stoc %r0,0,-1 +#CHECK: error: invalid operand +#CHECK: stoc %r0,0,16 +#CHECK: error: invalid operand +#CHECK: stoc %r0,-524289,1 +#CHECK: error: invalid operand +#CHECK: stoc %r0,524288,1 +#CHECK: error: invalid use of indexed addressing +#CHECK: stoc %r0,0(%r1,%r2),1 + + stoc %r0,0,-1 + stoc %r0,0,16 + stoc %r0,-524289,1 + stoc %r0,524288,1 + stoc %r0,0(%r1,%r2),1 + +#CHECK: error: invalid operand +#CHECK: stocg %r0,0,-1 +#CHECK: error: invalid operand +#CHECK: stocg %r0,0,16 +#CHECK: error: invalid operand +#CHECK: stocg %r0,-524289,1 +#CHECK: error: invalid operand +#CHECK: stocg %r0,524288,1 +#CHECK: error: invalid use of indexed addressing +#CHECK: stocg %r0,0(%r1,%r2),1 + + stocg %r0,0,-1 + stocg %r0,0,16 + stocg %r0,-524289,1 + stocg %r0,524288,1 + stocg %r0,0(%r1,%r2),1 diff --git a/llvm/test/MC/SystemZ/insn-good-z196.s b/llvm/test/MC/SystemZ/insn-good-z196.s index 422c689e..5b0ed59 100644 --- a/llvm/test/MC/SystemZ/insn-good-z196.s +++ b/llvm/test/MC/SystemZ/insn-good-z196.s @@ -295,6 +295,102 @@ srlk %r0,%r0,524287(%r1) srlk 
%r0,%r0,524287(%r15) +#CHECK: stoc %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xf3] +#CHECK: stoc %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xf3] +#CHECK: stoc %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xf3] +#CHECK: stoc %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xf3] +#CHECK: stoc %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xf3] +#CHECK: stoc %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xf3] +#CHECK: stoc %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xf3] +#CHECK: stoc %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xf3] + + stoc %r0,0,0 + stoc %r0,0,15 + stoc %r0,-524288,0 + stoc %r0,524287,0 + stoc %r0,0(%r1),0 + stoc %r0,0(%r15),0 + stoc %r15,0,0 + stoc %r1,4095(%r2),3 + +#CHECK: stoco %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xf3] +#CHECK: stoch %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xf3] +#CHECK: stocnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xf3] +#CHECK: stocl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xf3] +#CHECK: stocnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xf3] +#CHECK: stoclh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xf3] +#CHECK: stocne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xf3] +#CHECK: stoce %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xf3] +#CHECK: stocnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xf3] +#CHECK: stoche %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xf3] +#CHECK: stocnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xf3] +#CHECK: stocle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xf3] +#CHECK: stocnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xf3] +#CHECK: stocno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xf3] + + stoco %r1,2(%r3) + stoch %r1,2(%r3) + stocnle %r1,2(%r3) + stocl %r1,2(%r3) + stocnhe %r1,2(%r3) + stoclh %r1,2(%r3) + stocne %r1,2(%r3) + stoce %r1,2(%r3) + stocnlh %r1,2(%r3) + stoche %r1,2(%r3) + stocnl %r1,2(%r3) + stocle 
%r1,2(%r3) + stocnh %r1,2(%r3) + stocno %r1,2(%r3) + +#CHECK: stocg %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe3] +#CHECK: stocg %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe3] +#CHECK: stocg %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe3] +#CHECK: stocg %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe3] +#CHECK: stocg %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe3] +#CHECK: stocg %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe3] +#CHECK: stocg %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe3] +#CHECK: stocg %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe3] + + stocg %r0,0,0 + stocg %r0,0,15 + stocg %r0,-524288,0 + stocg %r0,524287,0 + stocg %r0,0(%r1),0 + stocg %r0,0(%r15),0 + stocg %r15,0,0 + stocg %r1,4095(%r2),3 + +#CHECK: stocgo %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe3] +#CHECK: stocgh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe3] +#CHECK: stocgnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe3] +#CHECK: stocgl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe3] +#CHECK: stocgnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe3] +#CHECK: stocglh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe3] +#CHECK: stocgne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe3] +#CHECK: stocge %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe3] +#CHECK: stocgnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe3] +#CHECK: stocghe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe3] +#CHECK: stocgnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe3] +#CHECK: stocgle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe3] +#CHECK: stocgnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe3] +#CHECK: stocgno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe3] + + stocgo %r1,2(%r3) + stocgh %r1,2(%r3) + stocgnle %r1,2(%r3) + stocgl %r1,2(%r3) + stocgnhe %r1,2(%r3) + stocglh %r1,2(%r3) + stocgne %r1,2(%r3) + stocge %r1,2(%r3) + 
stocgnlh %r1,2(%r3) + stocghe %r1,2(%r3) + stocgnl %r1,2(%r3) + stocgle %r1,2(%r3) + stocgnh %r1,2(%r3) + stocgno %r1,2(%r3) + #CHECK: xgrk %r0, %r0, %r0 # encoding: [0xb9,0xe7,0x00,0x00] #CHECK: xgrk %r0, %r0, %r15 # encoding: [0xb9,0xe7,0xf0,0x00] #CHECK: xgrk %r0, %r15, %r0 # encoding: [0xb9,0xe7,0x00,0x0f] -- 2.7.4