From d96f8a3e82f6e64603d2ad563407cb24599a6733 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 20 Apr 2016 18:28:55 +0000 Subject: [PATCH] [lanai] Add subword scheduling itineraries. Differentiate between word and subword memory operations as they take different amount of cycles to complete. This just adds a basic model of the subword latency to the scheduler. llvm-svn: 266898 --- llvm/lib/Target/Lanai/LanaiInstrInfo.td | 19 ++++++++++++++----- llvm/lib/Target/Lanai/LanaiSchedule.td | 32 ++++++++++++++++++++------------ llvm/test/CodeGen/Lanai/subword.ll | 29 +++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/Lanai/subword.ll diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.td b/llvm/lib/Target/Lanai/LanaiInstrInfo.td index 3cc1363..217cdd4 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.td +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.td @@ -496,6 +496,7 @@ class LoadRR let Q = src{8}; let BBB = src{7-5}; let JJJJJ = src{4-0}; + let mayLoad = 1; } class LoadRI @@ -511,6 +512,7 @@ class LoadRI let Q = src{16}; let imm16 = src{15-0}; let isReMaterializable = 1; + let mayLoad = 1; } let E = 0 in { @@ -554,19 +556,22 @@ def LDADDR : InstSLS<0x0, (outs GPR:$Rd), (ins MEMi:$src), let msb = src{20-16}; let lsb = src{15-0}; let isReMaterializable = 1; + let mayLoad = 1; } class LoadSPLS : InstSPLS<(outs GPR:$Rd), (ins MEMspls:$src), !strconcat(asmstring, "\t$src, $Rd"), [(set (i32 GPR:$Rd), (opNode ADDRspls:$src))]>, - Sched<[WriteLD]> { + Sched<[WriteLDSW]> { bits<17> src; - let Itinerary = IIC_LD; + let Itinerary = IIC_LDSW; let Rs1 = src{16-12}; let P = src{11}; let Q = src{10}; let imm10 = src{9-0}; + let mayLoad = 1; + let isReMaterializable = 1; } let Y = 0, S = 0, E = 1 in @@ -586,10 +591,10 @@ def SLI : InstSLI<(outs GPR:$Rd), (ins i32lo21:$imm), [(set GPR:$Rd, i32lo21:$imm)]> { bits<21> imm; - let Itinerary = IIC_LD; let msb = imm{20-16}; let lsb = imm{15-0}; let isReMaterializable = 1; + let isAsCheapAsAMove = 1; } // -------------------------------------------------- // @@ -610,6 +615,7 @@ class StoreRR let Q = dst{8}; let BBB = dst{7-5}; let JJJJJ = dst{4-0}; + let mayStore = 1; } class StoreRI @@ -624,6 +630,7 @@ class StoreRI let P = dst{17}; let Q = dst{16}; let imm16 = dst{15-0}; + let mayStore = 1; } let YL = 0b01, E = 0 in { @@ -647,20 +654,22 @@ def STADDR : InstSLS<0x1, (outs), (ins GPR:$Rd, MEMi:$dst), let Itinerary = IIC_ST; let msb = dst{20-16}; let lsb = dst{15-0}; + let mayStore = 1; } class StoreSPLS : InstSPLS<(outs), (ins GPR:$Rd, MEMspls:$dst), !strconcat(asmstring, "\t$Rd, $dst"), [(opNode (i32 GPR:$Rd), ADDRspls:$dst)]>, - Sched<[WriteST]> { + Sched<[WriteSTSW]> { bits<17> dst; - let Itinerary = IIC_ST; + let Itinerary = IIC_STSW; let Rs1 = dst{16-12}; let P = dst{11}; let Q = dst{10}; let imm10 = dst{9-0}; + let mayStore = 1; } let Y = 0, S = 1, E = 0 in diff --git a/llvm/lib/Target/Lanai/LanaiSchedule.td b/llvm/lib/Target/Lanai/LanaiSchedule.td index 949a2e2..72c22a6 100644 --- a/llvm/lib/Target/Lanai/LanaiSchedule.td +++ b/llvm/lib/Target/Lanai/LanaiSchedule.td @@ -10,14 +10,18 @@ def ALU_FU : FuncUnit; def LDST_FU : FuncUnit; -def IIC_ALU : InstrItinClass; -def IIC_LD : InstrItinClass; -def IIC_ST : InstrItinClass; +def IIC_ALU : InstrItinClass; +def IIC_LD : InstrItinClass; +def IIC_ST : InstrItinClass; +def IIC_LDSW : InstrItinClass; +def IIC_STSW : InstrItinClass; def LanaiItinerary : ProcessorItineraries<[ALU_FU, LDST_FU],[],[ - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; def LanaiSchedModel : SchedMachineModel { @@ -55,12 +59,16 @@ def LanaiSchedModel : SchedMachineModel { def ALU : ProcResource<1> { let BufferSize = 0; } def LdSt : ProcResource<1> { let BufferSize = 0; } -def WriteLD : SchedWrite; -def WriteST : SchedWrite; -def WriteALU : SchedWrite; +def WriteLD : SchedWrite; +def WriteST : SchedWrite; +def WriteLDSW : SchedWrite; +def WriteSTSW : SchedWrite; +def WriteALU : SchedWrite; let SchedModel = LanaiSchedModel in { - def : WriteRes { let Latency = 2; } - def : WriteRes { let Latency = 2; } - def : WriteRes { let Latency = 1; } + def : WriteRes { let Latency = 2; } + def : WriteRes { let Latency = 2; } + def : WriteRes { let Latency = 2; } + def : WriteRes { let Latency = 4; } + def : WriteRes { let Latency = 1; } } diff --git a/llvm/test/CodeGen/Lanai/subword.ll b/llvm/test/CodeGen/Lanai/subword.ll new file mode 100644 index 0000000..c0e1eaf --- /dev/null +++ b/llvm/test/CodeGen/Lanai/subword.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=lanai-unknown-unknown | FileCheck %s + +; Test scheduling of subwords. + +%struct.X = type { i16, i16 } + +define void @f(%struct.X* inreg nocapture %c) #0 { +entry: + %a = getelementptr inbounds %struct.X, %struct.X* %c, i32 0, i32 0 + %0 = load i16, i16* %a, align 2 + %inc = add i16 %0, 1 + store i16 %inc, i16* %a, align 2 + %b = getelementptr inbounds %struct.X, %struct.X* %c, i32 0, i32 1 + %1 = load i16, i16* %b, align 2 + %dec = add i16 %1, -1 + store i16 %dec, i16* %b, align 2 + ret void +} + +; Verify that the two loads occur before the stores. Without memory +; disambiguation and subword schedule, the resultant code was a per subword +; load-modify-store sequence instead of the more optimal schedule where all +; loads occurred before modification and storage. +; CHECK: uld.h +; CHECK-NEXT: uld.h +; CHECK-NEXT: add +; CHECK-NEXT: st.h +; CHECK-NEXT: sub +; CHECK-NEXT: st.h -- 2.7.4