From 645c3014a12cc24b75556d93db2ecfa4b99f8635 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 1 Apr 2016 01:09:05 +0000 Subject: [PATCH] [NVPTX] Annotate some instructions as hasSideEffects = 0. Summary: Tablegen tries to infer this from the selection DAG patterns defined for the instructions, but it can't always. An instructive example is CLZr64. CLZr32 is correctly inferred to have no side-effects, but the selection DAG pattern for CLZr64 is slightly more complicated, and in particular the ctlz DAG node is not at the root of the pattern. Thus tablegen can't infer that CLZr64 has no side-effects. Reviewers: jholewinski Subscribers: jholewinski, tra, llvm-commits Differential Revision: http://reviews.llvm.org/D17472 llvm-svn: 265089 --- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 230 +++++++++++++++++-------------- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 87 ++++++------ 2 files changed, 171 insertions(+), 146 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6a2c328..65de673 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -14,7 +14,9 @@ include "NVPTXInstrFormats.td" // A NOP instruction -def NOP : NVPTXInst<(outs), (ins), "", []>; +let hasSideEffects = 0 in { + def NOP : NVPTXInst<(outs), (ins), "", []>; +} // List of vector specific properties def isVecLD : VecInstTypeEnum<1>; @@ -1227,10 +1229,12 @@ multiclass BFE { !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; } -defm BFE_S32 : BFE<"s32", Int32Regs>; -defm BFE_U32 : BFE<"u32", Int32Regs>; -defm BFE_S64 : BFE<"s64", Int64Regs>; -defm BFE_U64 : BFE<"u64", Int64Regs>; +let hasSideEffects = 0 in { + defm BFE_S32 : BFE<"s32", Int32Regs>; + defm BFE_U32 : BFE<"u32", Int32Regs>; + defm BFE_S64 : BFE<"s64", Int64Regs>; + defm BFE_U64 : BFE<"u64", Int64Regs>; +} //----------------------------------- // Comparison instructions (setp, set) @@ -1239,19 +1243,21 @@ defm BFE_U64 : BFE<"u64", Int64Regs>; // FIXME: This doesn't cover versions of set and setp that combine with a // boolean predicate, e.g. setp.eq.and.b16. -multiclass SETP { - def rr : - NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, - "\t$dst, $a, $b;"), []>; - def ri : - NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, - "\t$dst, $a, $b;"), []>; - def ir : - NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, - "\t$dst, $a, $b;"), []>; +let hasSideEffects = 0 in { + multiclass SETP { + def rr : + NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + "\t$dst, $a, $b;"), []>; + def ri : + NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + "\t$dst, $a, $b;"), []>; + def ir : + NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + "\t$dst, $a, $b;"), []>; + } } defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; @@ -1270,16 +1276,18 @@ defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; // "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination // reg, either u32, s32, or f32. Anyway these aren't used at the moment. -multiclass SET { - def rr : NVPTXInst<(outs Int32Regs:$dst), - (ins RC:$a, RC:$b, CmpMode:$cmp), - !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; - def ri : NVPTXInst<(outs Int32Regs:$dst), - (ins RC:$a, ImmCls:$b, CmpMode:$cmp), - !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; - def ir : NVPTXInst<(outs Int32Regs:$dst), - (ins ImmCls:$a, RC:$b, CmpMode:$cmp), - !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; +let hasSideEffects = 0 in { + multiclass SET { + def rr : NVPTXInst<(outs Int32Regs:$dst), + (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; + def ri : NVPTXInst<(outs Int32Regs:$dst), + (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; + def ir : NVPTXInst<(outs Int32Regs:$dst), + (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; + } } defm SET_b16 : SET<"b16", Int16Regs, i16imm>; @@ -1302,43 +1310,45 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; // selp instructions that don't have any pattern matches; we explicitly use // them within this file. -multiclass SELP { - def rr : NVPTXInst<(outs RC:$dst), - (ins RC:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; - def ri : NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; - def ir : NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; - def ii : NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; -} +let hasSideEffects = 0 in { + multiclass SELP { + def rr : NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + def ri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + def ir : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + def ii : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + } -multiclass SELP_PATTERN { - def rr : - NVPTXInst<(outs RC:$dst), - (ins RC:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; - def ri : - NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; - def ir : - NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; - def ii : - NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; + multiclass SELP_PATTERN { + def rr : + NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; + def ri : + NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; + def ir : + NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; + def ii : + NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; + } } // Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as @@ -1397,14 +1407,16 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; // Get pointer to local stack. -def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), - "mov.u32 \t$d, __local_depot$num;", []>; -def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), - "mov.u64 \t$d, __local_depot$num;", []>; +let hasSideEffects = 0 in { + def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), + "mov.u32 \t$d, __local_depot$num;", []>; + def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), + "mov.u64 \t$d, __local_depot$num;", []>; +} // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp -let IsSimpleMove=1 in { +let IsSimpleMove=1, hasSideEffects=0 in { def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), "mov.pred \t$dst, $sss;", []>; def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), @@ -2512,41 +2524,45 @@ def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -// pack a set of smaller int registers to a larger int register -def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2, - Int16Regs:$s3, Int16Regs:$s4), - "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>; -def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2), - "mov.b32\t$d, {{$s1, $s2}};", []>; -def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int32Regs:$s1, Int32Regs:$s2), - "mov.b64\t$d, {{$s1, $s2}};", []>; -def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), - (ins Float32Regs:$s1, Float32Regs:$s2), - "mov.b64\t$d, {{$s1, $s2}};", []>; - -// unpack a larger int register to a set of smaller int registers -def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, - Int16Regs:$d3, Int16Regs:$d4), - (ins Int64Regs:$s), - "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>; -def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), - (ins Int32Regs:$s), - "mov.b32\t{{$d1, $d2}}, $s;", []>; -def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), - (ins Int64Regs:$s), - "mov.b64\t{{$d1, $d2}}, $s;", []>; -def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), - (ins Float64Regs:$s), - "mov.b64\t{{$d1, $d2}}, $s;", []>; +let hasSideEffects = 0 in { + // pack a set of smaller int registers to a larger int register + def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>; + def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2), + "mov.b32\t$d, {{$s1, $s2}};", []>; + def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int32Regs:$s1, Int32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", []>; + def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), + (ins Float32Regs:$s1, Float32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", []>; + + // unpack a larger int register to a set of smaller int registers + def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, + Int16Regs:$d3, Int16Regs:$d4), + (ins Int64Regs:$s), + "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>; + def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), + (ins Int32Regs:$s), + "mov.b32\t{{$d1, $d2}}, $s;", []>; + def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), + (ins Int64Regs:$s), + "mov.b64\t{{$d1, $d2}}, $s;", []>; + def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), + (ins Float64Regs:$s), + "mov.b64\t{{$d1, $d2}}, $s;", []>; +} // Count leading zeros -def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), - "clz.b32\t$d, $a;", []>; -def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "clz.b64\t$d, $a;", []>; +let hasSideEffects = 0 in { + def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + "clz.b32\t$d, $a;", []>; + def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "clz.b64\t$d, $a;", []>; +} // 32-bit has a direct PTX instruction def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; @@ -2572,10 +2588,12 @@ def : Pat<(ctlz_zero_undef Int16Regs:$a), CvtNONE), 16)>; // Population count -def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), - "popc.b32\t$d, $a;", []>; -def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "popc.b64\t$d, $a;", []>; +let hasSideEffects = 0 in { + def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + "popc.b32\t$d, $a;", []>; + def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "popc.b64\t$d, $a;", []>; +} // 32-bit has a direct PTX instruction def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1aec261..c491763 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1846,54 +1846,61 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, Requires<[noHWROT32]> ; -def GET_LO_INT64 - : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), - !strconcat("{{\n\t", - !strconcat(".reg .b32 %dummy;\n\t", - !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t", - !strconcat("}}", "")))), - []> ; - -def GET_HI_INT64 - : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), - !strconcat("{{\n\t", - !strconcat(".reg .b32 %dummy;\n\t", - !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t", - !strconcat("}}", "")))), - []> ; - -def PACK_TWO_INT32 - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), - "mov.b64 \t$dst, {{$lo, $hi}};", []> ; +let hasSideEffects = 0 in { + def GET_LO_INT64 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %dummy;\n\t", + !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t", + !strconcat("}}", "")))), + []> ; + + def GET_HI_INT64 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %dummy;\n\t", + !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t", + !strconcat("}}", "")))), + []> ; +} + +let hasSideEffects = 0 in { + def PACK_TWO_INT32 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), + "mov.b64 \t$dst, {{$lo, $hi}};", []> ; +} def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), (GET_LO_INT64 Int64Regs:$src))> ; -// funnel shift, requires >= sm_32 -def SHF_L_WRAP_B32_IMM - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), - "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; +// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so +// no side effects. +let hasSideEffects = 0 in { + def SHF_L_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; -def SHF_L_WRAP_B32_REG - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; + def SHF_L_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; -def SHF_R_WRAP_B32_IMM - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), - "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; + def SHF_R_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; -def SHF_R_WRAP_B32_REG - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, - Requires<[hasHWROT32]>; + def SHF_R_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; +} // HW version of rotate 64 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), -- 2.7.4