From 7ab68fbd1d721e3ee2b9aa62240630ee1616e307 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Mon, 4 Feb 2013 15:52:56 +0000 Subject: [PATCH] Hexagon: Add V4 combine instructions and some more Def Pats for V2. llvm-svn: 174331 --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 + llvm/lib/Target/Hexagon/HexagonInstrInfo.td | 61 +++++++++++++++++--- llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 80 +++++++++++++++++++++++++++ llvm/test/CodeGen/Hexagon/combine_ir.ll | 55 ++++++++++++++++++ llvm/test/CodeGen/Hexagon/struct_args.ll | 2 +- 5 files changed, 191 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/combine_ir.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 5a415eb..65dab85 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -52,6 +52,8 @@ namespace llvm { WrapperCP, WrapperCombineII, WrapperCombineRR, + WrapperCombineRI_V4, + WrapperCombineIR_V4, WrapperPackhl, WrapperSplatB, WrapperSplatH, diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index 11c0167..6caab26 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -2825,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))), // i1 -> i64 def : Pat <(i64 (zext (i1 PredRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>; + (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, + Requires<[NoV4T]>; // i32 -> i64 def : Pat <(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>, + Requires<[NoV4T]>; // i8 -> i64 def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[NoV4T]>; // i16 -> i64 def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[NoV4T]>; // i32 -> i64 def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, + Requires<[NoV4T]>; def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), (i32 (LDriw ADDRriS11_0:$src1))>; @@ -2862,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))), // Any extended 64-bit load. // anyext i32 -> i64 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, + Requires<[NoV4T]>; + +// When there is an offset we should prefer the pattern below over the pattern above. +// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc) +// So this complexity below is comfortably higher to allow for choosing the below. +// If this is not done then we generate addresses such as +// ******************************************** +// r1 = add (r0, #4) +// r1 = memw(r1 + #0) +// instead of +// r1 = memw(r0 + #4) +// ******************************************** +let AddedComplexity = 100 in +def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[NoV4T]>; // anyext i16 -> i64. def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[NoV4T]>; // Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). def : Pat<(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; + (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>, + Requires<[NoV4T]>; // Multiply 64-bit unsigned and use upper result. def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 617eef4..08225e4 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -280,6 +280,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst), []>, Requires<[HasV4T]>; +def HexagonWrapperCombineRI_V4 : + SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; +def HexagonWrapperCombineIR_V4 : + SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; + +def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), + (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>, + Requires<[HasV4T]>; + +def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), + (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>, + Requires<[HasV4T]>; + let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), @@ -1143,6 +1156,73 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), u16ImmPred:$offset))), (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, Requires<[HasV4T]>; +// zext i1->i64 +def : Pat <(i64 (zext (i1 PredRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def : Pat <(i64 (zext (i32 IntRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>, + Requires<[HasV4T]>; +// zext i8->i64 +def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i16->i64 +def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i16->i64 +def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i32->i64 +def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; + //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/Hexagon/combine_ir.ll b/llvm/test/CodeGen/Hexagon/combine_ir.ll new file mode 100644 index 0000000..921ce99 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/combine_ir.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; CHECK: word +; CHECK: combine(#0 + +define void @word(i32* nocapture %a) nounwind { +entry: + %0 = load i32* %a, align 4, !tbaa !0 + %1 = zext i32 %0 to i64 + %add.ptr = getelementptr inbounds i32* %a, i32 1 + %2 = load i32* %add.ptr, align 4, !tbaa !0 + %3 = zext i32 %2 to i64 + %4 = shl nuw i64 %3, 32 + %ins = or i64 %4, %1 + tail call void @bar(i64 %ins) nounwind + ret void +} + +declare void @bar(i64) + +; CHECK: halfword +; CHECK: combine(#0 + +define void @halfword(i16* nocapture %a) nounwind { +entry: + %0 = load i16* %a, align 2, !tbaa !3 + %1 = zext i16 %0 to i64 + %add.ptr = getelementptr inbounds i16* %a, i32 1 + %2 = load i16* %add.ptr, align 2, !tbaa !3 + %3 = zext i16 %2 to i64 + %4 = shl nuw nsw i64 %3, 16 + %ins = or i64 %4, %1 + tail call void @bar(i64 %ins) nounwind + ret void +} + +; CHECK: byte +; CHECK: combine(#0 + +define void @byte(i8* nocapture %a) nounwind { +entry: + %0 = load i8* %a, align 1, !tbaa !1 + %1 = zext i8 %0 to i64 + %add.ptr = getelementptr inbounds i8* %a, i32 1 + %2 = load i8* %add.ptr, align 1, !tbaa !1 + %3 = zext i8 %2 to i64 + %4 = shl nuw nsw i64 %3, 8 + %ins = or i64 %4, %1 + tail call void @bar(i64 %ins) nounwind + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/llvm/test/CodeGen/Hexagon/struct_args.ll b/llvm/test/CodeGen/Hexagon/struct_args.ll index e488f33..f91300b 100644 --- a/llvm/test/CodeGen/Hexagon/struct_args.ll +++ b/llvm/test/CodeGen/Hexagon/struct_args.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s -; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}}) +; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}}) ; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32) %struct.small = type { i32, i32 } -- 2.7.4