From 47ab1f2007aff817be4ced49884827ef10e12601 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 18 Mar 2015 16:23:44 +0000 Subject: [PATCH] [Hexagon] Intrinsics for circular and bit-reversed loads and stores llvm-svn: 232645 --- llvm/include/llvm/IR/IntrinsicsHexagon.td | 144 +++++++++++- .../Target/Hexagon/HexagonExpandPredSpillCode.cpp | 161 ++++++++++++- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 200 ++++++++++++++++ llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 15 +- llvm/lib/Target/Hexagon/HexagonIntrinsics.td | 24 ++ llvm/test/CodeGen/Hexagon/brev_ld.ll | 140 +++++++++++ llvm/test/CodeGen/Hexagon/brev_st.ll | 112 +++++++++ llvm/test/CodeGen/Hexagon/circ_ld.ll | 135 +++++++++++ llvm/test/CodeGen/Hexagon/circ_ldd_bug.ll | 255 +++++++++++++++++++++ llvm/test/CodeGen/Hexagon/circ_ldw.ll | 18 ++ llvm/test/CodeGen/Hexagon/circ_st.ll | 108 +++++++++ 11 files changed, 1306 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/brev_ld.ll create mode 100644 llvm/test/CodeGen/Hexagon/brev_st.ll create mode 100644 llvm/test/CodeGen/Hexagon/circ_ld.ll create mode 100644 llvm/test/CodeGen/Hexagon/circ_ldd_bug.ll create mode 100644 llvm/test/CodeGen/Hexagon/circ_ldw.ll create mode 100644 llvm/test/CodeGen/Hexagon/circ_st.ll diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td index b566956..78ee651 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagon.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td @@ -422,12 +422,42 @@ class Hexagon_di_didisisi_Intrinsic llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +class Hexagon_mem_memmemsi_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_mem_memsisi_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_mem_memdisi_Intrinsic + : Hexagon_Intrinsic; + class Hexagon_mem_memmemsisi_Intrinsic : Hexagon_Intrinsic; +class Hexagon_mem_memsisisi_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_mem_memdisisi_Intrinsic + : Hexagon_Intrinsic; + // // 
Hexagon_sf_df_Intrinsic // @@ -606,20 +636,126 @@ class Hexagon_df_dfdfdfqi_Intrinsic [IntrNoMem, Throws]>; -// This one below will not be generated from iset.py. -// So make sure, you don't overwrite this one. +// This one below will not be auto-generated, +// so make sure, you don't overwrite this one. // // BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1) // def int_hexagon_SI_to_SXTHI_asrh : Hexagon_si_si_Intrinsic<"SI_to_SXTHI_asrh">; // +// BUILTIN_INFO_NONCONST(brev_ldd,PTR_ftype_PTRPTRSI,3) +// +def int_hexagon_brev_ldd : +Hexagon_mem_memmemsi_Intrinsic<"brev_ldd">; +// +// BUILTIN_INFO_NONCONST(brev_ldw,PTR_ftype_PTRPTRSI,3) +// +def int_hexagon_brev_ldw : +Hexagon_mem_memmemsi_Intrinsic<"brev_ldw">; +// +// BUILTIN_INFO_NONCONST(brev_ldh,PTR_ftype_PTRPTRSI,3) +// +def int_hexagon_brev_ldh : +Hexagon_mem_memmemsi_Intrinsic<"brev_ldh">; +// +// BUILTIN_INFO_NONCONST(brev_lduh,PTR_ftype_PTRPTRSI,3) +// +def int_hexagon_brev_lduh : +Hexagon_mem_memmemsi_Intrinsic<"brev_lduh">; +// +// BUILTIN_INFO_NONCONST(brev_ldb,PTR_ftype_PTRPTRSI,3) +// +def int_hexagon_brev_ldb : +Hexagon_mem_memmemsi_Intrinsic<"brev_ldb">; +// +// BUILTIN_INFO_NONCONST(brev_ldub,PTR_ftype_PTRPTRSI,3) +// +def int_hexagon_brev_ldub : +Hexagon_mem_memmemsi_Intrinsic<"brev_ldub">; +// // BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4) // def int_hexagon_circ_ldd : Hexagon_mem_memmemsisi_Intrinsic<"circ_ldd">; -// This one above will not be generated from iset.py. -// So make sure, you don't overwrite this one. 
+// +// BUILTIN_INFO_NONCONST(circ_ldw,PTR_ftype_PTRPTRSISI,4) +// +def int_hexagon_circ_ldw : +Hexagon_mem_memmemsisi_Intrinsic<"circ_ldw">; +// +// BUILTIN_INFO_NONCONST(circ_ldh,PTR_ftype_PTRPTRSISI,4) +// +def int_hexagon_circ_ldh : +Hexagon_mem_memmemsisi_Intrinsic<"circ_ldh">; +// +// BUILTIN_INFO_NONCONST(circ_lduh,PTR_ftype_PTRPTRSISI,4) +// +def int_hexagon_circ_lduh : +Hexagon_mem_memmemsisi_Intrinsic<"circ_lduh">; +// +// BUILTIN_INFO_NONCONST(circ_ldb,PTR_ftype_PTRPTRSISI,4) +// +def int_hexagon_circ_ldb : +Hexagon_mem_memmemsisi_Intrinsic<"circ_ldb">; +// +// BUILTIN_INFO_NONCONST(circ_ldub,PTR_ftype_PTRPTRSISI,4) +// +def int_hexagon_circ_ldub : +Hexagon_mem_memmemsisi_Intrinsic<"circ_ldub">; + +// +// BUILTIN_INFO_NONCONST(brev_stb,PTR_ftype_PTRSISI,3) +// +def int_hexagon_brev_stb : +Hexagon_mem_memsisi_Intrinsic<"brev_stb">; +// +// BUILTIN_INFO_NONCONST(brev_sthhi,PTR_ftype_PTRSISI,3) +// +def int_hexagon_brev_sthhi : +Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">; +// +// BUILTIN_INFO_NONCONST(brev_sth,PTR_ftype_PTRSISI,3) +// +def int_hexagon_brev_sth : +Hexagon_mem_memsisi_Intrinsic<"brev_sth">; +// +// BUILTIN_INFO_NONCONST(brev_stw,PTR_ftype_PTRSISI,3) +// +def int_hexagon_brev_stw : +Hexagon_mem_memsisi_Intrinsic<"brev_stw">; +// +// BUILTIN_INFO_NONCONST(brev_std,PTR_ftype_PTRSISI,3) +// +def int_hexagon_brev_std : +Hexagon_mem_memdisi_Intrinsic<"brev_std">; +// +// BUILTIN_INFO_NONCONST(circ_std,PTR_ftype_PTRDISISI,4) +// +def int_hexagon_circ_std : +Hexagon_mem_memdisisi_Intrinsic<"circ_std">; +// +// BUILTIN_INFO_NONCONST(circ_stw,PTR_ftype_PTRSISISI,4) +// +def int_hexagon_circ_stw : +Hexagon_mem_memsisisi_Intrinsic<"circ_stw">; +// +// BUILTIN_INFO_NONCONST(circ_sth,PTR_ftype_PTRSISISI,4) +// +def int_hexagon_circ_sth : +Hexagon_mem_memsisisi_Intrinsic<"circ_sth">; +// +// BUILTIN_INFO_NONCONST(circ_sthhi,PTR_ftype_PTRSISISI,4) +// +def int_hexagon_circ_sthhi : +Hexagon_mem_memsisisi_Intrinsic<"circ_sthhi">; +// +// 
BUILTIN_INFO_NONCONST(circ_stb,PTR_ftype_PTRSISISI,4) +// +def int_hexagon_circ_stb : +Hexagon_mem_memsisisi_Intrinsic<"circ_stb">; + + // // BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2) // diff --git a/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 8176598..40059fb 100644 --- a/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -79,7 +79,166 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { ++MII) { MachineInstr *MI = MII; int Opc = MI->getOpcode(); - if (Opc == Hexagon::STriw_pred) { + if (Opc == Hexagon::S2_storerb_pci_pseudo || + Opc == Hexagon::S2_storerh_pci_pseudo || + Opc == Hexagon::S2_storeri_pci_pseudo || + Opc == Hexagon::S2_storerd_pci_pseudo || + Opc == Hexagon::S2_storerf_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pci_pseudo) + Opcode = Hexagon::S2_storerd_pci; + else if (Opc == Hexagon::S2_storeri_pci_pseudo) + Opcode = Hexagon::S2_storeri_pci; + else if (Opc == Hexagon::S2_storerh_pci_pseudo) + Opcode = Hexagon::S2_storerh_pci; + else if (Opc == Hexagon::S2_storerf_pci_pseudo) + Opcode = Hexagon::S2_storerf_pci; + else if (Opc == Hexagon::S2_storerb_pci_pseudo) + Opcode = Hexagon::S2_storerb_pci; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + MachineOperand &Op4 = MI->getOperand(4); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseudo circular store by the real circular store. 
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(Op4); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pci_pseudo || + Opc == Hexagon::L2_loadri_pci_pseudo || + Opc == Hexagon::L2_loadrh_pci_pseudo || + Opc == Hexagon::L2_loadruh_pci_pseudo|| + Opc == Hexagon::L2_loadrb_pci_pseudo || + Opc == Hexagon::L2_loadrub_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pci_pseudo) + Opcode = Hexagon::L2_loadrd_pci; + else if (Opc == Hexagon::L2_loadri_pci_pseudo) + Opcode = Hexagon::L2_loadri_pci; + else if (Opc == Hexagon::L2_loadrh_pci_pseudo) + Opcode = Hexagon::L2_loadrh_pci; + else if (Opc == Hexagon::L2_loadruh_pci_pseudo) + Opcode = Hexagon::L2_loadruh_pci; + else if (Opc == Hexagon::L2_loadrb_pci_pseudo) + Opcode = Hexagon::L2_loadrb_pci; + else if (Opc == Hexagon::L2_loadrub_pci_pseudo) + Opcode = Hexagon::L2_loadrub_pci; + else + llvm_unreachable("wrong Opc"); + + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + MachineOperand &Op5 = MI->getOperand(5); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseudo circular load by the real circular load. 
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(Op5); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo || + Opc == Hexagon::L2_loadri_pbr_pseudo || + Opc == Hexagon::L2_loadrh_pbr_pseudo || + Opc == Hexagon::L2_loadruh_pbr_pseudo|| + Opc == Hexagon::L2_loadrb_pbr_pseudo || + Opc == Hexagon::L2_loadrub_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pbr_pseudo) + Opcode = Hexagon::L2_loadrd_pbr; + else if (Opc == Hexagon::L2_loadri_pbr_pseudo) + Opcode = Hexagon::L2_loadri_pbr; + else if (Opc == Hexagon::L2_loadrh_pbr_pseudo) + Opcode = Hexagon::L2_loadrh_pbr; + else if (Opc == Hexagon::L2_loadruh_pbr_pseudo) + Opcode = Hexagon::L2_loadruh_pbr; + else if (Opc == Hexagon::L2_loadrb_pbr_pseudo) + Opcode = Hexagon::L2_loadrb_pbr; + else if (Opc == Hexagon::L2_loadrub_pbr_pseudo) + Opcode = Hexagon::L2_loadrub_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseudo brev_ldd by the real brev_ldd. 
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::S2_storerd_pbr_pseudo || + Opc == Hexagon::S2_storeri_pbr_pseudo || + Opc == Hexagon::S2_storerh_pbr_pseudo || + Opc == Hexagon::S2_storerb_pbr_pseudo || + Opc == Hexagon::S2_storerf_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pbr_pseudo) + Opcode = Hexagon::S2_storerd_pbr; + else if (Opc == Hexagon::S2_storeri_pbr_pseudo) + Opcode = Hexagon::S2_storeri_pbr; + else if (Opc == Hexagon::S2_storerh_pbr_pseudo) + Opcode = Hexagon::S2_storerh_pbr; + else if (Opc == Hexagon::S2_storerf_pbr_pseudo) + Opcode = Hexagon::S2_storerf_pbr; + else if (Opc == Hexagon::S2_storerb_pbr_pseudo) + Opcode = Hexagon::S2_storerb_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseudo bit-reversed store by the real bit-reversed store. 
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::STriw_pred) { // STriw_pred [R30], ofst, SrcReg; unsigned FP = MI->getOperand(0).getReg(); assert(FP == QST.getRegisterInfo()->getFrameRegister() && diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 35600e1..6e8d431 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -748,6 +748,203 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { } // +// Check for the circular and bit-reversed load intrinsics and select the +// matching pseudo load for each; everything else goes to SelectCode. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + if (IntNo == Intrinsic::hexagon_circ_ldd || + IntNo == Intrinsic::hexagon_circ_ldw || + IntNo == Intrinsic::hexagon_circ_lduh || + IntNo == Intrinsic::hexagon_circ_ldh || + IntNo == Intrinsic::hexagon_circ_ldub || + IntNo == Intrinsic::hexagon_circ_ldb) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + SDValue Offset = N->getOperand(5); + + // We need to add the return type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. + // Only the *_ld instructions push the extra return type, and bump the + // result node operand number correspondingly. 
+ std::vector ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_circ_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pci_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_circ_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pci_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_circ_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_circ_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. 
+ SmallVector Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + int32_t Val = cast(Offset.getNode())->getSExtValue(); + Ops.push_back(CurDAG->getTargetConstant(Val, MVT::i32)); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + if (IntNo == Intrinsic::hexagon_brev_ldd || + IntNo == Intrinsic::hexagon_brev_ldw || + IntNo == Intrinsic::hexagon_brev_ldh || + IntNo == Intrinsic::hexagon_brev_lduh || + IntNo == Intrinsic::hexagon_brev_ldb || + IntNo == Intrinsic::hexagon_brev_ldub) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + + // We need to add the return type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. 
+ std::vector ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_brev_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pbr_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_brev_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pbr_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_brev_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_brev_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. 
+ SmallVector Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + return SelectCode(N); +} + +// // Checking for intrinsics which have predicate registers as operand(s) // and lowering to the actual intrinsic. // @@ -1055,6 +1252,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::ZERO_EXTEND: return SelectZeroExtend(N); + case ISD::INTRINSIC_W_CHAIN: + return SelectIntrinsicWChain(N); + case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); } diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 92ed968..58bc287 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1550,7 +1550,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); - // store new value byte case Hexagon::S4_storerb_ur: return Hexagon::S4_storerbnew_ur; @@ -1560,6 +1559,20 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { case Hexagon::S4_storeri_ur: return Hexagon::S4_storerinew_ur; + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case 
Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; } return 0; } diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index 5e7cfe0..4275230 100644 --- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -1257,6 +1257,30 @@ def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; +/******************************************************************** +* ST +*********************************************************************/ + +class T_stb_pat + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), + (MI I32:$Rs, Val:$Rt, I32:$Ru)>; + +def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; + +class T_stc_pat + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), + (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>; + +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; + include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" diff --git a/llvm/test/CodeGen/Hexagon/brev_ld.ll b/llvm/test/CodeGen/Hexagon/brev_ld.ll new file mode 100644 index 0000000..12edb4c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/brev_ld.ll @@ -0,0 +1,140 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s +; Testing bitreverse load intrinsics: +; Q6_bitrev_load_update_D(inputLR, pDelay, nConvLength); +; Q6_bitrev_load_update_W(inputLR, pDelay, nConvLength); +; Q6_bitrev_load_update_H(inputLR, pDelay, nConvLength); +; Q6_bitrev_load_update_UH(inputLR, pDelay, nConvLength); +; Q6_bitrev_load_update_UB(inputLR, pDelay, nConvLength); +; Q6_bitrev_load_update_B(inputLR, pDelay, nConvLength); +; producing these instructions: +; r3:2 
= memd(r0++m0:brev) +; r1 = memw(r0++m0:brev) +; r1 = memh(r0++m0:brev) +; r1 = memuh(r0++m0:brev) +; r1 = memub(r0++m0:brev) +; r1 = memb(r0++m0:brev) + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +define i64 @foo(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i64, align 8 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i64* %inputLR to i8* + %sub = sub i32 13, %shr1 + %shl = shl i32 1, %sub +; CHECK: memd(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %2 = call i8* @llvm.hexagon.brev.ldd(i8* %0, i8* %1, i32 %shl) + %3 = bitcast i8* %2 to i64* + %4 = load i64, i64* %3, align 8, !tbaa !0 + ret i64 %4 +} + +declare i8* @llvm.hexagon.brev.ldd(i8*, i8*, i32) nounwind + +define i32 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i32, align 4 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i32* %inputLR to i8* + %sub = sub i32 14, %shr1 + %shl = shl i32 1, %sub +; CHECK: memw(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %2 = call i8* @llvm.hexagon.brev.ldw(i8* %0, i8* %1, i32 %shl) + %3 = bitcast i8* %2 to i32* + %4 = load i32, i32* %3, align 4, !tbaa !2 + ret i32 %4 +} + +declare i8* @llvm.hexagon.brev.ldw(i8*, i8*, i32) nounwind + +define signext i16 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i16, align 2 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx 
= getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i16* %inputLR to i8* + %sub = sub i32 15, %shr1 + %shl = shl i32 1, %sub +; CHECK: memh(r{{[0-9]*}} ++ m0:brev) + %2 = call i8* @llvm.hexagon.brev.ldh(i8* %0, i8* %1, i32 %shl) + %3 = bitcast i8* %2 to i16* + %4 = load i16, i16* %3, align 2, !tbaa !3 + ret i16 %4 +} + +declare i8* @llvm.hexagon.brev.ldh(i8*, i8*, i32) nounwind + +define zeroext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i16, align 2 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i16* %inputLR to i8* + %sub = sub i32 15, %shr1 + %shl = shl i32 1, %sub +; CHECK: memuh(r{{[0-9]*}} ++ m0:brev) + %2 = call i8* @llvm.hexagon.brev.lduh(i8* %0, i8* %1, i32 %shl) + %3 = bitcast i8* %2 to i16* + %4 = load i16, i16* %3, align 2, !tbaa !3 + ret i16 %4 +} + +declare i8* @llvm.hexagon.brev.lduh(i8*, i8*, i32) nounwind + +define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i8, align 1 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub nsw i32 16, %shr1 + %shl = shl i32 1, %sub +; CHECK: memub(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %1 = call i8* @llvm.hexagon.brev.ldub(i8* %0, i8* %inputLR, i32 %shl) + %2 = load i8, i8* %1, align 1, !tbaa !0 + ret i8 %2 +} + +declare i8* @llvm.hexagon.brev.ldub(i8*, i8*, i32) nounwind + +define zeroext i8 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i8, align 1 + %conv = zext i16 %filtMemLen to 
i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub nsw i32 16, %shr1 + %shl = shl i32 1, %sub +; CHECK: memb(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %1 = call i8* @llvm.hexagon.brev.ldb(i8* %0, i8* %inputLR, i32 %shl) + %2 = load i8, i8* %1, align 1, !tbaa !0 + ret i8 %2 +} + +declare i8* @llvm.hexagon.brev.ldb(i8*, i8*, i32) nounwind + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} +!2 = !{!"int", !0} +!3 = !{!"short", !0} diff --git a/llvm/test/CodeGen/Hexagon/brev_st.ll b/llvm/test/CodeGen/Hexagon/brev_st.ll new file mode 100644 index 0000000..b805791 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/brev_st.ll @@ -0,0 +1,112 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s +; Test these 5 bitreverse store intrinsics: +; Q6_bitrev_store_update_D(inputLR, pDelay, nConvLength); +; Q6_bitrev_store_update_W(inputLR, pDelay, nConvLength); +; Q6_bitrev_store_update_HL(inputLR, pDelay, nConvLength); +; Q6_bitrev_store_update_HH(inputLR, pDelay, nConvLength); +; Q6_bitrev_store_update_B(inputLR, pDelay, nConvLength); +; producing these instructions: +; memd(r0++m0:brev) = r1:0 +; memw(r0++m0:brev) = r0 +; memh(r0++m0:brev) = r3 +; memh(r0++m0:brev) = r3.h +; memb(r0++m0:brev) = r3 + +; ModuleID = 'brev_st.i' +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +define i64 @foo(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr2 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub i32 13, %shr2 + %shl = shl i32 1, %sub +; CHECK: 
memd(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %1 = tail call i8* @llvm.hexagon.brev.std(i8* %0, i64 undef, i32 %shl) + %2 = bitcast i8* %1 to i64* + %3 = load i64, i64* %2, align 8, !tbaa !0 + ret i64 %3 +} + +declare i8* @llvm.hexagon.brev.std(i8*, i64, i32) nounwind + +define i32 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub i32 14, %shr1 + %shl = shl i32 1, %sub +; CHECK: memw(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %1 = tail call i8* @llvm.hexagon.brev.stw(i8* %0, i32 undef, i32 %shl) + %2 = bitcast i8* %1 to i32* + %3 = load i32, i32* %2, align 4, !tbaa !2 + ret i32 %3 +} + +declare i8* @llvm.hexagon.brev.stw(i8*, i32, i32) nounwind + +define signext i16 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr2 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub i32 15, %shr2 + %shl = shl i32 1, %sub +; CHECK: memh(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %1 = tail call i8* @llvm.hexagon.brev.sth(i8* %0, i32 0, i32 %shl) + %2 = bitcast i8* %1 to i16* + %3 = load i16, i16* %2, align 2, !tbaa !3 + ret i16 %3 +} + +declare i8* @llvm.hexagon.brev.sth(i8*, i32, i32) nounwind + +define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr2 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub i32 15, %shr2 + %shl = shl i32 1, %sub +; CHECK: memh(r{{[0-9]*}} ++ 
m{{[0-1]}}:brev){{ *}}={{ *}}r{{[0-9]*}}.h + %1 = tail call i8* @llvm.hexagon.brev.sthhi(i8* %0, i32 0, i32 %shl) + %2 = bitcast i8* %1 to i16* + %3 = load i16, i16* %2, align 2, !tbaa !3 + ret i16 %3 +} + +declare i8* @llvm.hexagon.brev.sthhi(i8*, i32, i32) nounwind + +define zeroext i8 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr2 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %sub = sub nsw i32 16, %shr2 + ; CHECK: memb(r{{[0-9]*}} ++ m{{[0-1]}}:brev) + %shl = shl i32 1, %sub + %1 = tail call i8* @llvm.hexagon.brev.stb(i8* %0, i32 0, i32 %shl) + %2 = load i8, i8* %1, align 1, !tbaa !0 + ret i8 %2 +} + +declare i8* @llvm.hexagon.brev.stb(i8*, i32, i32) nounwind + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} +!2 = !{!"int", !0} +!3 = !{!"short", !0} diff --git a/llvm/test/CodeGen/Hexagon/circ_ld.ll b/llvm/test/CodeGen/Hexagon/circ_ld.ll new file mode 100644 index 0000000..6d37240 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/circ_ld.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; Testing for these 6 variants of circular load: +; Q6_circ_load_update_B(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_load_update_D(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_load_update_H(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_load_update_UB(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_load_update_UH(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_load_update_W(inputLR, pDelay, -1, nConvLength, 4); +; producing these: +; r0 = memb(r1++#-1:circ(m0)) +; r3:2 = memd(r1++#-8:circ(m0)) +; r0 = memh(r1++#-2:circ(m0)) +; r0 = memub(r1++#-1:circ(m0)) +; r0 = memuh(r1++#-2:circ(m0)) +; r0 = memw(r1++#-4:circ(m0)) + +target datalayout = 
"e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +define zeroext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i8, align 1 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %or = or i32 %shr1, 33554432 +; CHECK: memb(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}})) + %1 = call i8* @llvm.hexagon.circ.ldb(i8* %0, i8* %inputLR, i32 %or, i32 -1) + %2 = load i8, i8* %1, align 1, !tbaa !0 + ret i8 %2 +} + +declare i8* @llvm.hexagon.circ.ldb(i8*, i8*, i32, i32) nounwind + +define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i64, align 8 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i64* %inputLR to i8* + %shl = shl nuw nsw i32 %shr1, 3 + %or = or i32 %shl, 83886080 +; CHECK: memd(r{{[0-9]*.}}++{{.}}#-8:circ(m{{[0-1]}})) + %2 = call i8* @llvm.hexagon.circ.ldd(i8* %0, i8* %1, i32 %or, i32 -8) + %3 = bitcast i8* %2 to i64* + %4 = load i64, i64* %3, align 8, !tbaa !0 + ret i64 %4 +} + +declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind + +define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i16, align 2 + %conv = zext i16 %filtMemLen to i32 + %shr1 = and i32 %conv, 65534 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i16* %inputLR to i8* + %or = or i32 %shr1, 50331648 +; CHECK: 
memh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}})) + %2 = call i8* @llvm.hexagon.circ.ldh(i8* %0, i8* %1, i32 %or, i32 -2) + %3 = bitcast i8* %2 to i16* + %4 = load i16, i16* %3, align 2, !tbaa !2 + ret i16 %4 +} + +declare i8* @llvm.hexagon.circ.ldh(i8*, i8*, i32, i32) nounwind + +define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i8, align 1 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %or = or i32 %shr1, 33554432 +; CHECK: memub(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}})) + %1 = call i8* @llvm.hexagon.circ.ldub(i8* %0, i8* %inputLR, i32 %or, i32 -1) + %2 = load i8, i8* %1, align 1, !tbaa !0 + ret i8 %2 +} + +declare i8* @llvm.hexagon.circ.ldub(i8*, i8*, i32, i32) nounwind + +define zeroext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i16, align 2 + %conv = zext i16 %filtMemLen to i32 + %shr1 = and i32 %conv, 65534 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %1 = bitcast i16* %inputLR to i8* + %or = or i32 %shr1, 50331648 +; CHECK: memuh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}})) + %2 = call i8* @llvm.hexagon.circ.lduh(i8* %0, i8* %1, i32 %or, i32 -2) + %3 = bitcast i8* %2 to i16* + %4 = load i16, i16* %3, align 2, !tbaa !2 + ret i16 %4 +} + +declare i8* @llvm.hexagon.circ.lduh(i8*, i8*, i32, i32) nounwind + +define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %inputLR = alloca i32, align 4 + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* 
%arrayidx to i8* + %1 = bitcast i32* %inputLR to i8* + %shl = shl nuw nsw i32 %shr1, 2 + %or = or i32 %shl, 67108864 +; CHECK: memw(r{{[0-9]*.}}++{{.}}#-4:circ(m{{[0-1]}})) + %2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 %or, i32 -4) + %3 = bitcast i8* %2 to i32* + %4 = load i32, i32* %3, align 4, !tbaa !3 + ret i32 %4 +} + +declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} +!2 = !{!"short", !0} +!3 = !{!"int", !0} diff --git a/llvm/test/CodeGen/Hexagon/circ_ldd_bug.ll b/llvm/test/CodeGen/Hexagon/circ_ldd_bug.ll new file mode 100644 index 0000000..d15b5c9 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/circ_ldd_bug.ll @@ -0,0 +1,255 @@ +; RUN: llc -O2 < %s +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +; We would fail on this file with: +; Unimplemented +; UNREACHABLE executed at llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp:615! +; This happened because after unrolling a loop with a ldd_circ instruction we +; would have several TFCR and ldd_circ instruction sequences. +; %vreg0 (CRRegs) = TFCR %vreg0 (IntRegs) +; = ldd_circ( , , vreg0) +; %vreg1 (CRRegs) = TFCR %vreg1 (IntRegs) +; = ldd_circ( , , vreg0) +; The scheduler would move the CRRegs to the top of the loop. The allocator +; would try to spill the CRRegs after running out of them. We don't have code to +; spill CRRegs and the above assertion would be triggered. 
+declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind + +define i32 @test(i16 zeroext %var0, i16* %var1, i16 signext %var2, i16* nocapture %var3) nounwind { +entry: + %var4 = alloca i64, align 8 + %conv = zext i16 %var0 to i32 + %shr5 = lshr i32 %conv, 1 + %idxprom = sext i16 %var2 to i32 + %arrayidx = getelementptr inbounds i16, i16* %var1, i32 %idxprom + %0 = bitcast i16* %var3 to i64* + %1 = load i64, i64* %0, align 8, !tbaa !1 + %2 = bitcast i16* %arrayidx to i8* + %3 = bitcast i64* %var4 to i8* + %shl = shl nuw nsw i32 %shr5, 3 + %or = or i32 %shl, 83886080 + %4 = call i8* @llvm.hexagon.circ.ldd(i8* %2, i8* %3, i32 %or, i32 -8) + %sub = add nsw i32 %shr5, -1 + %cmp6 = icmp sgt i32 %sub, 0 + %5 = load i64, i64* %var4, align 8, !tbaa !1 + %6 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 0, i64 %1, i64 %5) + br i1 %cmp6, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %incdec.ptr = getelementptr inbounds i16, i16* %var3, i32 4 + %7 = bitcast i16* %incdec.ptr to i64* + %8 = zext i16 %var0 to i32 + %9 = lshr i32 %8, 1 + %10 = add i32 %9, -1 + %xtraiter = urem i32 %10, 8 + %lcmp = icmp ne i32 %xtraiter, 0 + br i1 %lcmp, label %unr.cmp60, label %for.body.lr.ph.split.split + +unr.cmp60: ; preds = %for.body.lr.ph + %un.tmp61 = icmp eq i32 %xtraiter, 1 + br i1 %un.tmp61, label %for.body.unr53, label %unr.cmp51 + +unr.cmp51: ; preds = %unr.cmp60 + %un.tmp52 = icmp eq i32 %xtraiter, 2 + br i1 %un.tmp52, label %for.body.unr44, label %unr.cmp42 + +unr.cmp42: ; preds = %unr.cmp51 + %un.tmp43 = icmp eq i32 %xtraiter, 3 + br i1 %un.tmp43, label %for.body.unr35, label %unr.cmp33 + +unr.cmp33: ; preds = %unr.cmp42 + %un.tmp34 = icmp eq i32 %xtraiter, 4 + br i1 %un.tmp34, label %for.body.unr26, label %unr.cmp24 + +unr.cmp24: ; preds = %unr.cmp33 + %un.tmp25 = icmp eq i32 %xtraiter, 5 + br i1 %un.tmp25, label %for.body.unr17, label %unr.cmp + +unr.cmp: ; preds = %unr.cmp24 + %un.tmp = icmp eq i32 %xtraiter, 6 + br i1 %un.tmp, label 
%for.body.unr13, label %for.body.unr + +for.body.unr: ; preds = %unr.cmp + %11 = call i8* @llvm.hexagon.circ.ldd(i8* %4, i8* %3, i32 %or, i32 -8) + %12 = load i64, i64* %7, align 8, !tbaa !1 + %inc.unr = add nsw i32 0, 1 + %incdec.ptr4.unr = getelementptr inbounds i64, i64* %7, i32 1 + %cmp.unr = icmp slt i32 %inc.unr, %sub + %13 = load i64, i64* %var4, align 8, !tbaa !1 + %14 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %6, i64 %12, i64 %13) + br label %for.body.unr13 + +for.body.unr13: ; preds = %for.body.unr, %unr.cmp + %15 = phi i64 [ %6, %unr.cmp ], [ %14, %for.body.unr ] + %pvar6.09.unr = phi i64* [ %7, %unr.cmp ], [ %incdec.ptr4.unr, %for.body.unr ] + %var8.0.in8.unr = phi i8* [ %4, %unr.cmp ], [ %11, %for.body.unr ] + %i.07.unr = phi i32 [ 0, %unr.cmp ], [ %inc.unr, %for.body.unr ] + %16 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr, i8* %3, i32 %or, i32 -8) + %17 = load i64, i64* %pvar6.09.unr, align 8, !tbaa !1 + %inc.unr14 = add nsw i32 %i.07.unr, 1 + %incdec.ptr4.unr15 = getelementptr inbounds i64, i64* %pvar6.09.unr, i32 1 + %cmp.unr16 = icmp slt i32 %inc.unr14, %sub + %18 = load i64, i64* %var4, align 8, !tbaa !1 + %19 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %15, i64 %17, i64 %18) + br label %for.body.unr17 + +for.body.unr17: ; preds = %for.body.unr13, %unr.cmp24 + %20 = phi i64 [ %6, %unr.cmp24 ], [ %19, %for.body.unr13 ] + %pvar6.09.unr18 = phi i64* [ %7, %unr.cmp24 ], [ %incdec.ptr4.unr15, %for.body.unr13 ] + %var8.0.in8.unr19 = phi i8* [ %4, %unr.cmp24 ], [ %16, %for.body.unr13 ] + %i.07.unr20 = phi i32 [ 0, %unr.cmp24 ], [ %inc.unr14, %for.body.unr13 ] + %21 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr19, i8* %3, i32 %or, i32 -8) + %22 = load i64, i64* %pvar6.09.unr18, align 8, !tbaa !1 + %inc.unr21 = add nsw i32 %i.07.unr20, 1 + %incdec.ptr4.unr22 = getelementptr inbounds i64, i64* %pvar6.09.unr18, i32 1 + %cmp.unr23 = icmp slt i32 %inc.unr21, %sub + %23 = load i64, i64* %var4, align 8, !tbaa !1 + %24 = call i64 
@llvm.hexagon.M2.vdmacs.s1(i64 %20, i64 %22, i64 %23) + br label %for.body.unr26 + +for.body.unr26: ; preds = %for.body.unr17, %unr.cmp33 + %25 = phi i64 [ %6, %unr.cmp33 ], [ %24, %for.body.unr17 ] + %pvar6.09.unr27 = phi i64* [ %7, %unr.cmp33 ], [ %incdec.ptr4.unr22, %for.body.unr17 ] + %var8.0.in8.unr28 = phi i8* [ %4, %unr.cmp33 ], [ %21, %for.body.unr17 ] + %i.07.unr29 = phi i32 [ 0, %unr.cmp33 ], [ %inc.unr21, %for.body.unr17 ] + %26 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr28, i8* %3, i32 %or, i32 -8) + %27 = load i64, i64* %pvar6.09.unr27, align 8, !tbaa !1 + %inc.unr30 = add nsw i32 %i.07.unr29, 1 + %incdec.ptr4.unr31 = getelementptr inbounds i64, i64* %pvar6.09.unr27, i32 1 + %cmp.unr32 = icmp slt i32 %inc.unr30, %sub + %28 = load i64, i64* %var4, align 8, !tbaa !1 + %29 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %25, i64 %27, i64 %28) + br label %for.body.unr35 + +for.body.unr35: ; preds = %for.body.unr26, %unr.cmp42 + %30 = phi i64 [ %6, %unr.cmp42 ], [ %29, %for.body.unr26 ] + %pvar6.09.unr36 = phi i64* [ %7, %unr.cmp42 ], [ %incdec.ptr4.unr31, %for.body.unr26 ] + %var8.0.in8.unr37 = phi i8* [ %4, %unr.cmp42 ], [ %26, %for.body.unr26 ] + %i.07.unr38 = phi i32 [ 0, %unr.cmp42 ], [ %inc.unr30, %for.body.unr26 ] + %31 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr37, i8* %3, i32 %or, i32 -8) + %32 = load i64, i64* %pvar6.09.unr36, align 8, !tbaa !1 + %inc.unr39 = add nsw i32 %i.07.unr38, 1 + %incdec.ptr4.unr40 = getelementptr inbounds i64, i64* %pvar6.09.unr36, i32 1 + %cmp.unr41 = icmp slt i32 %inc.unr39, %sub + %33 = load i64, i64* %var4, align 8, !tbaa !1 + %34 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %30, i64 %32, i64 %33) + br label %for.body.unr44 + +for.body.unr44: ; preds = %for.body.unr35, %unr.cmp51 + %35 = phi i64 [ %6, %unr.cmp51 ], [ %34, %for.body.unr35 ] + %pvar6.09.unr45 = phi i64* [ %7, %unr.cmp51 ], [ %incdec.ptr4.unr40, %for.body.unr35 ] + %var8.0.in8.unr46 = phi i8* [ %4, %unr.cmp51 ], [ %31, %for.body.unr35 ] + 
%i.07.unr47 = phi i32 [ 0, %unr.cmp51 ], [ %inc.unr39, %for.body.unr35 ] + %36 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr46, i8* %3, i32 %or, i32 -8) + %37 = load i64, i64* %pvar6.09.unr45, align 8, !tbaa !1 + %inc.unr48 = add nsw i32 %i.07.unr47, 1 + %incdec.ptr4.unr49 = getelementptr inbounds i64, i64* %pvar6.09.unr45, i32 1 + %cmp.unr50 = icmp slt i32 %inc.unr48, %sub + %38 = load i64, i64* %var4, align 8, !tbaa !1 + %39 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %35, i64 %37, i64 %38) + br label %for.body.unr53 + +for.body.unr53: ; preds = %for.body.unr44, %unr.cmp60 + %40 = phi i64 [ %6, %unr.cmp60 ], [ %39, %for.body.unr44 ] + %pvar6.09.unr54 = phi i64* [ %7, %unr.cmp60 ], [ %incdec.ptr4.unr49, %for.body.unr44 ] + %var8.0.in8.unr55 = phi i8* [ %4, %unr.cmp60 ], [ %36, %for.body.unr44 ] + %i.07.unr56 = phi i32 [ 0, %unr.cmp60 ], [ %inc.unr48, %for.body.unr44 ] + %41 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr55, i8* %3, i32 %or, i32 -8) + %42 = load i64, i64* %pvar6.09.unr54, align 8, !tbaa !1 + %inc.unr57 = add nsw i32 %i.07.unr56, 1 + %incdec.ptr4.unr58 = getelementptr inbounds i64, i64* %pvar6.09.unr54, i32 1 + %cmp.unr59 = icmp slt i32 %inc.unr57, %sub + %43 = load i64, i64* %var4, align 8, !tbaa !1 + %44 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %40, i64 %42, i64 %43) + br label %for.body.lr.ph.split + +for.body.lr.ph.split: ; preds = %for.body.unr53 + %45 = icmp ult i32 %10, 8 + br i1 %45, label %for.end.loopexit, label %for.body.lr.ph.split.split + +for.body.lr.ph.split.split: ; preds = %for.body.lr.ph.split, %for.body.lr.ph + %.unr = phi i64 [ %44, %for.body.lr.ph.split ], [ %6, %for.body.lr.ph ] + %pvar6.09.unr62 = phi i64* [ %incdec.ptr4.unr58, %for.body.lr.ph.split ], [ %7, %for.body.lr.ph ] + %var8.0.in8.unr63 = phi i8* [ %41, %for.body.lr.ph.split ], [ %4, %for.body.lr.ph ] + %i.07.unr64 = phi i32 [ %inc.unr57, %for.body.lr.ph.split ], [ 0, %for.body.lr.ph ] + %.lcssa12.unr = phi i64 [ %44, %for.body.lr.ph.split ], [ 0, 
%for.body.lr.ph ] + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph.split.split + %46 = phi i64 [ %.unr, %for.body.lr.ph.split.split ], [ %78, %for.body ] + %pvar6.09 = phi i64* [ %pvar6.09.unr62, %for.body.lr.ph.split.split ], [ %scevgep71, %for.body ] + %var8.0.in8 = phi i8* [ %var8.0.in8.unr63, %for.body.lr.ph.split.split ], [ %75, %for.body ] + %i.07 = phi i32 [ %i.07.unr64, %for.body.lr.ph.split.split ], [ %inc.7, %for.body ] + %47 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8, i8* %3, i32 %or, i32 -8) + %48 = load i64, i64* %pvar6.09, align 8, !tbaa !1 + %inc = add nsw i32 %i.07, 1 + %49 = load i64, i64* %var4, align 8, !tbaa !1 + %50 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %46, i64 %48, i64 %49) + %51 = call i8* @llvm.hexagon.circ.ldd(i8* %47, i8* %3, i32 %or, i32 -8) + %scevgep = getelementptr i64, i64* %pvar6.09, i32 1 + %52 = load i64, i64* %scevgep, align 8, !tbaa !1 + %inc.1 = add nsw i32 %inc, 1 + %53 = load i64, i64* %var4, align 8, !tbaa !1 + %54 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %50, i64 %52, i64 %53) + %55 = call i8* @llvm.hexagon.circ.ldd(i8* %51, i8* %3, i32 %or, i32 -8) + %scevgep65 = getelementptr i64, i64* %scevgep, i32 1 + %56 = load i64, i64* %scevgep65, align 8, !tbaa !1 + %inc.2 = add nsw i32 %inc.1, 1 + %57 = load i64, i64* %var4, align 8, !tbaa !1 + %58 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %54, i64 %56, i64 %57) + %59 = call i8* @llvm.hexagon.circ.ldd(i8* %55, i8* %3, i32 %or, i32 -8) + %scevgep66 = getelementptr i64, i64* %scevgep65, i32 1 + %60 = load i64, i64* %scevgep66, align 8, !tbaa !1 + %inc.3 = add nsw i32 %inc.2, 1 + %61 = load i64, i64* %var4, align 8, !tbaa !1 + %62 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %58, i64 %60, i64 %61) + %63 = call i8* @llvm.hexagon.circ.ldd(i8* %59, i8* %3, i32 %or, i32 -8) + %scevgep67 = getelementptr i64, i64* %scevgep66, i32 1 + %64 = load i64, i64* %scevgep67, align 8, !tbaa !1 + %inc.4 = add nsw i32 %inc.3, 1 + %65 = load i64, i64* %var4, align 8, !tbaa 
!1 + %66 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %62, i64 %64, i64 %65) + %67 = call i8* @llvm.hexagon.circ.ldd(i8* %63, i8* %3, i32 %or, i32 -8) + %scevgep68 = getelementptr i64, i64* %scevgep67, i32 1 + %68 = load i64, i64* %scevgep68, align 8, !tbaa !1 + %inc.5 = add nsw i32 %inc.4, 1 + %69 = load i64, i64* %var4, align 8, !tbaa !1 + %70 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %66, i64 %68, i64 %69) + %71 = call i8* @llvm.hexagon.circ.ldd(i8* %67, i8* %3, i32 %or, i32 -8) + %scevgep69 = getelementptr i64, i64* %scevgep68, i32 1 + %72 = load i64, i64* %scevgep69, align 8, !tbaa !1 + %inc.6 = add nsw i32 %inc.5, 1 + %73 = load i64, i64* %var4, align 8, !tbaa !1 + %74 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %70, i64 %72, i64 %73) + %75 = call i8* @llvm.hexagon.circ.ldd(i8* %71, i8* %3, i32 %or, i32 -8) + %scevgep70 = getelementptr i64, i64* %scevgep69, i32 1 + %76 = load i64, i64* %scevgep70, align 8, !tbaa !1 + %inc.7 = add nsw i32 %inc.6, 1 + %77 = load i64, i64* %var4, align 8, !tbaa !1 + %78 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %74, i64 %76, i64 %77) + %cmp.7 = icmp slt i32 %inc.7, %sub + %scevgep71 = getelementptr i64, i64* %scevgep70, i32 1 + br i1 %cmp.7, label %for.body, label %for.end.loopexit.unr-lcssa + +for.end.loopexit.unr-lcssa: ; preds = %for.body + %.lcssa12.ph = phi i64 [ %78, %for.body ] + br label %for.end.loopexit + +for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.lr.ph.split + %.lcssa12 = phi i64 [ %44, %for.body.lr.ph.split ], [ %.lcssa12.ph, %for.end.loopexit.unr-lcssa ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %.lcssa = phi i64 [ %6, %entry ], [ %.lcssa12, %for.end.loopexit ] + %79 = call i32 @llvm.hexagon.S2.vrndpackwhs(i64 %.lcssa) + ret i32 %79 +} + +declare i64 @llvm.hexagon.M2.vdmacs.s1(i64, i64, i64) nounwind readnone + +declare i32 @llvm.hexagon.S2.vrndpackwhs(i64) nounwind readnone + +!0 = !{!"long long", !1} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} diff 
--git a/llvm/test/CodeGen/Hexagon/circ_ldw.ll b/llvm/test/CodeGen/Hexagon/circ_ldw.ll new file mode 100644 index 0000000..4511a9c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/circ_ldw.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; CHECK: r{{[0-9]*}} = memw(r{{[0-9]*.}}++{{.}}#-4:circ(m0)) + + +%union.vect64 = type { i64 } +%union.vect32 = type { i32 } + +define i32* @HallowedBeThyName(%union.vect64* nocapture %pRx, %union.vect32* %pLut, %union.vect64* nocapture %pOut, i64 %dc.coerce, i32 %shift, i32 %numSamples) nounwind { +entry: + %vLutNext = alloca i32, align 4 + %0 = bitcast %union.vect32* %pLut to i8* + %1 = bitcast i32* %vLutNext to i8* + %2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 83886144, i32 -4) + %3 = bitcast i8* %2 to i32* + ret i32* %3 +} + +declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind diff --git a/llvm/test/CodeGen/Hexagon/circ_st.ll b/llvm/test/CodeGen/Hexagon/circ_st.ll new file mode 100644 index 0000000..244ca3b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/circ_st.ll @@ -0,0 +1,108 @@ +; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s +; Testing for these 5 variants of circular store: +; Q6_circ_store_update_B(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_store_update_D(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_store_update_HL(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_store_update_HH(inputLR, pDelay, -1, nConvLength, 4); +; Q6_circ_store_update_W(inputLR, pDelay, -1, nConvLength, 4); +; producing these +; memb(r1++#-1:circ(m0)) = r3 +; memd(r1++#-8:circ(m0)) = r1:0 +; memh(r1++#-2:circ(m0)) = r3 +; memh(r1++#-2:circ(m0)) = r3.h +; memw(r1++#-4:circ(m0)) = r0 + +; ModuleID = 'circ_st.i' +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +define zeroext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) 
nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr2 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %or = or i32 %shr2, 33554432 +; CHECK: memb(r{{[0-9]*}}{{.}}++{{.}}#-1:circ(m{{[0-1]}})) + %1 = tail call i8* @llvm.hexagon.circ.stb(i8* %0, i32 0, i32 %or, i32 -1) + %2 = load i8, i8* %1, align 1, !tbaa !0 + ret i8 %2 +} + +declare i8* @llvm.hexagon.circ.stb(i8*, i32, i32, i32) nounwind + +define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %shl = shl nuw nsw i32 %shr1, 3 + %or = or i32 %shl, 83886080 +; CHECK: memd(r{{[0-9]*}}{{.}}++{{.}}#-8:circ(m{{[0-1]}})) + %1 = tail call i8* @llvm.hexagon.circ.std(i8* %0, i64 undef, i32 %or, i32 -8) + %2 = bitcast i8* %1 to i64* + %3 = load i64, i64* %2, align 8, !tbaa !0 + ret i64 %3 +} + +declare i8* @llvm.hexagon.circ.std(i8*, i64, i32, i32) nounwind + +define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr2 = and i32 %conv, 65534 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %or = or i32 %shr2, 50331648 +; CHECK: memh(r{{[0-9]*}}{{.}}++{{.}}#-2:circ(m{{[0-1]}})) + %1 = tail call i8* @llvm.hexagon.circ.sth(i8* %0, i32 0, i32 %or, i32 -2) + %2 = bitcast i8* %1 to i16* + %3 = load i16, i16* %2, align 2, !tbaa !2 + ret i16 %3 +} + +declare i8* @llvm.hexagon.circ.sth(i8*, i32, i32, i32) nounwind + +define signext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv 
= zext i16 %filtMemLen to i32 + %shr2 = and i32 %conv, 65534 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %or = or i32 %shr2, 50331648 +; CHECK: memh(r{{[0-9]*}}{{.}}++{{.}}#-2:circ(m{{[0-1]}})){{ *}}={{ *}}r{{[0-9]*}}.h + %1 = tail call i8* @llvm.hexagon.circ.sthhi(i8* %0, i32 0, i32 %or, i32 -2) + %2 = bitcast i8* %1 to i16* + %3 = load i16, i16* %2, align 2, !tbaa !2 + ret i16 %3 +} + +declare i8* @llvm.hexagon.circ.sthhi(i8*, i32, i32, i32) nounwind + +define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind { +entry: + %conv = zext i16 %filtMemLen to i32 + %shr1 = lshr i32 %conv, 1 + %idxprom = sext i16 %filtMemIndex to i32 + %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom + %0 = bitcast i16* %arrayidx to i8* + %shl = shl nuw nsw i32 %shr1, 2 + %or = or i32 %shl, 67108864 +; CHECK: memw(r{{[0-9]*}}{{.}}++{{.}}#-4:circ(m{{[0-1]}})) + %1 = tail call i8* @llvm.hexagon.circ.stw(i8* %0, i32 undef, i32 %or, i32 -4) + %2 = bitcast i8* %1 to i32* + %3 = load i32, i32* %2, align 4, !tbaa !3 + ret i32 %3 +} + +declare i8* @llvm.hexagon.circ.stw(i8*, i32, i32, i32) nounwind + +!0 = !{!"omnipotent char", !1} +!1 = !{!"Simple C/C++ TBAA"} +!2 = !{!"short", !0} +!3 = !{!"int", !0} -- 2.7.4