From 06fe9183b07cdd56cc4f8d751ede27dc935ca549 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 26 Jan 2016 22:08:58 +0000
Subject: [PATCH] [x86] make the subtarget member a const reference, not a pointer ; NFCI

It's passed in as a reference; it's not optional; it's not a pointer.

llvm-svn: 258867
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 1458 +++++++++++++++----------------
 llvm/lib/Target/X86/X86ISelLowering.h   |    4 +-
 2 files changed, 731 insertions(+), 731 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 749e578..f5fe320 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -71,9 +71,9 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
 
 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                      const X86Subtarget &STI)
-    : TargetLowering(TM), Subtarget(&STI) {
-  X86ScalarSSEf64 = Subtarget->hasSSE2();
-  X86ScalarSSEf32 = Subtarget->hasSSE1();
+    : TargetLowering(TM), Subtarget(STI) {
+  X86ScalarSSEf64 = Subtarget.hasSSE2();
+  X86ScalarSSEf32 = Subtarget.hasSSE1();
   MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
 
   // Set up the TargetLowering object.
@@ -86,24 +86,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // For 64-bit, since we have so many registers, use the ILP scheduler.
   // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
-  if (Subtarget->isAtom())
+  if (Subtarget.isAtom())
     setSchedulingPreference(Sched::ILP);
-  else if (Subtarget->is64Bit())
+  else if (Subtarget.is64Bit())
     setSchedulingPreference(Sched::ILP);
   else
     setSchedulingPreference(Sched::RegPressure);
-  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2.
   if (TM.getOptLevel() >= CodeGenOpt::Default) {
-    if (Subtarget->hasSlowDivide32())
+    if (Subtarget.hasSlowDivide32())
       addBypassSlowDiv(32, 8);
-    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
+    if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
       addBypassSlowDiv(64, 16);
   }
 
-  if (Subtarget->isTargetKnownWindowsMSVC()) {
+  if (Subtarget.isTargetKnownWindowsMSVC()) {
     // Setup Windows compiler runtime calls.
     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
@@ -117,11 +117,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
   }
 
-  if (Subtarget->isTargetDarwin()) {
+  if (Subtarget.isTargetDarwin()) {
     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
     setUseUnderscoreSetJmp(false);
     setUseUnderscoreLongJmp(false);
-  } else if (Subtarget->isTargetWindowsGNU()) {
+  } else if (Subtarget.isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(false); @@ -134,7 +134,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::i8, &X86::GR8RegClass); addRegisterClass(MVT::i16, &X86::GR16RegClass); addRegisterClass(MVT::i32, &X86::GR32RegClass); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) addRegisterClass(MVT::i64, &X86::GR64RegClass); for (MVT VT : MVT::integer_valuetypes()) @@ -164,14 +164,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); - if (Subtarget->is64Bit()) { - if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) + if (Subtarget.is64Bit()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) // f32/f64 are legal, f80 is custom. setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); else setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } else if (!Subtarget->useSoftFloat()) { + } else if (!Subtarget.useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); @@ -185,7 +185,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); - if (!Subtarget->useSoftFloat()) { + if (!Subtarget.useSoftFloat()) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); @@ -205,7 +205,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); - if (!Subtarget->useSoftFloat()) { + if (!Subtarget.useSoftFloat()) { // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 // are Legal, f80 is custom lowered. setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); @@ -231,8 +231,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); - if (Subtarget->is64Bit()) { - if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { + if (Subtarget.is64Bit()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); @@ -240,9 +240,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); } - } else if (!Subtarget->useSoftFloat()) { + } else if (!Subtarget.useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. - if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) + if (Subtarget.hasSSE1() && !Subtarget.hasSSE3()) // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. 
@@ -260,12 +260,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!X86ScalarSSEf64) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); setOperationAction(ISD::BITCAST , MVT::i32 , Expand); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::BITCAST , MVT::f64 , Expand); // Without SSE, i64->f64 goes through memory. setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } - } else if (!Subtarget->is64Bit()) + } else if (!Subtarget.is64Bit()) setOperationAction(ISD::BITCAST , MVT::i64 , Custom); // Scalar integer divide and remainder are lowered to use operations that @@ -311,14 +311,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); setOperationAction(ISD::SELECT_CC , MVT::i64, Expand); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); - if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) { + if (Subtarget.is32Bit() && Subtarget.isTargetKnownWindowsMSVC()) { // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` // is. We should promote the value to 64-bits to solve this. // This is what the CRT headers do - `fmodf` is an inline header @@ -338,19 +338,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32); setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote); AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32); - if (Subtarget->hasBMI()) { + if (Subtarget.hasBMI()) { setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); } else { setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) setOperationAction(ISD::CTTZ , MVT::i64 , Custom); } - if (Subtarget->hasLZCNT()) { + if (Subtarget.hasLZCNT()) { // When promoting the i8 variants, force them to i32 for a shorter // encoding. 
setOperationAction(ISD::CTLZ , MVT::i8 , Promote); @@ -359,7 +359,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); } else { setOperationAction(ISD::CTLZ , MVT::i8 , Custom); @@ -368,7 +368,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::CTLZ , MVT::i64 , Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); } @@ -377,7 +377,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Special handling for half-precision floating point conversions. // If we don't have F16C support, then lower half float conversions // into library calls. - if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) { + if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } @@ -395,19 +395,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f80, MVT::f16, Expand); - if (Subtarget->hasPOPCNT()) { + if (Subtarget.hasPOPCNT()) { setOperationAction(ISD::CTPOP , MVT::i8 , Promote); } else { setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); setOperationAction(ISD::CTPOP , MVT::i32 , Expand); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) setOperationAction(ISD::CTPOP , MVT::i64 , Expand); } setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); - if (!Subtarget->hasMOVBE()) + if (!Subtarget.hasMOVBE()) setOperationAction(ISD::BSWAP , MVT::i16 , Expand); // These should be promoted to a larger select which is supported. 
@@ -430,7 +430,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCCE , MVT::i8 , Custom); setOperationAction(ISD::SETCCE , MVT::i16 , Custom); setOperationAction(ISD::SETCCE , MVT::i32 , Custom); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); setOperationAction(ISD::SETCCE , MVT::i64 , Custom); @@ -450,11 +450,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::JumpTable , MVT::i32 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); setOperationAction(ISD::BlockAddress , MVT::i32 , Custom); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); setOperationAction(ISD::JumpTable , MVT::i64 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); @@ -465,13 +465,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - if (Subtarget->hasSSE1()) + if (Subtarget.hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); @@ -483,13 +483,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ATOMIC_STORE, VT, Custom); } - if (Subtarget->hasCmpxchg16b()) { + if (Subtarget.hasCmpxchg16b()) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); } // FIXME - use subtarget debug flags - if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && - !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) { + if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && + !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()) { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } @@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { @@ -523,7 +523,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); - if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) { + if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { // f32 and f64 use SSE. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); @@ -557,7 +557,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // cases we handle. 
addLegalFPImmediate(APFloat(+0.0)); // xorpd addLegalFPImmediate(APFloat(+0.0f)); // xorps - } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) { + } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); @@ -592,7 +592,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); } - } else if (!Subtarget->useSoftFloat()) { + } else if (!Subtarget.useSoftFloat()) { // f32 and f64 in x87. // Set up the FP register classes. addRegisterClass(MVT::f64, &X86::RFP64RegClass); @@ -626,8 +626,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f32, Expand); // Long double always uses X87, except f128 in MMX. - if (!Subtarget->useSoftFloat()) { - if (Subtarget->is64Bit() && Subtarget->hasMMX()) { + if (!Subtarget.useSoftFloat()) { + if (Subtarget.is64Bit() && Subtarget.hasMMX()) { addRegisterClass(MVT::f128, &X86::FR128RegClass); ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); setOperationAction(ISD::FABS , MVT::f128, Custom); @@ -774,7 +774,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. - if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); // No operations on x86mmx supported, everything uses intrinsics. } @@ -792,7 +792,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); - if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { addRegisterClass(MVT::v4f32, &X86::VR128RegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -811,7 +811,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); } - if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { addRegisterClass(MVT::v2f64, &X86::VR128RegClass); // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM @@ -908,7 +908,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } @@ -942,7 +942,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); // As there is no 64-bit GPR available, we need build a special custom // sequence to convert from v2i32 to v2f32. 
- if (!Subtarget->is64Bit()) + if (!Subtarget.is64Bit()) setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); @@ -956,7 +956,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); } - if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { setOperationAction(ISD::FFLOOR, RoundedTy, Legal); setOperationAction(ISD::FCEIL, RoundedTy, Legal); @@ -1020,13 +1020,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // FIXME: these should be Legal, but that's only for the case where // the index is constant. For now custom expand to deal with that. - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } } - if (Subtarget->hasSSE2()) { + if (Subtarget.hasSSE2()) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); @@ -1052,7 +1052,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRA, MVT::v4i32, Custom); } - if (Subtarget->hasXOP()) { + if (Subtarget.hasXOP()) { setOperationAction(ISD::ROTL, MVT::v16i8, Custom); setOperationAction(ISD::ROTL, MVT::v8i16, Custom); setOperationAction(ISD::ROTL, MVT::v4i32, Custom); @@ -1063,7 +1063,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ROTL, MVT::v4i64, Custom); } - if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); addRegisterClass(MVT::v8i32, &X86::VR256RegClass); @@ -1162,7 +1162,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); - if (Subtarget->hasAnyFMA()) { + if (Subtarget.hasAnyFMA()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); @@ -1171,7 +1171,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f64, Legal); } - if (Subtarget->hasInt256()) { + if (Subtarget.hasInt256()) { setOperationAction(ISD::ADD, MVT::v4i64, Legal); setOperationAction(ISD::ADD, MVT::v8i32, Legal); setOperationAction(ISD::ADD, MVT::v16i16, Legal); @@ -1289,7 +1289,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } - if (Subtarget->hasInt256()) + if (Subtarget.hasInt256()) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. 
@@ -1307,7 +1307,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { addRegisterClass(MVT::v16i32, &X86::VR512RegClass); addRegisterClass(MVT::v16f32, &X86::VR512RegClass); addRegisterClass(MVT::v8i64, &X86::VR512RegClass); @@ -1388,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); - if (Subtarget->hasVLX()){ + if (Subtarget.hasVLX()){ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); @@ -1411,7 +1411,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom); - if (Subtarget->hasDQI()) { + if (Subtarget.hasDQI()) { setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); @@ -1419,7 +1419,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - if (Subtarget->hasVLX()) { + if (Subtarget.hasVLX()) { setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); @@ -1430,7 +1430,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); } } - if (Subtarget->hasVLX()) { + if (Subtarget.hasVLX()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); @@ -1452,7 +1452,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); - if (Subtarget->hasDQI()) { + if (Subtarget.hasDQI()) { setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); } @@ -1524,7 +1524,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::OR, MVT::v16i32, Legal); setOperationAction(ISD::XOR, MVT::v16i32, Legal); - if (Subtarget->hasCDI()) { + if (Subtarget.hasCDI()) { setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand); @@ -1542,7 +1542,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); - if (Subtarget->hasVLX()) { + if (Subtarget.hasVLX()) { setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); @@ -1566,9 +1566,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); 
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); } - } // Subtarget->hasCDI() + } // Subtarget.hasCDI() - if (Subtarget->hasDQI()) { + if (Subtarget.hasDQI()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i64, Legal); setOperationAction(ISD::MUL, MVT::v8i64, Legal); @@ -1617,7 +1617,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } }// has AVX-512 - if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); @@ -1678,10 +1678,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); - if (Subtarget->hasVLX()) + if (Subtarget.hasVLX()) setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); - if (Subtarget->hasCDI()) { + if (Subtarget.hasCDI()) { setOperationAction(ISD::CTLZ, MVT::v32i16, Custom); setOperationAction(ISD::CTLZ, MVT::v64i8, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand); @@ -1704,7 +1704,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v2i1, &X86::VK2RegClass); @@ -1744,7 +1744,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - if (!Subtarget->is64Bit()) { + if (!Subtarget.is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); } @@ -1756,7 +1756,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { - if (VT == MVT::i64 && !Subtarget->is64Bit()) + if (VT == MVT::i64 && !Subtarget.is64Bit()) continue; // Add/Sub/Mul with overflow operations are custom lowered. setOperationAction(ISD::SADDO, VT, Custom); @@ -1767,7 +1767,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMULO, VT, Custom); } - if (!Subtarget->is64Bit()) { + if (!Subtarget.is64Bit()) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); @@ -1775,10 +1775,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Combine sin / cos into one node or libcall if possible. - if (Subtarget->hasSinCos()) { + if (Subtarget.hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetDarwin()) { + if (Subtarget.isTargetDarwin()) { // For MacOSX, we don't want the normal expansion of a libcall to sincos. // We want to issue a libcall to __sincos_stret to avoid memory traffic. 
setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -1786,7 +1786,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (Subtarget->isTargetWin64()) { + if (Subtarget.isTargetWin64()) { setOperationAction(ISD::SDIV, MVT::i128, Custom); setOperationAction(ISD::UDIV, MVT::i128, Custom); setOperationAction(ISD::SREM, MVT::i128, Custom); @@ -1832,7 +1832,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::MSCATTER); setTargetDAGCombine(ISD::MGATHER); - computeRegisterProperties(Subtarget->getRegisterInfo()); + computeRegisterProperties(Subtarget.getRegisterInfo()); MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores MaxStoresPerMemsetOptSize = 8; @@ -1844,7 +1844,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // A predictable cmov does not hurt on an in-order CPU. // FIXME: Use a CPU attribute to trigger this, not a CPU model. - PredictableSelectIsExpensive = !Subtarget->isAtom(); + PredictableSelectIsExpensive = !Subtarget.isAtom(); EnableExtLdPromotion = true; setPrefFunctionAlignment(4); // 2^4 bytes. @@ -1853,7 +1853,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // This has so far only been implemented for 64-bit MachO. bool X86TargetLowering::useLoadStackGuardNode() const { - return Subtarget->isTargetMachO() && Subtarget->is64Bit(); + return Subtarget.isTargetMachO() && Subtarget.is64Bit(); } TargetLoweringBase::LegalizeTypeAction @@ -1869,21 +1869,21 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const { EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) - return Subtarget->hasAVX512() ? MVT::i1: MVT::i8; + return Subtarget.hasAVX512() ? MVT::i1: MVT::i8; if (VT.isSimple()) { MVT VVT = VT.getSimpleVT(); const unsigned NumElts = VVT.getVectorNumElements(); const MVT EltVT = VVT.getVectorElementType(); if (VVT.is512BitVector()) { - if (Subtarget->hasAVX512()) + if (Subtarget.hasAVX512()) if (EltVT == MVT::i32 || EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64) switch(NumElts) { case 8: return MVT::v8i1; case 16: return MVT::v16i1; } - if (Subtarget->hasBWI()) + if (Subtarget.hasBWI()) if (EltVT == MVT::i8 || EltVT == MVT::i16) switch(NumElts) { case 32: return MVT::v32i1; @@ -1892,7 +1892,7 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, } if (VVT.is256BitVector() || VVT.is128BitVector()) { - if (Subtarget->hasVLX()) + if (Subtarget.hasVLX()) if (EltVT == MVT::i32 || EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64) switch(NumElts) { @@ -1900,7 +1900,7 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, case 4: return MVT::v4i1; case 8: return MVT::v8i1; } - if (Subtarget->hasBWI() && Subtarget->hasVLX()) + if (Subtarget.hasBWI() && Subtarget.hasVLX()) if (EltVT == MVT::i8 || EltVT == MVT::i16) switch(NumElts) { case 8: return MVT::v8i1; @@ -1944,7 +1944,7 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { /// are at 4-byte boundaries. unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { // Max of 8 and alignment of type. 
unsigned TyAlign = DL.getABITypeAlignment(Ty); if (TyAlign > 8) @@ -1953,7 +1953,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, } unsigned Align = 4; - if (Subtarget->hasSSE1()) + if (Subtarget.hasSSE1()) getMaxByValAlign(Ty, Align); return Align; } @@ -1979,23 +1979,23 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, if ((!IsMemset || ZeroMemset) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { if (Size >= 16 && - (!Subtarget->isUnalignedMem16Slow() || + (!Subtarget.isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { if (Size >= 32) { // FIXME: Check if unaligned 32-byte accesses are slow. - if (Subtarget->hasInt256()) + if (Subtarget.hasInt256()) return MVT::v8i32; - if (Subtarget->hasFp256()) + if (Subtarget.hasFp256()) return MVT::v8f32; } - if (Subtarget->hasSSE2()) + if (Subtarget.hasSSE2()) return MVT::v4i32; - if (Subtarget->hasSSE1()) + if (Subtarget.hasSSE1()) return MVT::v4f32; } else if (!MemcpyStrSrc && Size >= 8 && - !Subtarget->is64Bit() && - Subtarget->hasSSE2()) { + !Subtarget.is64Bit() && + Subtarget.hasSSE2()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. return MVT::f64; @@ -2004,7 +2004,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, // This is a compromise. If we reach here, unaligned accesses may be slow on // this target. However, creating smaller, aligned accesses could be even // slower and would certainly be a lot more code. - if (Subtarget->is64Bit() && Size >= 8) + if (Subtarget.is64Bit() && Size >= 8) return MVT::i64; return MVT::i32; } @@ -2029,10 +2029,10 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, *Fast = true; break; case 128: - *Fast = !Subtarget->isUnalignedMem16Slow(); + *Fast = !Subtarget.isUnalignedMem16Slow(); break; case 256: - *Fast = !Subtarget->isUnalignedMem32Slow(); + *Fast = !Subtarget.isUnalignedMem32Slow(); break; // TODO: What about AVX-512 (512-bit) accesses? } @@ -2048,7 +2048,7 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF // symbol. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) + Subtarget.isPICStyleGOT()) return MachineJumpTableInfo::EK_Custom32; // Otherwise, use the normal jump table encoding heuristics. @@ -2056,7 +2056,7 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { } bool X86TargetLowering::useSoftFloat() const { - return Subtarget->useSoftFloat(); + return Subtarget.useSoftFloat(); } const MCExpr * @@ -2064,7 +2064,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid,MCContext &Ctx) const{ assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()); + Subtarget.isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. return MCSymbolRefExpr::create(MBB->getSymbol(), @@ -2074,7 +2074,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, /// Returns relocation base for the given PIC jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { - if (!Subtarget->is64Bit()) + if (!Subtarget.is64Bit()) // This doesn't have SDLoc associated with it, but is not really the // same as a Register. 
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), @@ -2088,7 +2088,7 @@ const MCExpr *X86TargetLowering:: getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { // X86-64 uses RIP relative addressing based on the jump table label. - if (Subtarget->isPICStyleRIPRel()) + if (Subtarget.isPICStyleRIPRel()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. @@ -2104,7 +2104,7 @@ X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, default: return TargetLowering::findRepresentativeClass(TRI, VT); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: - RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; + RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; break; case MVT::x86mmx: RRC = &X86::VR64RegClass; @@ -2122,10 +2122,10 @@ X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { - if (!Subtarget->isTargetLinux()) + if (!Subtarget.isTargetLinux()) return false; - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: Offset = 0x28; if (getTargetMachine().getCodeModel() == CodeModel::Kernel) @@ -2141,14 +2141,14 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, } Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { - if (!Subtarget->isTargetAndroid()) + if (!Subtarget.isTargetAndroid()) return TargetLowering::getSafeStackPointerLocation(IRB); // Android provides a fixed TLS slot for the SafeStack pointer. See the // definition of TLS_SLOT_SAFESTACK in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h unsigned AddressSpace, Offset; - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: Offset = 0x48; if (getTargetMachine().getCodeModel() == CodeModel::Kernel) @@ -2243,14 +2243,14 @@ X86TargetLowering::LowerReturn(SDValue Chain, // or SSE or MMX vectors. if ((ValVT == MVT::f32 || ValVT == MVT::f64 || VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && - (Subtarget->is64Bit() && !Subtarget->hasSSE1())) { + (Subtarget.is64Bit() && !Subtarget.hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } // Likewise we can't return F64 values with SSE1 only. gcc does so, but // llvm-gcc has never done it right and no one has noticed, so this // should be OK for now. if (ValVT == MVT::f64 && - (Subtarget->is64Bit() && !Subtarget->hasSSE2())) + (Subtarget.is64Bit() && !Subtarget.hasSSE2())) report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to @@ -2268,7 +2268,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); @@ -2276,7 +2276,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. 
- if (!Subtarget->hasSSE2()) + if (!Subtarget.hasSSE2()) ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); } } @@ -2301,7 +2301,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, getPointerTy(MF.getDataLayout())); unsigned RetValReg - = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? + = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? X86::RAX : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); @@ -2311,7 +2311,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } - const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { @@ -2379,7 +2379,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const { MVT ReturnMVT; // TODO: Is this also valid on 32-bit? - if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND) + if (Subtarget.is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND) ReturnMVT = MVT::i8; else ReturnMVT = MVT::i32; @@ -2400,7 +2400,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. SmallVector RVLocs; - bool Is64Bit = Subtarget->is64Bit(); + bool Is64Bit = Subtarget.is64Bit(); CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); @@ -2412,7 +2412,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && - ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { + ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -2618,10 +2618,10 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // FIXME: Get this from tablegen. static ArrayRef get64BitArgumentGPRs(CallingConv::ID CallConv, - const X86Subtarget *Subtarget) { - assert(Subtarget->is64Bit()); + const X86Subtarget &Subtarget) { + assert(Subtarget.is64Bit()); - if (Subtarget->isCallingConvWin64(CallConv)) { + if (Subtarget.isCallingConvWin64(CallConv)) { static const MCPhysReg GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; @@ -2637,9 +2637,9 @@ static ArrayRef get64BitArgumentGPRs(CallingConv::ID CallConv, // FIXME: Get this from tablegen. static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, - const X86Subtarget *Subtarget) { - assert(Subtarget->is64Bit()); - if (Subtarget->isCallingConvWin64(CallConv)) { + const X86Subtarget &Subtarget) { + assert(Subtarget.is64Bit()); + if (Subtarget.isCallingConvWin64(CallConv)) { // The XMM registers which might contain var arg parameters are shadowed // in their paired GPR. So we only need to save the GPR to their home // slots. 
@@ -2649,10 +2649,10 @@ static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, const Function *Fn = MF.getFunction(); bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat); - bool isSoftFloat = Subtarget->useSoftFloat(); + bool isSoftFloat = Subtarget.useSoftFloat(); assert(!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) + if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1()) // Kernel mode asks for SSE to be disabled, so there are no XMM argument // registers. return None; @@ -2670,17 +2670,17 @@ SDValue X86TargetLowering::LowerFormalArguments( SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && - Subtarget->isTargetCygMing() && + Subtarget.isTargetCygMing() && Fn->getName() == "main") FuncInfo->setForceFramePointer(true); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool Is64Bit = Subtarget->is64Bit(); - bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); + bool Is64Bit = Subtarget.is64Bit(); + bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); @@ -2818,7 +2818,7 @@ SDValue X86TargetLowering::LowerFormalArguments( } // Figure out if XMM registers are in use. - assert(!(Subtarget->useSoftFloat() && + assert(!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"); @@ -2830,7 +2830,7 @@ SDValue X86TargetLowering::LowerFormalArguments( ArrayRef ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget); unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); - assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && + assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); // Gather all the live in physical registers. @@ -2912,13 +2912,13 @@ SDValue X86TargetLowering::LowerFormalArguments( // Find the largest legal vector type. MVT VecVT = MVT::Other; // FIXME: Only some x86_32 calling conventions support AVX512. - if (Subtarget->hasAVX512() && + if (Subtarget.hasAVX512() && (Is64Bit || (CallConv == CallingConv::X86_VectorCall || CallConv == CallingConv::Intel_OCL_BI))) VecVT = MVT::v16f32; - else if (Subtarget->hasAVX()) + else if (Subtarget.hasAVX()) VecVT = MVT::v8f32; - else if (Subtarget->hasSSE2()) + else if (Subtarget.hasSSE2()) VecVT = MVT::v4f32; // We forward some GPRs and some vector types. @@ -2959,8 +2959,8 @@ SDValue X86TargetLowering::LowerFormalArguments( FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. 
if (!Is64Bit && !canGuaranteeTCO(CallConv) && - !Subtarget->getTargetTriple().isOSMSVCRT() && - argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn) + !Subtarget.getTargetTriple().isOSMSVCRT() && + argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -3078,9 +3078,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - bool Is64Bit = Subtarget->is64Bit(); - bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); - StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU()); + bool Is64Bit = Subtarget.is64Bit(); + bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); + StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); @@ -3091,7 +3091,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Attr.getValueAsString() == "true") isTailCall = false; - if (Subtarget->isPICStyleGOT() && + if (Subtarget.isPICStyleGOT() && !MF.getTarget().Options.GuaranteedTailCallOpt) { // If we are using a GOT, disable tail calls to external symbols with // default visibility. Tail calling such a symbol requires using a GOT @@ -3194,7 +3194,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization arguments are handle later. - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { // Skip inalloca arguments, they have already been written. ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -3272,7 +3272,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); - if (Subtarget->isPICStyleGOT()) { + if (Subtarget.isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { @@ -3313,7 +3313,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); - assert((Subtarget->hasSSE1() || !NumXMMRegs) + assert((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); RegsToPass.push_back(std::make_pair(unsigned(X86::AL), @@ -3423,19 +3423,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // external symbols most go through the PLT in PIC mode. If the symbol // has hidden or protected visibility, or if it is static or local, then // we don't need to use the PLT - we can directly call it. 
- if (Subtarget->isTargetELF() && + if (Subtarget.isTargetELF() && DAG.getTarget().getRelocationModel() == Reloc::PIC_ && GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; - } else if (Subtarget->isPICStyleStubAny() && + } else if (Subtarget.isPICStyleStubAny() && !GV->isStrongDefinitionForLinker() && - (!Subtarget->getTargetTriple().isMacOSX() || - Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { + (!Subtarget.getTargetTriple().isMacOSX() || + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. OpFlags = X86II::MO_DARWIN_STUB; - } else if (Subtarget->isPICStyleRIPRel() && isa(GV) && + } else if (Subtarget.isPICStyleRIPRel() && isa(GV) && cast(GV)->hasFnAttribute(Attribute::NonLazyBind)) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead @@ -3464,12 +3464,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // On ELF targets, in either X86-64 or X86-32 mode, direct calls to // external symbols should go through the PLT. - if (Subtarget->isTargetELF() && + if (Subtarget.isTargetELF() && DAG.getTarget().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; - } else if (Subtarget->isPICStyleStubAny() && - (!Subtarget->getTargetTriple().isMacOSX() || - Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { + } else if (Subtarget.isPICStyleStubAny() && + (!Subtarget.getTargetTriple().isMacOSX() || + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -3478,7 +3478,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getTargetExternalSymbol( S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags); - } else if (Subtarget->isTarget64BitILP32() && + } else if (Subtarget.isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); @@ -3551,7 +3551,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getTarget().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !canGuaranteeTCO(CallConv) && - !Subtarget->getTargetTriple().isOSMSVCRT() && + !Subtarget.getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. 
@@ -3613,8 +3613,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG) const { - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; @@ -3707,8 +3707,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; - bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); - bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC); + bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); + bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this @@ -3727,7 +3727,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); if (RegInfo->needsStackRealignment(MF)) return false; @@ -3829,7 +3829,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const X86InstrInfo *TII = Subtarget->getInstrInfo(); + const X86InstrInfo *TII = Subtarget.getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; @@ -3849,7 +3849,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( // only target EAX, EDX, or ECX since the tail call must be scheduled after // callee-saved registers are restored. These happen to be the same // registers used to pass 'inreg' arguments so watch out for those. - if (!Subtarget->is64Bit() && + if (!Subtarget.is64Bit() && ((!isa(Callee) && !isa(Callee)) || DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { @@ -3876,7 +3876,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( } bool CalleeWillPop = - X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg, + X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt); if (unsigned BytesToPop = @@ -3978,7 +3978,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -4289,12 +4289,12 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, bool X86TargetLowering::isCheapToSpeculateCttz() const { // Speculate cttz only if we can directly use TZCNT. - return Subtarget->hasBMI(); + return Subtarget.hasBMI(); } bool X86TargetLowering::isCheapToSpeculateCtlz() const { // Speculate ctlz only if we can directly use LZCNT. 
- return Subtarget->hasLZCNT(); + return Subtarget.hasLZCNT(); } /// Return true if every element in Mask, beginning @@ -4474,7 +4474,7 @@ static SDValue getConstVector(ArrayRef Values, MVT VT, } /// Returns a vector of specified type with all zero elements. -static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget, +static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -4482,7 +4482,7 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget, // to their dest type. This ensures they get CSE'd. SDValue Vec; if (VT.is128BitVector()) { // SSE - if (Subtarget->hasSSE2()) { // SSE2 + if (Subtarget.hasSSE2()) { // SSE2 SDValue Cst = DAG.getConstant(0, dl, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else { // SSE1 @@ -4490,7 +4490,7 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget, Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } } else if (VT.is256BitVector()) { // AVX - if (Subtarget->hasInt256()) { // AVX2 + if (Subtarget.hasInt256()) { // AVX2 SDValue Cst = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops); @@ -4508,9 +4508,9 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget, Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); } else if (VT.getVectorElementType() == MVT::i1) { - assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16) + assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); - assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8) + assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && "Unexpected vector type"); SDValue Cst = DAG.getConstant(0, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); @@ -4756,7 +4756,7 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT, /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with /// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately. /// Then bitcast to their original type, ensuring they get CSE'd. -static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget, +static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDLoc dl) { assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"); @@ -4764,7 +4764,7 @@ static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget, APInt Ones = APInt::getAllOnesValue(32); unsigned NumElts = VT.getSizeInBits() / 32; SDValue Vec; - if (!Subtarget->hasInt256() && NumElts == 8) { + if (!Subtarget.hasInt256() && NumElts == 8) { Vec = DAG.getConstant(Ones, dl, MVT::v4i32); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); } else { @@ -4803,7 +4803,7 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). 
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool IsZero, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = V2.getSimpleValueType(); SDValue V1 = IsZero @@ -5180,7 +5180,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, - const X86Subtarget* Subtarget, + const X86Subtarget &Subtarget, const TargetLowering &TLI) { if (NumNonZero > 8) return SDValue(); @@ -5190,7 +5190,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, bool First = true; // SSE4.1 - use PINSRB to insert each byte directly. - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { for (unsigned i = 0; i < 16; ++i) { bool isNonZero = (NonZeros & (1 << i)) != 0; if (isNonZero) { @@ -5250,7 +5250,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, - const X86Subtarget* Subtarget, + const X86Subtarget &Subtarget, const TargetLowering &TLI) { if (NumNonZero > 4) return SDValue(); @@ -5279,7 +5279,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, const TargetLowering &TLI) { // Find all zeroable elements. std::bitset<4> Zeroable; @@ -5343,7 +5343,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, } // See if we can lower this build_vector to a INSERTPS. - if (!Subtarget->hasSSE41()) + if (!Subtarget.hasSSE41()) return SDValue(); SDValue V2 = Elt.getOperand(0); @@ -5624,12 +5624,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, /// a scalar load, or a constant. /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, +static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // VBROADCAST requires AVX. // TODO: Splats could be generated for non-AVX CPUs using SSE // instructions, but there's less potential gain for only 128-bit vectors. - if (!Subtarget->hasAVX()) + if (!Subtarget.hasAVX()) return SDValue(); MVT VT = Op.getSimpleValueType(); @@ -5679,7 +5679,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR && Sc.getOpcode() != ISD::BUILD_VECTOR) { - if (!Subtarget->hasInt256()) + if (!Subtarget.hasInt256()) return SDValue(); // Use the register form of the broadcast instruction available on AVX2. @@ -5697,7 +5697,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, // Constants may have multiple users. // AVX-512 has register version of the broadcast - bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() && + bool hasRegVer = Subtarget.hasAVX512() && VT.is512BitVector() && Ld.getValueType().getSizeInBits() >= 32; if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) && !hasRegVer)) @@ -5722,7 +5722,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, // from the constant pool and not to broadcast it from a scalar. // But override that restriction when optimizing for size. 
// TODO: Check if splatting is recommended for other AVX-capable CPUs. - if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) { + if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) { EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); @@ -5731,7 +5731,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || - (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) { + (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast(Ld)) C = CI->getConstantIntValue(); @@ -5756,7 +5756,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, bool IsLoad = ISD::isNormalLoad(Ld.getNode()); // Handle AVX2 in-register broadcasts. - if (!IsLoad && Subtarget->hasInt256() && + if (!IsLoad && Subtarget.hasInt256() && (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); @@ -5765,12 +5765,12 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, return SDValue(); if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || - (Subtarget->hasVLX() && ScalarSize == 64)) + (Subtarget.hasVLX() && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match // double since there is no vbroadcastsd xmm - if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) { + if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) { if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } @@ -6156,10 +6156,10 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, /// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB /// node. static SDValue LowerToAddSub(const BuildVectorSDNode *BV, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = BV->getSimpleValueType(0); - if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && - (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) + if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && + (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); SDLoc DL(BV); @@ -6258,7 +6258,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = BV->getSimpleValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -6282,14 +6282,14 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, SDLoc DL(BV); SDValue InVec0, InVec1; - if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { + if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. 
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); - } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { + } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) { // Try to match an SSSE3 integer HADD/HSUB. if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); @@ -6298,7 +6298,7 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); } - if (!Subtarget->hasAVX()) + if (!Subtarget.hasAVX()) return SDValue(); if ((VT == MVT::v8f32 || VT == MVT::v4f64)) { @@ -6346,7 +6346,7 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, if (CanFold) { // Fold this build_vector into a single horizontal add/sub. // Do this only if the target has AVX2. - if (Subtarget->hasAVX2()) + if (Subtarget.hasAVX2()) return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); // Do not try to expand this build_vector into a pair of horizontal @@ -6364,7 +6364,7 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, } if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || - VT == MVT::v16i16) && Subtarget->hasAVX()) { + VT == MVT::v16i16) && Subtarget.hasAVX()) { unsigned X86Opcode; if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HADD; @@ -6408,7 +6408,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; - return getZeroVector(VT, &Subtarget, DAG, DL); + return getZeroVector(VT, Subtarget, DAG, DL); } // Vectors containing all ones can be matched by pcmpeqd on 128-bit width @@ -6419,7 +6419,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, return Op; if (!VT.is512BitVector()) - return getOnesVector(VT, &Subtarget, DAG, DL); + return getOnesVector(VT, Subtarget, DAG, DL); } return SDValue(); @@ -6434,10 +6434,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. - if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512()) + if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG); - if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, *Subtarget)) + if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) return VectorConstant; BuildVectorSDNode *BV = cast(Op.getNode()); @@ -6486,7 +6486,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // insertion that way. Only do this if the value is non-constant or if the // value is a constant being inserted into element 0. It is cheaper to do // a constant pool load than it is to do a movd + shuffle. - if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && + if (ExtVT == MVT::i64 && !Subtarget.is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle SSE only. 
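
// Aside (illustrative sketch, separate from the diff): the mechanical change in every
// hunk here is the same -- the Subtarget member/parameter becomes a const reference,
// so '->' turns into '.'. The FeatureSet/Lowering names below are made-up stand-ins,
// not the real X86Subtarget/X86TargetLowering classes; they only show why a reference
// fits a mandatory, never-null dependency better than a pointer.
#include <cassert>

struct FeatureSet {
  bool hasSSE41() const { return true; }
};

class Lowering {
  // A reference documents that the subtarget is mandatory: bound once at
  // construction, never null, and never reseated afterwards.
  const FeatureSet &Features;

public:
  explicit Lowering(const FeatureSet &F) : Features(F) {}

  // Queries use '.', matching the s/Subtarget->/Subtarget./ rewrite.
  bool canUseBlend() const { return Features.hasSSE41(); }
};

int main() {
  FeatureSet F;
  Lowering L(F);
  assert(L.canUseBlend());
}
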
@@ -6511,7 +6511,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || - (ExtVT == MVT::i64 && Subtarget->is64Bit())) { + (ExtVT == MVT::i64 && Subtarget.is64Bit())) { if (VT.is512BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, @@ -6529,7 +6529,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); if (VT.is256BitVector()) { - if (Subtarget->hasAVX()) { + if (Subtarget.hasAVX()) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } else { @@ -6697,7 +6697,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return Sh; // For SSE 4.1, use insertps to put the high elements into the low element. - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -6774,7 +6774,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG & DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); @@ -6851,7 +6851,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, } static SDValue LowerCONCAT_VECTORS(SDValue Op, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getVectorElementType() == MVT::i1) @@ -7177,7 +7177,7 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, /// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Original, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); @@ -7240,13 +7240,13 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, case MVT::v4i64: case MVT::v8i32: - assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!"); + assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); // FALLTHROUGH case MVT::v2i64: case MVT::v4i32: // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into // that instruction. - if (Subtarget->hasAVX2()) { + if (Subtarget.hasAVX2()) { // Scale the blend by the number of 32-bit dwords per element. int Scale = VT.getScalarSizeInBits() / 32; BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); @@ -7271,7 +7271,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, } case MVT::v16i16: { - assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!"); + assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { // We can lower these with PBLENDW which is mirrored across 128-bit lanes. 
@@ -7287,7 +7287,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, // FALLTHROUGH case MVT::v16i8: case MVT::v32i8: { - assert((VT.is128BitVector() || Subtarget->hasAVX2()) && + assert((VT.is128BitVector() || Subtarget.hasAVX2()) && "256-bit byte-blends require AVX2 support!"); // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. @@ -7425,7 +7425,7 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT, static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); @@ -7503,7 +7503,7 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, int Scale = 16 / NumLaneElts; // SSSE3 targets can use the palignr instruction. - if (Subtarget->hasSSSE3()) { + if (Subtarget.hasSSSE3()) { // Cast the inputs to i8 vector of correct length to match PALIGNR. MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes); Lo = DAG.getBitcast(AlignVT, Lo); @@ -7767,7 +7767,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, /// the same lane. static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV, - ArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Scale > 1 && "Need a scale to extend."); int EltBits = VT.getScalarSizeInBits(); int NumElements = VT.getVectorNumElements(); @@ -7800,7 +7800,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( // Found a valid zext mask! Try various lowering strategies based on the // input type and available ISA extensions. - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { // Not worth offseting 128-bit vectors if scale == 2, a pattern using // PUNPCK will catch this in a later shuffle match. if (Offset && Scale == 2 && VT.is128BitVector()) @@ -7839,7 +7839,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes // to 64-bits. - if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) { + if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) { assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); assert(VT.is128BitVector() && "Unexpected vector width!"); @@ -7865,7 +7865,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( // If this would require more than 2 unpack instructions to expand, use // pshufb when available. We can only use more than 2 unpack instructions // when zero extending i8 elements which also makes it easier to use pshufb. - if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) { + if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) { assert(NumElements == 16 && "Unexpected byte vector width!"); SDValue PSHUFBMask[16]; for (int i = 0; i < 16; ++i) { @@ -7925,7 +7925,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( /// are both incredibly common and often quite performance sensitive. 
static SDValue lowerVectorShuffleAsZeroOrAnyExtend( SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); int Bits = VT.getSizeInBits(); @@ -8084,7 +8084,7 @@ static bool isShuffleFoldableLoad(SDValue V) { /// across all subtarget feature sets. static SDValue lowerVectorShuffleAsElementInsertion( SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); @@ -8141,7 +8141,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( // This is essentially a special case blend operation, but if we have // general purpose blend operations, they are always faster. Bail and let // the rest of the lowering handle these as blends. - if (Subtarget->hasSSE41()) + if (Subtarget.hasSSE41()) return SDValue(); // Otherwise, use MOVSD or MOVSS. @@ -8187,9 +8187,9 @@ static SDValue lowerVectorShuffleAsElementInsertion( /// This assumes we have AVX2. static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, int BroadcastIdx, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget->hasAVX2() && + assert(Subtarget.hasAVX2() && "We can only lower integer broadcasts with AVX2!"); EVT EltVT = VT.getVectorElementType(); @@ -8242,11 +8242,11 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, /// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, ArrayRef Mask, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (!Subtarget->hasAVX()) + if (!Subtarget.hasAVX()) return SDValue(); - if (VT.isInteger() && !Subtarget->hasAVX2()) + if (VT.isInteger() && !Subtarget.hasAVX2()) return SDValue(); // Check that the mask is a broadcast. @@ -8317,11 +8317,11 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // If the scalar isn't a load, we can't broadcast from it in AVX1. // Only AVX2 has register broadcasts. - if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) + if (!Subtarget.hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); } else if (MayFoldLoad(BC) && !cast(BC)->isVolatile()) { // 32-bit targets need to load i64 as a f64 and then bitcast the result. - if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64) + if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); // If we are broadcasting a load that is only used by the shuffle @@ -8337,7 +8337,7 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - } else if (!Subtarget->hasAVX2()) { + } else if (!Subtarget.hasAVX2()) { // We can't broadcast from a vector register without AVX2. return SDValue(); } else if (BroadcastIdx != 0) { @@ -8567,7 +8567,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(SDLoc DL, MVT VT, /// it is better to avoid lowering through this for integer vectors where /// possible. 
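
// Aside (illustrative sketch, separate from the diff): the zero/any-extend lowering
// above first checks that the shuffle mask really is an extension by 'Scale'. This
// standalone, simplified test keeps every Scale-th position reading consecutive
// source elements and requires the gap positions to be undef (-1); the real code
// additionally accepts positions that are provably zero.
#include <cassert>
#include <vector>

static bool isZExtMask(const std::vector<int> &Mask, int Scale) {
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    int M = Mask[i];
    if (M == -1)
      continue;              // undef matches anything
    if (i % Scale != 0)
      return false;          // gap positions must be undef (or zero)
    if (M != i / Scale)
      return false;          // kept positions read consecutive elements
  }
  return true;
}

int main() {
  // Zero-extending the low 4 elements of a v8i16-style mask (Scale = 2).
  assert(isZExtMask({0, -1, 1, -1, 2, -1, 3, -1}, 2));
  assert(!isZExtMask({0, 1, 2, 3, -1, -1, -1, -1}, 2));
}
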
static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!"); @@ -8579,7 +8579,7 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isSingleInputShuffleMask(Mask)) { // Use low duplicate instructions for masks that match their pattern. - if (Subtarget->hasSSE3()) + if (Subtarget.hasSSE3()) if (isShuffleEquivalent(V1, V2, Mask, {0, 0})) return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1); @@ -8587,7 +8587,7 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // single input as both of the "inputs" to this instruction.. unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1); - if (Subtarget->hasAVX()) { + if (Subtarget.hasAVX()) { // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, @@ -8626,7 +8626,7 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DL, MVT::v2f64, V2, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S)); - if (Subtarget->hasSSE41()) + if (Subtarget.hasSSE41()) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -8648,7 +8648,7 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// it falls back to the floating point shuffle operation with appropriate bit /// casting. static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!"); @@ -8719,7 +8719,7 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // We have different paths for blend lowering, but they all must use the // *exact* same predicate. - bool IsBlendSupported = Subtarget->hasSSE41(); + bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) @@ -8732,7 +8732,7 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. - if (Subtarget->hasSSSE3()) + if (Subtarget.hasSSSE3()) if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; @@ -8867,7 +8867,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT, /// domain crossing penalties, as these are sufficient to implement all v4f32 /// shuffles. static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!"); @@ -8887,14 +8887,14 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Broadcast; // Use even/odd duplicate instructions for masks that match their pattern. 
- if (Subtarget->hasSSE3()) { + if (Subtarget.hasSSE3()) { if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2})) return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1); if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3})) return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1); } - if (Subtarget->hasAVX()) { + if (Subtarget.hasAVX()) { // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1, @@ -8917,7 +8917,7 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, Mask, Subtarget, DAG)) return V; - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -8946,7 +8946,7 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// We try to handle these with integer-domain shuffles where we can, but for /// blends we use the floating point domain blend instructions. static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!"); @@ -9001,7 +9001,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // We have different paths for blend lowering, but they all must use the // *exact* same predicate. - bool IsBlendSupported = Subtarget->hasSSE41(); + bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) @@ -9018,7 +9018,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. - if (Subtarget->hasSSSE3()) + if (Subtarget.hasSSSE3()) if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; @@ -9063,7 +9063,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// vector, form the analogous 128-bit 8-element Mask. static SDValue lowerV8I16GeneralSingleInputVectorShuffle( SDLoc DL, MVT VT, SDValue V, MutableArrayRef Mask, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); @@ -9580,7 +9580,7 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, /// halves of the inputs separately (making them have relatively few inputs) /// and then concatenate them. static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!"); @@ -9641,7 +9641,7 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Shift; // See if we can use SSE4A Extraction / Insertion. - if (Subtarget->hasSSE4A()) + if (Subtarget.hasSSE4A()) if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG)) return V; @@ -9653,7 +9653,7 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // We have different paths for blend lowering, but they all must use the // *exact* same predicate. 
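
// Aside (illustrative sketch, separate from the diff): the MOVSLDUP/MOVSHDUP matches
// above go through isShuffleEquivalent, which treats undef (-1) mask entries as
// wildcards. This standalone version models only that wildcard behaviour; the real
// helper also canonicalizes indices that refer to the second input vector.
#include <cassert>
#include <cstddef>
#include <vector>

static bool maskMatches(const std::vector<int> &Mask,
                        const std::vector<int> &Expected) {
  if (Mask.size() != Expected.size())
    return false;
  for (std::size_t i = 0; i != Mask.size(); ++i)
    if (Mask[i] != -1 && Mask[i] != Expected[i])
      return false;          // a defined entry must match exactly
  return true;
}

int main() {
  assert(maskMatches({0, -1, 2, 2}, {0, 0, 2, 2}));   // MOVSLDUP-style pattern
  assert(!maskMatches({1, 1, 3, 2}, {1, 1, 3, 3}));   // not the MOVSHDUP pattern
}
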
- bool IsBlendSupported = Subtarget->hasSSE41(); + bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) @@ -9683,7 +9683,7 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // If we can't directly blend but can use PSHUFB, that will be better as it // can both shuffle and set up the inefficient blend. - if (!IsBlendSupported && Subtarget->hasSSSE3()) { + if (!IsBlendSupported && Subtarget.hasSSSE3()) { bool V1InUse, V2InUse; return lowerVectorShuffleAsPSHUFB(DL, MVT::v8i16, V1, V2, Mask, DAG, V1InUse, V2InUse); @@ -9771,7 +9771,7 @@ static int canLowerByDroppingEvenElements(ArrayRef Mask) { /// the existing lowering for v8i16 blends on each half, finally PACK-ing them /// back together. static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!"); @@ -9797,7 +9797,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return ZExt; // See if we can use SSE4A Extraction / Insertion. - if (Subtarget->hasSSE4A()) + if (Subtarget.hasSSE4A()) if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG)) return V; @@ -9924,7 +9924,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // FIXME: The only exceptions to the above are blends which are exact // interleavings with direct instructions supporting them. We currently don't // handle those well here. - if (Subtarget->hasSSSE3()) { + if (Subtarget.hasSSSE3()) { bool V1InUse = false; bool V2InUse = false; @@ -9935,7 +9935,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // do so. This avoids using them to handle blends-with-zero which is // important as a single pshufb is significantly faster for that. if (V1InUse && V2InUse) { - if (Subtarget->hasSSE41()) + if (Subtarget.hasSSE41()) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -10064,7 +10064,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// This routine breaks down the specific type of 128-bit shuffle and /// dispatches to the lowering routines accordingly. static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, - MVT VT, const X86Subtarget *Subtarget, + MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { case MVT::v2i64: @@ -10382,7 +10382,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT, /// \brief Handle lowering 2-lane 128-bit shuffles. static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { // TODO: If minimizing size and one of the inputs is a zero vector and the // the zero vector has only one use, we could use a VPERM2X128 to save the @@ -10475,7 +10475,7 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, /// those are still *marginally* more expensive. 
static SDValue lowerVectorShuffleByMerging128BitLanes( SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!isSingleInputShuffleMask(Mask) && "This is only useful with multiple inputs."); @@ -10549,7 +10549,7 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( /// or shuffling smaller vector types which can lower more efficiently. static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(VT.is256BitVector() && "Expected 256-bit vector"); @@ -10635,7 +10635,7 @@ static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1, return SDValue(); // AVX2 - XXXXuuuu - always extract lowers. - if (Subtarget->hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) { + if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) { // AVX2 supports efficient immediate 64-bit element cross-lane shuffles. if (VT == MVT::v4f64 || VT == MVT::v4i64) return SDValue(); @@ -10714,7 +10714,7 @@ static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT, /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); @@ -10748,7 +10748,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } // With AVX2 we have direct support for this permutation. - if (Subtarget->hasAVX2()) + if (Subtarget.hasAVX2()) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); @@ -10775,7 +10775,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. - if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || + if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)))) if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) @@ -10783,7 +10783,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // If we have AVX2 then we always want to lower with a blend because an v4 we // can fully permute the elements. - if (Subtarget->hasAVX2()) + if (Subtarget.hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask, DAG); @@ -10796,7 +10796,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v4i64 shuffling.. 
static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); @@ -10804,7 +10804,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); - assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!"); + assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); SmallVector WidenedMask; if (canWidenShuffleElements(Mask, WidenedMask)) @@ -10859,7 +10859,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. - if (!(Subtarget->hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || + if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)))) if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) @@ -10875,7 +10875,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); @@ -10936,7 +10936,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, X86ISD::VPERMILPV, DL, MVT::v8f32, V1, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)); - if (Subtarget->hasAVX2()) + if (Subtarget.hasAVX2()) return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); @@ -10954,7 +10954,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. - if (Subtarget->hasAVX2()) + if (Subtarget.hasAVX2()) return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask, DAG); @@ -10967,7 +10967,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v8i32 shuffling.. static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); @@ -10975,7 +10975,7 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!"); + assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. 
It also allows us to fold memory operands into the @@ -11047,7 +11047,7 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v16i16 shuffling.. static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); @@ -11055,7 +11055,7 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!"); + assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the @@ -11138,7 +11138,7 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v32i8 shuffling.. static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); @@ -11146,7 +11146,7 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); - assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!"); + assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the @@ -11215,7 +11215,7 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// shuffle or splits it into two 128-bit shuffles and fuses the results back /// together based on the available instructions. static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, - MVT VT, const X86Subtarget *Subtarget, + MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); ShuffleVectorSDNode *SVOp = cast(Op); @@ -11244,7 +11244,7 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, // essentially *zero* ability to manipulate a 256-bit vector with integer // types. Since we'll use floating point types there eventually, just // immediately cast everything to a float and operate entirely in that domain. - if (VT.isInteger() && !Subtarget->hasAVX2()) { + if (VT.isInteger() && !Subtarget.hasAVX2()) { int ElementBits = VT.getScalarSizeInBits(); if (ElementBits < 32) // No floating point type available, decompose into 128-bit vectors. @@ -11329,7 +11329,7 @@ static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, /// \brief Handle lowering of 8-lane 64-bit floating point shuffles. 
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); @@ -11351,7 +11351,7 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// \brief Handle lowering of 16-lane 32-bit floating point shuffles. static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); @@ -11369,7 +11369,7 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// \brief Handle lowering of 8-lane 64-bit integer shuffles. static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); @@ -11391,7 +11391,7 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// \brief Handle lowering of 16-lane 32-bit integer shuffles. static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); @@ -11409,7 +11409,7 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// \brief Handle lowering of 32-lane 16-bit integer shuffles. static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); @@ -11417,14 +11417,14 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); - assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); + assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); } /// \brief Handle lowering of 64-lane 8-bit integer shuffles. static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); @@ -11432,7 +11432,7 @@ static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); - assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); + assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); @@ -11444,12 +11444,12 @@ static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// shuffle or splits it into two 256-bit shuffles and fuses the results back /// together based on the available instructions. 
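
// Aside (illustrative sketch, separate from the diff): several 256-bit paths above
// first try to widen the mask (in the spirit of canWidenShuffleElements) so a v4i64
// shuffle can be treated as a 2 x 128-bit shuffle. This standalone check widens a
// mask when every output pair reads an aligned, consecutive input pair, with -1
// (undef) acting as a wildcard.
#include <cassert>
#include <cstddef>
#include <vector>

static bool widenMask(const std::vector<int> &Mask, std::vector<int> &Widened) {
  Widened.clear();
  for (std::size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int Lo = Mask[i], Hi = Mask[i + 1];
    if (Lo == -1 && Hi == -1)
      Widened.push_back(-1);                       // fully undef pair
    else if (Lo != -1 && Hi == -1 && Lo % 2 == 0)
      Widened.push_back(Lo / 2);                   // low half known, high undef
    else if (Lo == -1 && Hi != -1 && Hi % 2 == 1)
      Widened.push_back(Hi / 2);                   // high half known, low undef
    else if (Lo != -1 && Hi == Lo + 1 && Lo % 2 == 0)
      Widened.push_back(Lo / 2);                   // aligned consecutive pair
    else
      return false;                                // crosses a wide-element boundary
  }
  return true;
}

int main() {
  std::vector<int> W;
  assert(widenMask({2, 3, 0, 1}, W) && W == std::vector<int>({1, 0}));
  assert(!widenMask({1, 2, 3, 0}, W));             // unaligned pairs cannot widen
}
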
static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, - MVT VT, const X86Subtarget *Subtarget, + MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); - assert(Subtarget->hasAVX512() && + assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); // Check for being able to broadcast a single element. @@ -11471,11 +11471,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, case MVT::v16i32: return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: - if (Subtarget->hasBWI()) + if (Subtarget.hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); break; case MVT::v64i8: - if (Subtarget->hasBWI()) + if (Subtarget.hasBWI()) return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG); break; @@ -11492,12 +11492,12 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, // The only way to shuffle bits is to sign-extend the mask vector to SIMD // vector, shuffle and then truncate it back. static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, - MVT VT, const X86Subtarget *Subtarget, + MVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); - assert(Subtarget->hasAVX512() && + assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); MVT ExtVT; switch (VT.SimpleTy) { @@ -11548,7 +11548,7 @@ static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, /// above in helper routines. The canonicalization attempts to widen shuffles /// to involve fewer lanes of wider elements, consolidate symmetric patterns /// s.t. only one of the two inputs needs to be tested, etc. -static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, +static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); @@ -11729,7 +11729,7 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, /// \brief Try to lower a VSELECT instruction to a vector shuffle. static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Cond = Op.getOperand(0); SDValue LHS = Op.getOperand(1); @@ -11767,7 +11767,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { return BlendOp; // Variable blends are only legal from SSE4.1 onward. - if (!Subtarget->hasSSE41()) + if (!Subtarget.hasSSE41()) return SDValue(); // Only some types will be legal on some subtargets. If we can emit a legal @@ -11780,7 +11780,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { case MVT::v32i8: // The byte blends for AVX vectors were introduced only in AVX2. - if (Subtarget->hasAVX2()) + if (Subtarget.hasAVX2()) return Op; return SDValue(); @@ -11788,7 +11788,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { case MVT::v8i16: case MVT::v16i16: // AVX-512 BWI and VLX features support VSELECT with i16 elements. 
- if (Subtarget->hasBWI() && Subtarget->hasVLX()) + if (Subtarget.hasBWI() && Subtarget.hasVLX()) return Op; // FIXME: We should custom lower this by fixing the condition and using i8 @@ -11866,7 +11866,7 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const MVT EltVT = Op.getSimpleValueType(); assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector"); - assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) && + assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) && "Unexpected vector type in ExtractBitFromMaskVector"); // variable index can't be handled in mask registers, @@ -11881,7 +11881,7 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const unsigned IdxVal = cast(Idx)->getZExtValue(); const TargetRegisterClass* rc = getRegClassFor(VecVT); - if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8)) + if (!Subtarget.hasDQI() && (VecVT.getVectorNumElements() <= 8)) rc = getRegClassFor(MVT::v16i1); unsigned MaxSift = rc->getSize()*8 - 1; Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, @@ -11905,7 +11905,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (!isa(Idx)) { if (VecVT.is512BitVector() || - (VecVT.is256BitVector() && Subtarget->hasInt256() && + (VecVT.is256BitVector() && Subtarget.hasInt256() && VecVT.getVectorElementType().getSizeInBits() == 32)) { MVT MaskEltVT = @@ -11946,7 +11946,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, assert(VecVT.is128BitVector() && "Unexpected vector length"); - if (Subtarget->hasSSE41()) + if (Subtarget.hasSSE41()) if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG)) return Res; @@ -12060,8 +12060,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // TODO: It is worthwhile to cast integer to floating point and back // and incur a domain crossing penalty if that's what we'll end up // doing anyway after extracting to a 128-bit vector. - if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || - (Subtarget->hasAVX2() && EltVT == MVT::i32)) { + if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || + (Subtarget.hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); N2 = DAG.getIntPtrConstant(1, dl); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); @@ -12085,7 +12085,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, } assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) { unsigned Opc; if (VT == MVT::v8i16) { @@ -12185,7 +12185,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { // Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in // a simple subregister reference or explicit instructions to grab // upper bits of a vector. 
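
// Aside (illustrative sketch, separate from the diff): ExtractBitFromMaskVector above
// pulls one bit out of a k-register by shifting it into the MSB (VSHLI by
// MaxSift - IdxVal) and then back down to bit 0 (VSRLI by MaxSift). The same trick on
// a plain 16-bit mask, as a scalar model:
#include <cassert>
#include <cstdint>

static unsigned extractMaskBit(uint16_t Mask, unsigned Idx) {
  const unsigned MaxShift = 15;                              // mask width - 1
  uint16_t Shifted = (uint16_t)(Mask << (MaxShift - Idx));   // bit Idx -> MSB
  return Shifted >> MaxShift;                                // MSB -> bit 0
}

int main() {
  uint16_t K = 0x0024;                 // bits 2 and 5 set
  assert(extractMaskBit(K, 2) == 1);
  assert(extractMaskBit(K, 3) == 0);
  assert(extractMaskBit(K, 5) == 1);
}
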
-static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); SDValue In = Op.getOperand(0); @@ -12194,7 +12194,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, MVT ResVT = Op.getSimpleValueType(); MVT InVT = In.getSimpleValueType(); - if (Subtarget->hasFp256()) { + if (Subtarget.hasFp256()) { if (ResVT.is128BitVector() && (InVT.is256BitVector() || InVT.is512BitVector()) && isa(Idx)) { @@ -12211,9 +12211,9 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, // Lower a node with an INSERT_SUBVECTOR opcode. This may result in a // simple superregister reference or explicit instructions to insert // the upper bits of a vector. -static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (!Subtarget->hasAVX()) + if (!Subtarget.hasAVX()) return SDValue(); SDLoc dl(Op); @@ -12246,7 +12246,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, bool Fast; unsigned Alignment = FirstLd->getAlignment(); unsigned AS = FirstLd->getAddressSpace(); - const X86TargetLowering *TLI = Subtarget->getTargetLowering(); + const X86TargetLowering *TLI = Subtarget.getTargetLowering(); if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = { SubVec2, SubVec }; @@ -12286,12 +12286,12 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { unsigned WrapperKind = X86ISD::Wrapper; CodeModel::Model M = DAG.getTarget().getCodeModel(); - if (Subtarget->isPICStyleRIPRel() && + if (Subtarget.isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) WrapperKind = X86ISD::WrapperRIP; - else if (Subtarget->isPICStyleGOT()) + else if (Subtarget.isPICStyleGOT()) OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleStubPIC()) + else if (Subtarget.isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -12318,12 +12318,12 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { unsigned WrapperKind = X86ISD::Wrapper; CodeModel::Model M = DAG.getTarget().getCodeModel(); - if (Subtarget->isPICStyleRIPRel() && + if (Subtarget.isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) WrapperKind = X86ISD::WrapperRIP; - else if (Subtarget->isPICStyleGOT()) + else if (Subtarget.isPICStyleGOT()) OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleStubPIC()) + else if (Subtarget.isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -12350,16 +12350,16 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { unsigned WrapperKind = X86ISD::Wrapper; CodeModel::Model M = DAG.getTarget().getCodeModel(); - if (Subtarget->isPICStyleRIPRel() && + if (Subtarget.isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) { - if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF()) + if (Subtarget.isTargetDarwin() || Subtarget.isTargetELF()) OpFlag = X86II::MO_GOTPCREL; WrapperKind = X86ISD::WrapperRIP; - } else if (Subtarget->isPICStyleGOT()) { + } else if (Subtarget.isPICStyleGOT()) { OpFlag = X86II::MO_GOT; - } else if (Subtarget->isPICStyleStubPIC()) { + } else if 
(Subtarget.isPICStyleStubPIC()) { OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE; - } else if (Subtarget->isPICStyleStubNoDynamic()) { + } else if (Subtarget.isPICStyleStubNoDynamic()) { OpFlag = X86II::MO_DARWIN_NONLAZY; } @@ -12371,7 +12371,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { // With PIC, the address is actually $g + Offset. if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ && - !Subtarget->is64Bit()) { + !Subtarget.is64Bit()) { Result = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); @@ -12391,7 +12391,7 @@ SDValue X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. unsigned char OpFlags = - Subtarget->ClassifyBlockAddressReference(); + Subtarget.ClassifyBlockAddressReference(); CodeModel::Model M = DAG.getTarget().getCodeModel(); const BlockAddress *BA = cast(Op)->getBlockAddress(); int64_t Offset = cast(Op)->getOffset(); @@ -12399,7 +12399,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); - if (Subtarget->isPICStyleRIPRel() && + if (Subtarget.isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else @@ -12420,7 +12420,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. unsigned char OpFlags = - Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()); + Subtarget.ClassifyGlobalReference(GV, DAG.getTarget()); CodeModel::Model M = DAG.getTarget().getCodeModel(); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; @@ -12433,7 +12433,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags); } - if (Subtarget->isPICStyleRIPRel() && + if (Subtarget.isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else @@ -12627,35 +12627,35 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = GA->getGlobal(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - if (Subtarget->isTargetELF()) { + if (Subtarget.isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); case TLSModel::LocalDynamic: return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, - Subtarget->is64Bit()); + Subtarget.is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(), + return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), DAG.getTarget().getRelocationModel() == Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } - if (Subtarget->isTargetDarwin()) { + if (Subtarget.isTargetDarwin()) { // Darwin only has one model of TLS. Lower to that. unsigned char OpFlag = 0; - unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? + unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ? 
X86ISD::WrapperRIP : X86ISD::Wrapper; // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) && - !Subtarget->is64Bit(); + !Subtarget.is64Bit(); if (PIC32) OpFlag = X86II::MO_TLVP_PIC_BASE; else @@ -12689,12 +12689,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // And our return value (tls address) is in the standard call return value // location. - unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); } - if (Subtarget->isTargetKnownWindowsMSVC() || - Subtarget->isTargetWindowsGNU()) { + if (Subtarget.isTargetKnownWindowsMSVC() || + Subtarget.isTargetWindowsGNU()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -12712,15 +12712,15 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly // use its literal value of 0x2C. - Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() + Value *Ptr = Constant::getNullValue(Subtarget.is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue TlsArray = Subtarget->is64Bit() + SDValue TlsArray = Subtarget.is64Bit() ? DAG.getIntPtrConstant(0x58, dl) - : (Subtarget->isTargetWindowsGNU() + : (Subtarget.isTargetWindowsGNU() ? DAG.getIntPtrConstant(0x2C, dl) : DAG.getExternalSymbol("_tls_array", PtrVT)); @@ -12734,7 +12734,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { } else { // Load the _tls_index variable SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); - if (Subtarget->is64Bit()) + if (Subtarget.is64Bit()) IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, MachinePointerInfo(), MVT::i32, false, false, false, 0); @@ -12850,13 +12850,13 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) return Op; if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && - Subtarget->is64Bit()) { + Subtarget.is64Bit()) { return Op; } SDValue ValueToStore = Op.getOperand(0); if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && - !Subtarget->is64Bit()) + !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. @@ -12989,7 +12989,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; - if (Subtarget->hasSSE3()) { + if (Subtarget.hasSSE3()) { // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'. 
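
// Aside (illustrative sketch, separate from the diff): the i64 -> f64 path above splits
// the input into 32-bit halves, materializes each half exactly with exponent-bias
// constants, and combines the partial sums (the final add is the FHADD, or the
// movhlps+addsd alternative mentioned in the FIXME). This scalar model shows only the
// underlying arithmetic identity, not the instruction sequence.
#include <cassert>
#include <cstdint>

static double u64ToDouble(uint64_t X) {
  double Hi = (double)(uint32_t)(X >> 32) * 4294967296.0; // high half scaled by 2^32
  double Lo = (double)(uint32_t)X;                        // low 32 bits
  return Hi + Lo;                                         // the "horizontal add" step
}

int main() {
  assert(u64ToDouble(0) == 0.0);
  assert(u64ToDouble(1ull << 40) == 1099511627776.0);
  assert(u64ToDouble(0xFFFFFFFFull) == 4294967295.0);
}
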
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { @@ -13184,10 +13184,10 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, } case MVT::v4i32: case MVT::v8i32: - return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget); + return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); case MVT::v16i8: case MVT::v16i16: - assert(Subtarget->hasAVX512()); + assert(Subtarget.hasAVX512()); return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } @@ -13211,8 +13211,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); - if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && - (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) { + if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && + (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) { // Conversions from unsigned i32 to f32/f64 are legal, // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode. return Op; @@ -13222,7 +13222,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return LowerUINT_TO_FP_i64(Op, DAG); if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); - if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) + if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); // Make a 64-bit buffer, and use it to build an FILD. @@ -13242,7 +13242,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); SDValue ValueToStore = Op.getOperand(0); - if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit()) + if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come // with two 32-bit stores. @@ -13325,10 +13325,10 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // used for the 32-bit subtarget, but also for f80 on a 64-bit target. bool UnsignedFixup = !IsSigned && DstTy == MVT::i64 && - (!Subtarget->is64Bit() || + (!Subtarget.is64Bit() || !isScalarFPTypeInSSEReg(TheVT)); - if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) { + if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) { // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST. // The low 32 bits of the fist result will have the correct uint32 result. assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); @@ -13343,7 +13343,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, if (DstTy == MVT::i32 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); - if (Subtarget->is64Bit() && + if (Subtarget.is64Bit() && DstTy == MVT::i64 && isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); @@ -13459,7 +13459,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, false, false, false, 0); High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { // Join High32 and Low32 into a 64-bit result. 
// (High32 << 32) | Low32 Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32); @@ -13486,7 +13486,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, } static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); @@ -13513,7 +13513,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, ((VT != MVT::v4i64) || (InVT != MVT::v4i32))) return SDValue(); - if (Subtarget->hasInt256()) + if (Subtarget.hasInt256()) return DAG.getNode(X86ISD::VZEXT, dl, VT, In); SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); @@ -13532,13 +13532,13 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, } static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); SDLoc DL(Op); unsigned int NumElts = VT.getVectorNumElements(); - if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI()) + if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI()) return SDValue(); if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) @@ -13557,16 +13557,16 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, return DAG.getNode(X86ISD::VTRUNC, DL, VT, V); } -static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (Subtarget->hasFp256()) + if (Subtarget.hasFp256()) if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; return SDValue(); } -static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); @@ -13576,7 +13576,7 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1) return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG); - if (Subtarget->hasFp256()) + if (Subtarget.hasFp256()) if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; @@ -13586,7 +13586,7 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, } static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); @@ -13598,10 +13598,10 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, // Shift LSB to MSB and use VPMOVB2M - SKX. 
unsigned ShiftInx = InVT.getScalarSizeInBits() - 1; if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 && - Subtarget->hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M + Subtarget.hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M ((InVT.is256BitVector() || InVT.is128BitVector()) && - InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() && - Subtarget->hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M + InVT.getScalarSizeInBits() <= 16 && Subtarget.hasBWI() && + Subtarget.hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M // Shift packed bytes not supported natively, bitcast to dword MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT, @@ -13611,10 +13611,10 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode); } if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 && - Subtarget->hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M + Subtarget.hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M ((InVT.is256BitVector() || InVT.is128BitVector()) && - InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() && - Subtarget->hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M + InVT.getScalarSizeInBits() >= 32 && Subtarget.hasDQI() && + Subtarget.hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In, DAG.getConstant(ShiftInx, DL, InVT)); @@ -13625,7 +13625,7 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, unsigned NumElts = InVT.getVectorNumElements(); if (InVT.getSizeInBits() < 512 && (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 || - !Subtarget->hasVLX())) { + !Subtarget.hasVLX())) { assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type."); // TESTD/Q should be used (if BW supported we use CVT2MASK above), @@ -13662,16 +13662,16 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { return LowerTruncateVecI1(Op, DAG, Subtarget); // vpmovqb/w/d, vpmovdb/w, vpmovwb - if (Subtarget->hasAVX512()) { + if (Subtarget.hasAVX512()) { // word to byte only under BWI - if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8 + if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8 return DAG.getNode(X86ISD::VTRUNC, DL, VT, DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In)); return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); } if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. - if (Subtarget->hasInt256()) { + if (Subtarget.hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; In = DAG.getBitcast(MVT::v8i32, In); In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), @@ -13692,7 +13692,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { // On AVX2, v8i32 -> v8i16 becomed PSHUFB. 
- if (Subtarget->hasInt256()) { + if (Subtarget.hasInt256()) { In = DAG.getBitcast(MVT::v32i8, In); SmallVector pshufbMask; @@ -13750,7 +13750,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if (!VT.is128BitVector() || !InVT.is256BitVector()) return SDValue(); - assert(Subtarget->hasFp256() && "256-bit vector without AVX!"); + assert(Subtarget.hasFp256() && "256-bit vector without AVX!"); unsigned NumElems = VT.getVectorNumElements(); MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2); @@ -13998,11 +13998,11 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { } // Check whether an OR'd tree is PTEST-able. -static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree."); - if (!Subtarget->hasSSE41()) + if (!Subtarget.hasSSE41()) return SDValue(); if (!Op->hasOneUse()) @@ -14210,14 +14210,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (ConstantSDNode *C = dyn_cast(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. - if (C->isOne() && !Subtarget->slowIncDec()) { + if (C->isOne() && !Subtarget.slowIncDec()) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->isAllOnesValue() && !Subtarget->slowIncDec()) { + if (C->isAllOnesValue() && !Subtarget.slowIncDec()) { Opcode = X86ISD::DEC; NumOperands = 1; break; @@ -14360,7 +14360,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // of memory operations. if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && !DAG.getMachineFunction().getFunction()->optForMinSize() && - !Subtarget->isAtom()) { + !Subtarget.isAtom()) { unsigned ExtendOp = isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0); @@ -14380,7 +14380,7 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const { // If the subtarget does not support the FUCOMI instruction, floating-point // comparisons have to be converted. - if (Subtarget->hasCMov() || + if (Subtarget.hasCMov() || Cmp.getOpcode() != X86ISD::CMP || !Cmp.getOperand(0).getValueType().isFloatingPoint() || !Cmp.getOperand(1).getValueType().isFloatingPoint()) @@ -14398,7 +14398,7 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); // Some 64-bit targets lack SAHF support, but they do support FCOMI. - assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); + assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } @@ -14418,10 +14418,10 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. 
- if (VT == MVT::f32 && Subtarget->hasSSE1()) + if (VT == MVT::f32 && Subtarget.hasSSE1()) RecipOp = "sqrtf"; - else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || - (VT == MVT::v8f32 && Subtarget->hasAVX())) + else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) RecipOp = "vec-sqrtf"; else return SDValue(); @@ -14450,10 +14450,10 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, // 15 instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if (VT == MVT::f32 && Subtarget->hasSSE1()) + if (VT == MVT::f32 && Subtarget.hasSSE1()) RecipOp = "divf"; - else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || - (VT == MVT::v8f32 && Subtarget->hasAVX())) + else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) || + (VT == MVT::v8f32 && Subtarget.hasAVX())) RecipOp = "vec-divf"; else return SDValue(); @@ -14665,7 +14665,7 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); @@ -14734,7 +14734,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1); } -static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -14752,7 +14752,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); unsigned Opc = X86ISD::CMPP; - if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) { + if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); Opc = X86ISD::CMPM; } @@ -14804,11 +14804,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // The non-AVX512 code below works under the assumption that source and // destination types are the same. - assert((Subtarget->hasAVX512() || (VT == VTOp0)) && + assert((Subtarget.hasAVX512() || (VT == VTOp0)) && "Value types for source and destination must be the same!"); // Break 256-bit integer vector compare into smaller ones. - if (VT.is256BitVector() && !Subtarget->hasInt256()) + if (VT.is256BitVector() && !Subtarget.hasInt256()) return Lower256IntVSETCC(Op, DAG); MVT OpVT = Op1.getSimpleValueType(); @@ -14816,9 +14816,9 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, return LowerBoolVSETCC_AVX512(Op, DAG); bool MaskResult = (VT.getVectorElementType() == MVT::i1); - if (Subtarget->hasAVX512()) { + if (Subtarget.hasAVX512()) { if (Op1.getSimpleValueType().is512BitVector() || - (Subtarget->hasBWI() && Subtarget->hasVLX()) || + (Subtarget.hasBWI() && Subtarget.hasVLX()) || (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); @@ -14835,7 +14835,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Lower using XOP integer comparisons. 
if ((VT == MVT::v16i8 || VT == MVT::v8i16 || - VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) { + VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) { // Translate compare code to XOP PCOM compare mode. unsigned CmpMode = 0; switch (SetCCOpcode) { @@ -14887,8 +14887,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); bool hasMinMax = - (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) - || (Subtarget->hasSSE2() && (VET == MVT::i8)); + (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) + || (Subtarget.hasSSE2() && (VET == MVT::i8)); if (hasMinMax) { switch (SetCCOpcode) { @@ -14900,7 +14900,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } } - bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); + bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); if (!MinMax && hasSubus) { // As another special case, use PSUBUS[BW] when it's profitable. E.g. for // Op0 u<= Op1: @@ -14914,7 +14914,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // beneficial because the constant in the register is no longer // destructed as the destination so it can be hoisted out of a loop. // Only do this pre-AVX since vpcmp* is no longer destructive. - if (Subtarget->hasAVX()) + if (Subtarget.hasAVX()) break; SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG); if (ULEOp1.getNode()) { @@ -14940,8 +14940,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). if (VT == MVT::v2i64) { - if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) { - assert(Subtarget->hasSSE2() && "Don't know how to lower!"); + if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) { + assert(Subtarget.hasSSE2() && "Don't know how to lower!"); // First cast everything to the right type. Op0 = DAG.getBitcast(MVT::v4i32, Op0); @@ -14982,10 +14982,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, return DAG.getBitcast(VT, Result); } - if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { + if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with // pcmpeqd + pshufd + pand. - assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); + assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!"); // First cast everything to the right type. Op0 = DAG.getBitcast(MVT::v4i32, Op0); @@ -15038,7 +15038,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); - assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1)) + assert(((!Subtarget.hasAVX512() && VT == MVT::i8) || (VT == MVT::i1)) && "SetCC type must be 8-bit or 1-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -15168,15 +15168,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // are available or VBLENDV if AVX is available. // Otherwise FP cmovs get lowered into a less efficient branch sequence later. 
if (Cond.getOpcode() == ISD::SETCC && - ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || - (Subtarget->hasSSE1() && VT == MVT::f32)) && + ((Subtarget.hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || + (Subtarget.hasSSE1() && VT == MVT::f32)) && VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); int SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (SSECC != 8) { - if (Subtarget->hasAVX512()) { + if (Subtarget.hasAVX512()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2); @@ -15198,7 +15198,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // instructions as the AND/ANDN/OR sequence due to register moves, so // don't bother. - if (Subtarget->hasAVX() && + if (Subtarget.hasAVX() && !isa(Op1) && !isa(Op2)) { // Convert to vectors, do a VSELECT, and convert back to scalar. @@ -15438,7 +15438,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); @@ -15449,22 +15449,22 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, // SKX processor if ((InVTElt == MVT::i1) && - (((Subtarget->hasBWI() && Subtarget->hasVLX() && + (((Subtarget.hasBWI() && Subtarget.hasVLX() && VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) || - ((Subtarget->hasBWI() && VT.is512BitVector() && + ((Subtarget.hasBWI() && VT.is512BitVector() && VTElt.getSizeInBits() <= 16)) || - ((Subtarget->hasDQI() && Subtarget->hasVLX() && + ((Subtarget.hasDQI() && Subtarget.hasVLX() && VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) || - ((Subtarget->hasDQI() && VT.is512BitVector() && + ((Subtarget.hasDQI() && VT.is512BitVector() && VTElt.getSizeInBits() >= 32)))) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); unsigned int NumElts = VT.getVectorNumElements(); - if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI()) + if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI()) return SDValue(); if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) { @@ -15488,7 +15488,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, } static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = Op->getOperand(0); MVT VT = Op->getSimpleValueType(0); @@ -15506,7 +15506,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SDLoc dl(Op); // SSE41 targets can use the pmovsx* instructions directly. - if (Subtarget->hasSSE41()) + if (Subtarget.hasSSE41()) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI. 
@@ -15543,7 +15543,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, return SDValue(); } -static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); @@ -15558,7 +15558,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, (VT != MVT::v16i16 || InVT != MVT::v16i8)) return SDValue(); - if (Subtarget->hasInt256()) + if (Subtarget.hasInt256()) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); // Optimize vectors in AVX mode @@ -15601,7 +15601,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, // FIXME: Is the expansion actually better than scalar code? It doesn't seem so. // TODO: It is possible to support ZExt by zeroing the undef values during // the shuffle phase or after the shuffle. -static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT RegVT = Op.getSimpleValueType(); assert(RegVT.isVector() && "We only custom lower vector sext loads."); @@ -15609,7 +15609,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, "We only custom lower integer vector sext loads."); // Nothing useful we can do without SSE2 shuffles. - assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2."); + assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2."); LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); @@ -15628,7 +15628,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); - if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) { + if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) { // The only way in which we have a legal 256-bit vector result but not the // integer 256-bit operations needed to directly lower a sextload is if we // have AVX1 but not AVX2. In that case, we can always emit a sextload to @@ -15751,7 +15751,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, if (Ext == ISD::SEXTLOAD) { // If we have SSE4.1, we can directly emit a VSEXT node. - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Sext; @@ -16087,7 +16087,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool SplitStack = MF.shouldSplitStack(); - bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) || + bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) || SplitStack; SDLoc dl(Op); @@ -16102,7 +16102,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // pointer when other instructions are using the stack. 
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); - bool Is64Bit = Subtarget->is64Bit(); + bool Is64Bit = Subtarget.is64Bit(); MVT SPTy = getPointerTy(DAG.getDataLayout()); SDValue Result; @@ -16117,7 +16117,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) @@ -16146,7 +16146,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, DAG.getRegister(Vreg, SPTy)); } else { SDValue Flag; - const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX); + const unsigned Reg = (Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX); Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); @@ -16154,7 +16154,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); @@ -16183,8 +16183,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); - if (!Subtarget->is64Bit() || - Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) { + if (!Subtarget.is64Bit() || + Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -16224,21 +16224,21 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( - Subtarget->isTarget64BitLP64() ? 8 : 4, DL)); + Subtarget.isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo( - SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0); + SV, Subtarget.isTarget64BitLP64() ? 16 : 12), false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->is64Bit() && + assert(Subtarget.is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert(Op.getNode()->getNumOperands() == 4); MachineFunction &MF = DAG.getMachineFunction(); - if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) // The Win64 ABI uses char* instead of a structure. return DAG.expandVAArg(Op.getNode()); @@ -16268,9 +16268,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. 
- assert(!Subtarget->useSoftFloat() && + assert(!Subtarget.useSoftFloat() && !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) && - Subtarget->hasSSE1()); + Subtarget.hasSSE1()); } // Insert VAARG_64 node into the DAG @@ -16296,12 +16296,12 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { false, false, false, 0); } -static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, // where a va_list is still an i8*. - assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); - if (Subtarget->isCallingConvWin64( + assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); + if (Subtarget.isCallingConvWin64( DAG.getMachineFunction().getFunction()->getCallingConv())) // Probably a Win64 va_copy. return DAG.expandVACopy(Op.getNode()); @@ -16424,7 +16424,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, // Let the shuffle legalizer expand this shift amount node. SDValue Op0 = ShAmt.getOperand(0); Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0); - ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, &Subtarget, DAG); + ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG); } else { // Need to build a vector containing shift amount. // SSE/AVX packed shifts only use the lower 64-bit of the shift count. @@ -16452,7 +16452,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, /// \brief Return Mask with the necessary casting or extending /// for \p Mask according to \p MaskVT when lowering masking intrinsics static SDValue getMaskNode(SDValue Mask, MVT MaskVT, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG, SDLoc dl) { if (MaskVT.bitsGT(Mask.getSimpleValueType())) { @@ -16461,9 +16461,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask); } - if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { + if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) { if (MaskVT == MVT::v64i1) { - assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); + assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); // In case 32bit mode, bitcast i64 is illegal, extend/split it. SDValue Lo, Hi; Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, @@ -16499,7 +16499,7 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); @@ -16544,7 +16544,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, /// for a scalar instruction. 
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { if (isAllOnesConstant(Mask)) return Op; @@ -16626,7 +16626,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } -static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); @@ -17352,7 +17352,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // Returns one of the stack, base, or frame pointer registers, depending on // which is used to reference local variables. MachineFunction &MF = DAG.getMachineFunction(); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned Reg; if (RegInfo->hasBasePointer(MF)) Reg = RegInfo->getBaseRegister(); @@ -17366,7 +17366,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, - const X86Subtarget * Subtarget) { + const X86Subtarget &Subtarget) { SDLoc dl(Op); auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); @@ -17452,7 +17452,7 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // getReadPerformanceCounter - Handles the lowering of builtin intrinsics that // read performance monitor counters (x86_rdpmc). static void getReadPerformanceCounter(SDNode *N, SDLoc DL, - SelectionDAG &DAG, const X86Subtarget *Subtarget, + SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl &Results) { assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -17466,7 +17466,7 @@ static void getReadPerformanceCounter(SDNode *N, SDLoc DL, // Reads the content of a 64-bit performance counter and returns it in the // registers EDX:EAX. - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); @@ -17477,7 +17477,7 @@ static void getReadPerformanceCounter(SDNode *N, SDLoc DL, } Chain = HI.getValue(1); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { // The EAX register is loaded with the low-order 32 bits. The EDX register // is loaded with the supported high-order bits of the counter. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, @@ -17498,7 +17498,7 @@ static void getReadPerformanceCounter(SDNode *N, SDLoc DL, // read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is // also used to custom lower READCYCLECOUNTER nodes. 
static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, - SelectionDAG &DAG, const X86Subtarget *Subtarget, + SelectionDAG &DAG, const X86Subtarget &Subtarget, SmallVectorImpl &Results) { SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0)); @@ -17507,7 +17507,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, // The processor's time-stamp counter (a 64-bit MSR) is stored into the // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR // and the EAX register is loaded with the low-order 32 bits. - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, LO.getValue(2)); @@ -17531,7 +17531,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, MachinePointerInfo(), false, false, 0); } - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { // The EDX register is loaded with the high-order 32 bits of the MSR, and // the EAX register is loaded with the low-order 32 bits. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, @@ -17548,7 +17548,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, Results.push_back(Chain); } -static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SmallVector Results; SDLoc DL(Op); @@ -17575,7 +17575,7 @@ static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { return Chain; } -static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); @@ -17832,7 +17832,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, @@ -17850,7 +17850,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); EVT VT = Op.getValueType(); MFI->setFrameAddressIsTaken(true); @@ -17889,7 +17889,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // this table could be generated automatically from RegInfo. 
unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); const MachineFunction &MF = DAG.getMachineFunction(); unsigned Reg = StringSwitch(RegName) @@ -17905,7 +17905,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, " is allocatable: function has no frame pointer"); #ifndef NDEBUG else { - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && @@ -17922,23 +17922,23 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } unsigned X86TargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) - return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; + return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; - return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; + return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX; } unsigned X86TargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { // Funclet personalities don't use selectors (the runtime does the selection). assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))); - return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; + return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { @@ -17948,7 +17948,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDLoc dl (Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && @@ -17996,9 +17996,9 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDLoc dl (Op); const Value *TrmpAddr = cast(Op.getOperand(4))->getValue(); - const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { SDValue OutChains[6]; // Large code-model. 
@@ -18161,7 +18161,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, */ MachineFunction &MF = DAG.getMachineFunction(); - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); @@ -18266,14 +18266,14 @@ static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); } -static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); - if (VT.isVector() && Subtarget->hasAVX512()) + if (VT.isVector() && Subtarget.hasAVX512()) return LowerVectorCTLZ_AVX512(Op, DAG); Op = Op.getOperand(0); @@ -18305,7 +18305,7 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget, return Op; } -static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); EVT OpVT = VT; @@ -18434,7 +18434,7 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { return Lower256IntArith(Op, DAG); } -static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -18443,7 +18443,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1)); // Decompose 256-bit ops into smaller 128-bit ops. - if (VT.is256BitVector() && !Subtarget->hasInt256()) + if (VT.is256BitVector() && !Subtarget.hasInt256()) return Lower256IntArith(Op, DAG); SDValue A = Op.getOperand(0); @@ -18452,7 +18452,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Lower v16i8/v32i8 mul as promotion to v8i16/v16i16 vector // pairs, multiply and truncate. if (VT == MVT::v16i8 || VT == MVT::v32i8) { - if (Subtarget->hasInt256()) { + if (Subtarget.hasInt256()) { if (VT == MVT::v32i8) { MVT SubVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() / 2); SDValue Lo = DAG.getIntPtrConstant(0, dl); @@ -18480,7 +18480,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Extract the lo parts and sign extend to i16 SDValue ALo, BLo; - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A); BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B); } else { @@ -18496,7 +18496,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Extract the hi parts and sign extend to i16 SDValue AHi, BHi; - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); @@ -18524,7 +18524,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { - assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() && + assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && "Should not custom lower when pmuldq is available!"); // Extract the odd parts. 
@@ -18589,7 +18589,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, } SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetWin64() && "Unexpected target"); + assert(Subtarget.isTargetWin64() && "Unexpected target"); EVT VT = Op.getValueType(); assert(VT.isInteger() && VT.getSizeInBits() == 128 && "Unexpected return type for lowering"); @@ -18640,14 +18640,14 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons return DAG.getBitcast(VT, CallInfo.first); } -static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); MVT VT = Op0.getSimpleValueType(); SDLoc dl(Op); - assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || - (VT == MVT::v8i32 && Subtarget->hasInt256())); + assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) || + (VT == MVT::v8i32 && Subtarget.hasInt256())); // PMULxD operations multiply each even value (starting at 0) of LHS with // the related value of RHS and produce a widen result. @@ -18672,7 +18672,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64; bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI; unsigned Opcode = - (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; + (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; // PMULUDQ <4 x i32> , <4 x i32> // => <2 x i64> SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); @@ -18696,7 +18696,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // If we have a signed multiply but no PMULDQ fix up the high parts of a // unsigned multiply. - if (IsSigned && !Subtarget->hasSSE41()) { + if (IsSigned && !Subtarget.hasSSE41()) { SDValue ShAmt = DAG.getConstant( 31, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout())); @@ -18717,19 +18717,19 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // Return true if the required (according to Opcode) shift-imm form is natively // supported by the Subtarget -static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, +static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget, unsigned Opcode) { if (VT.getScalarSizeInBits() < 16) return false; if (VT.is512BitVector() && - (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI())) + (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI())) return true; bool LShift = VT.is128BitVector() || - (VT.is256BitVector() && Subtarget->hasInt256()); + (VT.is256BitVector() && Subtarget.hasInt256()); - bool AShift = LShift && (Subtarget->hasVLX() || + bool AShift = LShift && (Subtarget.hasVLX() || (VT != MVT::v2i64 && VT != MVT::v4i64)); return (Opcode == ISD::SRA) ? AShift : LShift; } @@ -18737,24 +18737,24 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, // The shift amount is a variable, but it is the same for all vector lanes. // These instructions are defined together with shift-immediate. 
static -bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, +bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget, unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } // Return true if the required (according to Opcode) variable-shift form is // natively supported by the Subtarget -static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, +static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget, unsigned Opcode) { - if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16) + if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16) return false; // vXi16 supported only on AVX-512, BWI - if (VT.getScalarSizeInBits() == 16 && !Subtarget->hasBWI()) + if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) return false; - if (VT.is512BitVector() || Subtarget->hasVLX()) + if (VT.is512BitVector() || Subtarget.hasVLX()) return true; bool LShift = VT.is128BitVector() || VT.is256BitVector(); @@ -18763,7 +18763,7 @@ static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, } static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); @@ -18813,12 +18813,12 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); // i64 SRA needs to be performed as partial shifts. - if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && - Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP()) + if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) && + Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP()) return ArithmeticShiftRight64(ShiftAmt); if (VT == MVT::v16i8 || - (Subtarget->hasInt256() && VT == MVT::v32i8) || + (Subtarget.hasInt256() && VT == MVT::v32i8) || VT == MVT::v64i8) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); @@ -18834,7 +18834,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } // XOP can shift v16i8 directly instead of as shift v8i16 + mask. - if (VT == MVT::v16i8 && Subtarget->hasXOP()) + if (VT == MVT::v16i8 && Subtarget.hasXOP()) return SDValue(); if (Op.getOpcode() == ISD::SHL) { @@ -18870,8 +18870,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } // Special case in 32-bit mode, where i64 is expanded into high and low parts. - if (!Subtarget->is64Bit() && !Subtarget->hasXOP() && - (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) { + if (!Subtarget.is64Bit() && !Subtarget.hasXOP() && + (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64))) { // Peek through any splat that was introduced for i64 shift vectorization. int SplatIndex = -1; @@ -18928,7 +18928,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, - const X86Subtarget* Subtarget) { + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); @@ -18989,7 +18989,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, } // Special case in 32-bit mode, where i64 is expanded into high and low parts. 
- if (!Subtarget->is64Bit() && VT == MVT::v2i64 && + if (!Subtarget.is64Bit() && VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); @@ -19010,7 +19010,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, return SDValue(); } -static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, +static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); @@ -19018,7 +19018,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDValue Amt = Op.getOperand(1); assert(VT.isVector() && "Custom lowering only for vector shifts!"); - assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!"); + assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!"); if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget)) return V; @@ -19031,7 +19031,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // XOP has 128-bit variable logical/arithmetic shifts. // +ve/-ve Amt = shift left/right. - if (Subtarget->hasXOP() && + if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) { if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) { @@ -19058,7 +19058,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // i64 vector arithmetic shift can be emulated with the transform: // M = lshr(SIGN_BIT, Amt) // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) - if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) && + if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && Op.getOpcode() == ISD::SRA) { SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); @@ -19073,7 +19073,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // Do this only if the vector shift count is a constant build_vector. if (Op.getOpcode() == ISD::SHL && (VT == MVT::v8i16 || VT == MVT::v4i32 || - (Subtarget->hasInt256() && VT == MVT::v16i16)) && + (Subtarget.hasInt256() && VT == MVT::v16i16)) && ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { SmallVector Elts; MVT SVT = VT.getVectorElementType(); @@ -19233,14 +19233,14 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, } if (VT == MVT::v16i8 || - (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) { + (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); @@ -19343,7 +19343,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // It's worth extending once and using the v8i32 shifts for 16-bit types, but // the extra overheads to get from v16i8 to v8i32 make the existing SSE // solution better. - if (Subtarget->hasInt256() && VT == MVT::v8i16) { + if (Subtarget.hasInt256() && VT == MVT::v8i16) { MVT ExtVT = MVT::v8i32; unsigned ExtOpc = Op.getOpcode() == ISD::SRA ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -19353,7 +19353,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt)); } - if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) { + if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) { MVT ExtVT = MVT::v8i32; SDValue Z = getZeroVector(VT, Subtarget, DAG, dl); SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z); @@ -19377,7 +19377,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2); V0 = DAG.getBitcast(ExtVT, V0); V1 = DAG.getBitcast(ExtVT, V1); @@ -19394,7 +19394,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, }; // Turn 'a' into a mask suitable for VSELECT: a = a << 12; - if (Subtarget->hasSSE41()) { + if (Subtarget.hasSSE41()) { // On SSE41 targets we need to replicate the shift mask in both // bytes for PBLENDVB. Amt = DAG.getNode( @@ -19469,7 +19469,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return SDValue(); } -static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); @@ -19477,7 +19477,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget, SDValue Amt = Op.getOperand(1); assert(VT.isVector() && "Custom lowering only for vector rotates!"); - assert(Subtarget->hasXOP() && "XOP support required for vector rotates!"); + assert(Subtarget.hasXOP() && "XOP support required for vector rotates!"); assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported"); // XOP has 128-bit vector variable + immediate rotates. @@ -19589,9 +19589,9 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { unsigned OpWidth = MemType->getPrimitiveSizeInBits(); if (OpWidth == 64) - return !Subtarget->is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b + return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b else if (OpWidth == 128) - return Subtarget->hasCmpxchg16b(); + return Subtarget.hasCmpxchg16b(); else return false; } @@ -19611,7 +19611,7 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32; + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; Type *MemType = AI->getType(); // If the operand is too big, we must see if cmpxchg8/16b is available @@ -19648,7 +19648,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } } -static bool hasMFENCE(const X86Subtarget& Subtarget) { +static bool hasMFENCE(const X86Subtarget &Subtarget) { // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for // no-sse2). There isn't any reason to disable it if the target processor // supports it. @@ -19657,7 +19657,7 @@ static bool hasMFENCE(const X86Subtarget& Subtarget) { LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { - unsigned NativeWidth = Subtarget->is64Bit() ? 
64 : 32; + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; Type *MemType = AI->getType(); // Accesses larger than the native width are turned into cmpxchg/libcalls, so // there is no benefit in turning such RMWs into loads, and it is actually @@ -19694,7 +19694,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // the IR level, so we must wrap it in an intrinsic. return nullptr; - if (!hasMFENCE(*Subtarget)) + if (!hasMFENCE(Subtarget)) // FIXME: it might make sense to use a locked operation here but on a // different cache-line to prevent cache-line bouncing. In practice it // is probably a small win, and x86 processors without mfence are rare @@ -19714,7 +19714,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { return Loaded; } -static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast( @@ -19725,7 +19725,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) { - if (hasMFENCE(*Subtarget)) + if (hasMFENCE(Subtarget)) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); SDValue Chain = Op.getOperand(0); @@ -19747,7 +19747,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); } -static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT T = Op.getSimpleValueType(); SDLoc DL(Op); @@ -19759,7 +19759,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, case MVT::i16: Reg = X86::AX; size = 2; break; case MVT::i32: Reg = X86::EAX; size = 4; break; case MVT::i64: - assert(Subtarget->is64Bit() && "Node not type legal!"); + assert(Subtarget.is64Bit() && "Node not type legal!"); Reg = X86::RAX; size = 8; break; } @@ -19789,14 +19789,14 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } -static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || SrcVT == MVT::i64) { - assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); if (DstVT != MVT::f64) // This conversion needs to be expanded. 
return SDValue(); @@ -19816,7 +19816,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0, DAG.getIntPtrConstant(i, dl))); } else { - assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() && + assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && "Unexpected source type in LowerBITCAST"); Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0, DAG.getIntPtrConstant(0, dl))); @@ -19835,8 +19835,8 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, DAG.getIntPtrConstant(0, dl)); } - assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && - Subtarget->hasMMX() && "Unexpected custom BITCAST"); + assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && + Subtarget.hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && "Unexpected custom BITCAST"); @@ -19859,7 +19859,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, /// how many bytes of V are summed horizontally to produce each element of the /// result. static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(V); MVT ByteVecVT = V.getSimpleValueType(); @@ -19924,7 +19924,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, } static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); @@ -19984,7 +19984,7 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL, } static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL, - const X86Subtarget *Subtarget, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); assert(VT.is128BitVector() && @@ -20054,7 +20054,7 @@ static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL, DAG); } -static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); // FIXME: Need to add AVX-512 support here! @@ -20063,13 +20063,13 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, SDLoc DL(Op.getNode()); SDValue Op0 = Op.getOperand(0); - if (!Subtarget->hasSSSE3()) { + if (!Subtarget.hasSSSE3()) { // We can't use the fast LUT approach, so fall back on vectorized bitmath. assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG); } - if (VT.is256BitVector() && !Subtarget->hasInt256()) { + if (VT.is256BitVector() && !Subtarget.hasInt256()) { unsigned NumElems = VT.getVectorNumElements(); // Extract each 128-bit vector, compute pop count and concat the result. 
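
A note on the LUT strategy behind LowerVectorCTPOPInRegLUT above: it treats each byte as two nibbles and uses PSHUFB as a 16-entry table lookup. The following standalone C++ sketch (not LLVM code; the helper name PopCntByteLUT is made up for illustration) shows the same idea one byte at a time:

    #include <cstdint>
    #include <cstdio>

    // 16-entry table holding the population count of every 4-bit value; this is
    // the constant the in-register LUT lowering materializes and feeds to PSHUFB.
    static const uint8_t NibblePopCnt[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                             1, 2, 2, 3, 2, 3, 3, 4};

    // Byte-wise popcount via two table lookups: low nibble plus high nibble.
    static unsigned PopCntByteLUT(uint8_t B) {
      return NibblePopCnt[B & 0xF] + NibblePopCnt[B >> 4];
    }

    int main() {
      for (unsigned V : {0x00u, 0x0Fu, 0xA5u, 0xFFu})
        std::printf("popcnt(0x%02X) = %u\n", V, PopCntByteLUT(uint8_t(V)));
      return 0;
    }

The vector lowering does roughly the same thing for a whole register at once, with LowerHorizontalByteSum handling the extra horizontal add when the element type is wider than i8.
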
@@ -20084,7 +20084,7 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG); } -static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Op.getSimpleValueType().isVector() && "We only do custom lowering for vector population count."); @@ -20157,9 +20157,9 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } -static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); + assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, // which returns the values as { float, float } (in XMM0) or @@ -20261,9 +20261,9 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, InOp, DAG.getIntPtrConstant(0, dl)); } -static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget->hasAVX512() && + assert(Subtarget.hasAVX512() && "MGATHER/MSCATTER are supported on AVX-512 arch only"); // X86 scatter kills mask register, so its type should be added to @@ -20312,7 +20312,7 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, } unsigned NumElts = VT.getVectorNumElements(); - if (!Subtarget->hasVLX() && !VT.is512BitVector() && + if (!Subtarget.hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { // AVX512F supports only 512-bit vectors. Or data or index should // be 512 bit wide. 
If now the both index and data are 256-bit, but @@ -20355,7 +20355,7 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, return SDValue(NewScatter.getNode(), 0); } -static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MaskedLoadSDNode *N = cast(Op.getNode()); @@ -20363,7 +20363,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget, SDValue Mask = N->getMask(); SDLoc dl(Op); - if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && + if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit @@ -20387,7 +20387,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget, return Op; } -static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MaskedStoreSDNode *N = cast(Op.getNode()); SDValue DataToStore = N->getValue(); @@ -20395,7 +20395,7 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget, SDValue Mask = N->getMask(); SDLoc dl(Op); - if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && + if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { // This operation is legal for targets with VLX, but without // VLX the vector should be widened to 512 bit @@ -20411,9 +20411,9 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget, return Op; } -static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget->hasAVX512() && + assert(Subtarget.hasAVX512() && "MGATHER/MSCATTER are supported on AVX-512 arch only"); MaskedGatherSDNode *N = cast(Op.getNode()); @@ -20428,7 +20428,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, unsigned NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); - if (!Subtarget->hasVLX() && !VT.is512BitVector() && + if (!Subtarget.hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { // AVX512F supports only 512-bit vectors. Or data or index should // be 512 bit wide. If now the both index and data are 256-bit, but @@ -20656,15 +20656,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, llvm_unreachable("Do not know how to custom type legalize this operation!"); case X86ISD::AVG: { // Legalize types for X86ISD::AVG by expanding vectors. - assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); auto InVT = N->getValueType(0); auto InVTSize = InVT.getSizeInBits(); const unsigned RegSize = (InVTSize > 128) ? ((InVTSize > 256) ? 
512 : 256) : 128; - assert((!Subtarget->hasAVX512() || RegSize < 512) && + assert((!Subtarget.hasAVX512() || RegSize < 512) && "512-bit vector requires AVX512"); - assert((!Subtarget->hasAVX2() || RegSize < 256) && + assert((!Subtarget.hasAVX2() || RegSize < 256) && "256-bit vector requires AVX2"); auto ElemVT = InVT.getVectorElementType(); @@ -20736,7 +20736,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; @@ -20863,7 +20863,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, break; } case ISD::BITCAST: { - assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT DstVT = N->getValueType(0); EVT SrcVT = N->getOperand(0)->getValueType(0); @@ -21154,7 +21154,7 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, if (AM.BaseGV) { unsigned GVFlags = - Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine()); + Subtarget.ClassifyGlobalReference(AM.BaseGV, getTargetMachine()); // If a reference to this global requires an extra load, we can't fold it. if (isGlobalStubReference(GVFlags)) @@ -21167,7 +21167,7 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, // If lower 4G is not available, then we must use rip-relative addressing. if ((M != CodeModel::Small || R != Reloc::Static) && - Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) + Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1)) return false; } @@ -21204,7 +21204,7 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { // On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make // variable shifts just as cheap as scalar ones. - if (Subtarget->hasInt256() && (Bits == 32 || Bits == 64)) + if (Subtarget.hasInt256() && (Bits == 32 || Bits == 64)) return false; // Otherwise, it's significantly cheaper to shift by a scalar amount than by a @@ -21253,12 +21253,12 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. 
- return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit(); + return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit(); } bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { @@ -21289,7 +21289,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; } bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - if (!Subtarget->hasAnyFMA()) + if (!Subtarget.hasAnyFMA()) return false; VT = VT.getScalarType(); @@ -21478,9 +21478,9 @@ static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, } static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // insert input VAL into EAX BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) @@ -21501,9 +21501,9 @@ static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB, } static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // insert zero to ECX BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX) @@ -21519,12 +21519,12 @@ static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB, } static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // Address into RAX/EAX, other two args into ECX, EDX. - unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; - unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r; + unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); for (int i = 0; i < X86::AddrNumOperands; ++i) MIB.addOperand(MI->getOperand(i)); @@ -21575,7 +21575,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI, MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); // Machine Information - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); @@ -21829,14 +21829,14 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( XMMSaveMBB->addSuccessor(EndMBB); // Now add the instructions. - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned CountReg = MI->getOperand(0).getReg(); int64_t RegSaveFrameIndex = MI->getOperand(1).getImm(); int64_t VarArgsFPOffset = MI->getOperand(2).getImm(); - if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) { + if (!Subtarget.isCallingConvWin64(F->getFunction()->getCallingConv())) { // If %al is 0, branch around the XMM save block. 
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB); @@ -21849,7 +21849,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( !MI->getOperand(MI->getNumOperands() - 1).isReg() || MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS) && "Expected last argument to be EFLAGS"); - unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr; + unsigned MOVOpc = Subtarget.hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; @@ -21944,7 +21944,7 @@ static bool isCMOVPseudo(MachineInstr *MI) { MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -22112,7 +22112,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. - const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV; if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) && @@ -22234,7 +22234,7 @@ X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI, case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break; case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break; } - const X86InstrInfo *TII = Subtarget->getInstrInfo(); + const X86InstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); MachineOperand MSrc = MI->getOperand(0); @@ -22267,14 +22267,14 @@ MachineBasicBlock * X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); assert(MF->shouldSplitStack()); - const bool Is64Bit = Subtarget->is64Bit(); - const bool IsLP64 = Subtarget->isTarget64BitLP64(); + const bool Is64Bit = Subtarget.is64Bit(); + const bool IsLP64 = Subtarget.isTarget64BitLP64(); const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30; @@ -22308,7 +22308,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), SPLimitVReg = MRI.createVirtualRegister(AddrRegClass), sizeVReg = MI->getOperand(1).getReg(), - physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP; + physSPReg = IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP; MachineFunction::iterator MBBIter = ++BB->getIterator(); @@ -22340,7 +22340,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, // Calls into a routine in libgcc to allocate more space from the heap. 
const uint32_t *RegMask = - Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); + Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); if (IsLP64) { BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); @@ -22397,9 +22397,9 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock * X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { - assert(!Subtarget->isTargetMachO()); + assert(!Subtarget.isTargetMachO()); DebugLoc DL = MI->getDebugLoc(); - MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe( + MachineInstr *ResumeMI = Subtarget.getFrameLowering()->emitStackProbe( *BB->getParent(), *BB, MI, DL, false); MachineBasicBlock *ResumeBB = ResumeMI->getParent(); MI->eraseFromParent(); // The pseudo instruction is gone now. @@ -22410,7 +22410,7 @@ MachineBasicBlock * X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); - const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB(); DebugLoc DL = MI->getDebugLoc(); @@ -22419,7 +22419,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI, "SEH does not use catchret!"); // Only 32-bit EH needs to worry about manually restoring stack pointers. - if (!Subtarget->is32Bit()) + if (!Subtarget.is32Bit()) return BB; // C++ EH creates a new target block to hold the restore code, and wires up @@ -22445,8 +22445,8 @@ X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI, const Constant *PerFn = MF->getFunction()->getPersonalityFn(); bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn)); // Only 32-bit SEH requires special handling for catchpad. - if (IsSEH && Subtarget->is32Bit()) { - const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + if (IsSEH && Subtarget.is32Bit()) { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE)); } @@ -22462,20 +22462,20 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // or EAX and doing an indirect call. The return value will then // be in the normal return register. MachineFunction *F = BB->getParent(); - const X86InstrInfo *TII = Subtarget->getInstrInfo(); + const X86InstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?"); + assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?"); assert(MI->getOperand(3).isGlobal() && "This should be a global"); // Get a register mask for the lowered call. // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = - Subtarget->is64Bit() ? - Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() : - Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); - if (Subtarget->is64Bit()) { + Subtarget.is64Bit() ? 
+ Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : + Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); + if (Subtarget.is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) .addReg(X86::RIP) @@ -22519,7 +22519,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); @@ -22591,7 +22591,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; const TargetRegisterClass *PtrRC = getRegClassFor(PVT); LabelReg = MRI.createVirtualRegister(PtrRC); - if (Subtarget->is64Bit()) { + if (Subtarget.is64Bit()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) .addReg(X86::RIP) .addImm(0) @@ -22604,7 +22604,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addReg(XII->getGlobalBaseReg(MF)) .addImm(0) .addReg(0) - .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference()) + .addMBB(restoreMBB, Subtarget.ClassifyBlockAddressReference()) .addReg(0); } } else @@ -22626,7 +22626,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) .addMBB(restoreMBB); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MIB.addRegMask(RegInfo->getNoPreservedMask()); thisMBB->addSuccessor(mainMBB); thisMBB->addSuccessor(restoreMBB); @@ -22645,7 +22645,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // restoreMBB: if (RegInfo->hasBasePointer(*MF)) { const bool Uses64BitFramePtr = - Subtarget->isTarget64BitLP64() || Subtarget->isTargetNaCl64(); + Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); X86MachineFunctionInfo *X86FI = MF->getInfo(); X86FI->setRestoreBasePointer(MF); unsigned FramePtr = RegInfo->getFrameRegister(*MF); @@ -22668,7 +22668,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference @@ -22683,7 +22683,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FP = (PVT == MVT::i64) ? 
X86::RBP : X86::EBP; unsigned SP = RegInfo->getStackRegister(); @@ -22803,7 +22803,7 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI, default: llvm_unreachable("Unrecognized FMA variant."); } - const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc)) .addOperand(MI->getOperand(0)) @@ -22873,7 +22873,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::RDFLAGS32: case X86::RDFLAGS64: { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); unsigned PushF = MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; unsigned Pop = @@ -22888,7 +22888,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::WRFLAGS32: case X86::WRFLAGS64: { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); unsigned Push = MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r; unsigned PopF = @@ -22914,7 +22914,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { MachineFunction *F = BB->getParent(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); // Change the floating point control register to use "round towards zero" @@ -22996,9 +22996,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128REG: case X86::PCMPESTRM128MEM: case X86::VPCMPESTRM128MEM: - assert(Subtarget->hasSSE42() && + assert(Subtarget.hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); - return EmitPCMPSTRM(MI, BB, Subtarget->getInstrInfo()); + return EmitPCMPSTRM(MI, BB, Subtarget.getInstrInfo()); // String/text processing lowering. case X86::PCMPISTRIREG: @@ -23009,9 +23009,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRIREG: case X86::PCMPESTRIMEM: case X86::VPCMPESTRIMEM: - assert(Subtarget->hasSSE42() && + assert(Subtarget.hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); - return EmitPCMPSTRI(MI, BB, Subtarget->getInstrInfo()); + return EmitPCMPSTRI(MI, BB, Subtarget.getInstrInfo()); // Thread synchronization. case X86::MONITOR: @@ -23023,7 +23023,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitRDPKRU(MI, BB, Subtarget); // xbegin case X86::XBEGIN: - return EmitXBegin(MI, BB, Subtarget->getInstrInfo()); + return EmitXBegin(MI, BB, Subtarget.getInstrInfo()); case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); @@ -23186,7 +23186,7 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, /// FIXME: This could be expanded to support 512 bit vectors as well. 
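
For context on the FP80_TO_INT*_IN_MEM expansion above, which temporarily rewrites the x87 control word to "round towards zero" before the store: the same save/override/restore pattern can be sketched portably with <cfenv>. This is only an illustration, not the backend code, and strictly speaking "#pragma STDC FENV_ACCESS ON" is required for a compiler to guarantee that the rounding-mode change is honored:

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    // Truncating double-to-long conversion done by forcing round-toward-zero,
    // converting, then restoring the caller's rounding mode, the same dance the
    // custom inserter performs on the x87 control word.
    static long TruncToLong(double X) {
      const int OldMode = std::fegetround();  // save the caller's rounding mode
      std::fesetround(FE_TOWARDZERO);         // force truncation
      const long Result = std::lrint(X);      // convert under the forced mode
      std::fesetround(OldMode);               // restore the saved mode
      return Result;
    }

    int main() {
      std::printf("%ld %ld\n", TruncToLong(2.9), TruncToLong(-2.9)); // 2 -2
      return 0;
    }
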
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget* Subtarget) { + const X86Subtarget &Subtarget) { SDLoc dl(N); ShuffleVectorSDNode *SVOp = cast(N); SDValue V1 = SVOp->getOperand(0); @@ -23272,7 +23272,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, int Depth, bool HasPSHUFB, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { assert(!Mask.empty() && "Cannot combine an empty shuffle mask!"); // Find the operand that enters the chain. Note that multiple uses are OK @@ -23324,7 +23324,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, // Check if we have SSE3 which will let us use MOVDDUP. That instruction // is no slower than UNPCKLPD but has the option to fold the input operand // into even an unaligned memory load. - if (Lo && Subtarget->hasSSE3()) { + if (Lo && Subtarget.hasSSE3()) { Shuffle = X86ISD::MOVDDUP; ShuffleVT = MVT::v2f64; } else { @@ -23346,7 +23346,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, /*AddTo*/ true); return true; } - if (Subtarget->hasSSE3() && + if (Subtarget.hasSSE3() && (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) { bool Lo = Mask.equals({0, 0, 2, 2}); unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP; @@ -23419,7 +23419,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, // can replace them with a single PSHUFB instruction profitably. Intel's // manuals suggest only using PSHUFB if doing so replacing 5 instructions, but // in practice PSHUFB tends to be *very* fast so we're more aggressive. - if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) { + if ((Depth >= 3 || HasPSHUFB) && Subtarget.hasSSSE3()) { SmallVector PSHUFBMask; int NumBytes = VT.getSizeInBits() / 8; int Ratio = NumBytes / Mask.size(); @@ -23484,7 +23484,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, int Depth, bool HasPSHUFB, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. if (Depth > 8) @@ -23888,7 +23888,7 @@ static bool setTargetShuffleZeroElements(SDValue N, /// \brief Try to combine x86 target specific shuffles. static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc DL(N); MVT VT = N.getSimpleValueType(); SmallVector Mask; @@ -24153,12 +24153,12 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, /// the operands which explicitly discard the lanes which are unused by this /// operation to try to flow through the rest of the combiner the fact that /// they're unused. 
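
combineShuffleToAddSub, which follows, matches a shuffle that interleaves the lanes of an FSUB and an FADD and folds the pair into a single add-sub operation (ADDSUBPS/ADDSUBPD). A scalar model of the semantics being matched, subtract in even lanes and add in odd lanes, written as plain C++ with illustrative names:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    // Scalar model of ADDSUBPS: even lanes compute A - B, odd lanes A + B.
    // A shuffle that takes even lanes from (A - B) and odd lanes from (A + B)
    // is therefore equivalent to this single operation.
    template <std::size_t N>
    std::array<float, N> AddSub(const std::array<float, N> &A,
                                const std::array<float, N> &B) {
      std::array<float, N> R{};
      for (std::size_t I = 0; I != N; ++I)
        R[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I];
      return R;
    }

    int main() {
      std::array<float, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
      for (float V : AddSub(A, B))
        std::printf("%g ", V); // -9 22 -27 44
      std::printf("\n");
      return 0;
    }
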
-static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget, +static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc DL(N); EVT VT = N->getValueType(0); - if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && - (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) + if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && + (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); // We only handle target-independent shuffles. @@ -24207,7 +24207,7 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget, /// PerformShuffleCombine - Performs several different shuffle combines. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc dl(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -24225,7 +24225,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return AddSub; // Combine 256-bit vector shuffles. This is only profitable when in AVX mode - if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() && + if (TLI.isTypeLegal(VT) && Subtarget.hasFp256() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); @@ -24418,7 +24418,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, } static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -24447,8 +24447,8 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, case ISD::XOR: FPOpcode = X86ISD::FXOR; break; default: return SDValue(); } - if (((Subtarget->hasSSE1() && VT == MVT::f32) || - (Subtarget->hasSSE2() && VT == MVT::f64)) && + if (((Subtarget.hasSSE1() && VT == MVT::f32) || + (Subtarget.hasSSE2() && VT == MVT::f64)) && isa(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::BITCAST && N0.getOperand(0).getOperand(0).getValueType() == VT) { @@ -24613,7 +24613,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, static SDValue transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc dl(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); @@ -24659,7 +24659,7 @@ transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, /// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc DL(N); SDValue Cond = N->getOperand(0); // Get the LHS/RHS of the select. 
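
Several of the VSELECT combines in this area lean on blend instructions such as PBLENDVB which, as noted earlier in the patch, select each byte purely on the sign bit of the corresponding mask byte. A scalar sketch of that per-lane behavior (illustrative only, not LLVM code):

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Model of PBLENDVB: for each byte lane, the MSB of the mask byte decides
    // whether the result comes from B (MSB set) or A (MSB clear). This is why
    // the lowering only has to get the sign bit of the mask right.
    template <std::size_t N>
    std::array<uint8_t, N> BlendV(const std::array<uint8_t, N> &A,
                                  const std::array<uint8_t, N> &B,
                                  const std::array<uint8_t, N> &Mask) {
      std::array<uint8_t, N> R{};
      for (std::size_t I = 0; I != N; ++I)
        R[I] = (Mask[I] & 0x80) ? B[I] : A[I];
      return R;
    }

    int main() {
      std::array<uint8_t, 4> A{1, 2, 3, 4}, B{9, 9, 9, 9};
      std::array<uint8_t, 4> M{0x80, 0x00, 0xFF, 0x01};
      for (unsigned V : BlendV(A, B, M))
        std::printf("%u ", V); // 9 2 9 4
      std::printf("\n");
      return 0;
    }
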
@@ -24676,8 +24676,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && VT != MVT::f80 && VT != MVT::f128 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && - (Subtarget->hasSSE2() || - (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { + (Subtarget.hasSSE2() || + (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); unsigned Opcode = 0; @@ -24815,7 +24815,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } EVT CondVT = Cond.getValueType(); - if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() && + if (Subtarget.hasAVX512() && VT.isVector() && CondVT.isVector() && CondVT.getVectorElementType() == MVT::i1) { // v16i8 (select v16i1, v16i8, v16i8) does not have a proper // lowering on KNL. In this case we convert it to @@ -24826,7 +24826,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if ((OpVT.is128BitVector() || OpVT.is256BitVector()) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16) && - !(Subtarget->hasBWI() && Subtarget->hasVLX())) { + !(Subtarget.hasBWI() && Subtarget.hasVLX())) { Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond); DCI.AddToWorklist(Cond.getNode()); return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS); @@ -24964,8 +24964,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Match VSELECTs into subs with unsigned saturation. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && // psubus is available in SSE2 and AVX2 for i8 and i16 vectors. - ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) || - (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) { + ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) || + (Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); // Check if one of the arms of the VSELECT is a zero vector. If it's on the @@ -25119,10 +25119,10 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (VT.getVectorElementType() == MVT::i16) return SDValue(); // Dynamic blending was only available from SSE4.1 onward. - if (VT.is128BitVector() && !Subtarget->hasSSE41()) + if (VT.is128BitVector() && !Subtarget.hasSSE41()) return SDValue(); // Byte blends are only available in AVX2 - if (VT == MVT::v32i8 && !Subtarget->hasAVX2()) + if (VT == MVT::v32i8 && !Subtarget.hasAVX2()) return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); @@ -25350,7 +25350,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc DL(N); // If the flag operand isn't dead, don't touch this CMOV. @@ -25763,11 +25763,11 @@ static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) { /// shift by a constant amount which is known to be bigger than or equal /// to the vector element size in bits. 
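
performShiftToAllZeros, defined next, folds vector logical shifts whose constant amount is at least the element width into a zero vector; x86 vector shifts produce zero for such counts, while the equivalent scalar shift in C++ would be undefined behavior. A minimal sketch of the element-wise behavior being assumed (helper name is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Vector-style logical left shift of a 32-bit element: an amount greater
    // than or equal to the element width yields 0, whereas a plain C++ '<<' by
    // 32 or more on uint32_t is undefined. This is the fact that lets the
    // combine replace such shifts with a zero vector.
    static uint32_t VecStyleShl(uint32_t V, unsigned Amt) {
      return Amt >= 32 ? 0u : (V << Amt);
    }

    int main() {
      std::printf("%u %u %u\n", (unsigned)VecStyleShl(5, 1),
                  (unsigned)VecStyleShl(5, 31),
                  (unsigned)VecStyleShl(5, 32)); // 10 2147483648 0
      return 0;
    }
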
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget->hasInt256() || + (!Subtarget.hasInt256() || (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) return SDValue(); @@ -25793,7 +25793,7 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, /// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { if (N->getOpcode() == ISD::SHL) if (SDValue V = PerformSHLCombine(N, DAG)) return V; @@ -25815,12 +25815,12 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, // and friends. Likewise for OR -> CMPNEQSS. static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { unsigned opcode; // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but // we're requiring SSE2 for both. - if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { + if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CMP0 = N0->getOperand(1); @@ -25869,7 +25869,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // FIXME: need symbolic constants for these magic numbers. // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - if (Subtarget->hasAVX512()) { + if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); @@ -25886,7 +25886,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; - if (is64BitFP && !Subtarget->is64Bit()) { + if (is64BitFP && !Subtarget.is64Bit()) { // On a 32-bit target, we cannot bitcast the 64-bit float to a // 64-bit integer, since that's not a legal type. Since // OnesOrZeroesF is all ones of all zeroes, we don't need all the @@ -25949,7 +25949,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) { // some of the transition sequences. static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); if (!VT.is256BitVector()) return SDValue(); @@ -26028,7 +26028,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); @@ -26120,7 +26120,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, /// types, try to convert this into a floating point logic node to avoid /// unnecessary moves from SSE to integer registers. 
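
convertIntLogicToFPLogic, defined next, keeps AND/OR/XOR of bitcast f32/f64 values in the SSE domain by emitting FP logic nodes such as X86ISD::FAND instead. The rewrite is sound because these are pure bit operations; a small C++ sketch of that equivalence (the bitcasts are done with memcpy, helper names are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Reinterpret helpers; memcpy avoids strict-aliasing problems.
    static uint32_t BitsOf(float F) {
      uint32_t U;
      std::memcpy(&U, &F, sizeof U);
      return U;
    }
    static float FloatOf(uint32_t U) {
      float F;
      std::memcpy(&F, &U, sizeof F);
      return F;
    }

    // ANDPS on float operands produces exactly the same bit pattern as
    // bitcasting to integer, ANDing there, and bitcasting back, which is why
    // the combine can move the operation between domains freely.
    static float FAndBits(float A, float B) {
      return FloatOf(BitsOf(A) & BitsOf(B));
    }

    int main() {
      // Classic use: clear the sign bit (fabs) by ANDing with 0x7FFFFFFF.
      float AbsMask = FloatOf(0x7FFFFFFFu);
      std::printf("%g\n", FAndBits(-3.5f, AbsMask)); // 3.5
      return 0;
    }
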
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { unsigned FPOpcode = ISD::DELETED_NODE; if (N->getOpcode() == ISD::AND) FPOpcode = X86ISD::FAND; @@ -26137,8 +26137,8 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); SDLoc DL(N); if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST && - ((Subtarget->hasSSE1() && VT == MVT::i32) || - (Subtarget->hasSSE2() && VT == MVT::i64))) { + ((Subtarget.hasSSE1() && VT == MVT::i32) || + (Subtarget.hasSSE2() && VT == MVT::i64))) { SDValue N00 = N0.getOperand(0); SDValue N10 = N1.getOperand(0); EVT N00Type = N00.getValueType(); @@ -26153,7 +26153,7 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -26175,7 +26175,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // BEXTR is ((X >> imm) & (2**size-1)) if (VT == MVT::i32 || VT == MVT::i64) { // Check for BEXTR. - if ((Subtarget->hasBMI() || Subtarget->hasTBM()) && + if ((Subtarget.hasBMI() || Subtarget.hasTBM()) && (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) { ConstantSDNode *MaskNode = dyn_cast(N1); ConstantSDNode *ShiftNode = dyn_cast(N0.getOperand(1)); @@ -26219,7 +26219,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -26235,8 +26235,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // look for psign/blend if (VT == MVT::v2i64 || VT == MVT::v4i64) { - if (!Subtarget->hasSSSE3() || - (VT == MVT::v4i64 && !Subtarget->hasInt256())) + if (!Subtarget.hasSSSE3() || + (VT == MVT::v4i64 && !Subtarget.hasInt256())) return SDValue(); // Canonicalize pandn to RHS @@ -26297,7 +26297,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(VT, Mask); } // PBLENDVB only available on SSE 4.1 - if (!Subtarget->hasSSE41()) + if (!Subtarget.hasSSE41()) return SDValue(); MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; @@ -26321,7 +26321,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // series of shifts/or that would otherwise be generated. // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions // have higher latencies and we are not optimizing for size. 
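
The OR combine in this hunk looks for (x << c) | (y >> (64 - c)), the pattern behind the SHLD/SHRD double-shift instructions, and bails out when those instructions are slow on the target and we are not optimizing for size. A scalar sketch of what such a double shift computes, assuming 0 < C < 64 so both C++ shifts stay defined (helper name is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Double shift: shift X left by C and fill the vacated low bits from the
    // high bits of Y. Only valid in this sketch for 0 < C < 64; C == 0 would
    // make the right shift count 64, which is undefined for a plain C++ shift.
    static uint64_t Shld64(uint64_t X, uint64_t Y, unsigned C) {
      return (X << C) | (Y >> (64 - C));
    }

    int main() {
      std::printf("%016llx\n",
                  (unsigned long long)Shld64(0x0123456789abcdefULL,
                                             0xf000000000000000ULL, 4));
      // 123456789abcdeff
      return 0;
    }
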
- if (!OptForSize && Subtarget->isSHLDSlow()) + if (!OptForSize && Subtarget.isSHLDSlow()) return SDValue(); if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) @@ -26460,14 +26460,14 @@ static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; - if (Subtarget->hasCMov()) + if (Subtarget.hasCMov()) if (SDValue RV = performIntegerAbsCombine(N, DAG)) return RV; @@ -26481,7 +26481,7 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, - const X86Subtarget *Subtarget, SDLoc DL) { + const X86Subtarget &Subtarget, SDLoc DL) { if (!VT.isVector() || !VT.isSimple()) return SDValue(); EVT InVT = In.getValueType(); @@ -26498,10 +26498,10 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits()) return SDValue(); - if (Subtarget->hasAVX512()) { + if (Subtarget.hasAVX512()) { if (VT.getSizeInBits() > 512) return SDValue(); - } else if (Subtarget->hasAVX2()) { + } else if (Subtarget.hasAVX2()) { if (VT.getSizeInBits() > 256) return SDValue(); } else { @@ -26600,7 +26600,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { LoadSDNode *Ld = cast(N); EVT RegVT = Ld->getValueType(0); EVT MemVT = Ld->getMemoryVT(); @@ -26652,7 +26652,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, /// PerformMLOADCombine - Resolve extending loads static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { MaskedLoadSDNode *Mld = cast(N); if (Mld->getExtensionType() != ISD::SEXTLOAD) return SDValue(); @@ -26731,7 +26731,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, } /// PerformMSTORECombine - Resolve truncating stores static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { MaskedStoreSDNode *Mst = cast(N); if (!Mst->isTruncatingStore()) return SDValue(); @@ -26820,7 +26820,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, } /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. 
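
detectAVGPattern above looks for the rounding average c = (a + b + 1) / 2 and maps it to X86ISD::AVG (the PAVGB/PAVGW instructions). Two equivalent scalar formulations of that operation, one using a wider intermediate and one that stays in 8 bits, written with illustrative helpers rather than LLVM code:

    #include <cstdint>
    #include <cstdio>

    // Rounding average (A + B + 1) / 2 computed without losing the carry; the
    // wider intermediate models the 9-bit temporary the hardware effectively uses.
    static uint8_t AvgRoundU8(uint8_t A, uint8_t B) {
      return uint8_t((unsigned(A) + unsigned(B) + 1) >> 1);
    }

    // Equivalent overflow-free formulation that never widens:
    // (A | B) - ((A ^ B) >> 1).
    static uint8_t AvgRoundU8NoWiden(uint8_t A, uint8_t B) {
      return uint8_t((A | B) - ((A ^ B) >> 1));
    }

    int main() {
      std::printf("%u %u\n", (unsigned)AvgRoundU8(250, 251),
                  (unsigned)AvgRoundU8NoWiden(250, 251)); // 251 251
      std::printf("%u %u\n", (unsigned)AvgRoundU8(0, 255),
                  (unsigned)AvgRoundU8NoWiden(0, 255));   // 128 128
      return 0;
    }
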
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { StoreSDNode *St = cast(N); EVT VT = St->getValue().getValueType(); EVT StVT = St->getMemoryVT(); @@ -26965,9 +26965,9 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = - !Subtarget->useSoftFloat() && !NoImplicitFloatOps && Subtarget->hasSSE2(); + !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); if ((VT.isVector() || - (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && + (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) && isa(St->getValue()) && !cast(St->getValue())->isVolatile() && St->getChain().hasOneUse() && !St->isVolatile()) { @@ -27006,8 +27006,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // If we are a 64-bit capable x86, lower to a single movq load/store pair. // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. - if (Subtarget->is64Bit() || F64IsLegal) { - MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + if (Subtarget.is64Bit() || F64IsLegal) { + MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), @@ -27067,7 +27067,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // to get past legalization. The execution dependencies fixup pass will // choose the optimal machine instruction for the store if this really is // an integer or v2f32 rather than an f64. - if (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit() && + if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() && St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue OldExtract = St->getOperand(1); SDValue ExtOp0 = OldExtract.getOperand(0); @@ -27212,14 +27212,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { /// Do target-specific dag combines on floating point adds. static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || - (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && + if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, LHS, RHS); return SDValue(); @@ -27227,14 +27227,14 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, /// Do target-specific dag combines on floating point subs. static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. 
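
PerformFADDCombine and PerformFSUBCombine above try to synthesize horizontal operations (X86ISD::FHADD/FHSUB) out of adds or subs of shuffled operands. A scalar model of the HADDPS result layout those combines are aiming for (illustrative only):

    #include <array>
    #include <cstdio>

    // Scalar model of HADDPS: the low half of the result holds pairwise sums of
    // A's adjacent lanes, the high half pairwise sums of B's adjacent lanes.
    // isHorizontalBinOp checks that the shuffled operands line up in this shape.
    static std::array<float, 4> HAdd(const std::array<float, 4> &A,
                                     const std::array<float, 4> &B) {
      return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
    }

    int main() {
      std::array<float, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
      for (float V : HAdd(A, B))
        std::printf("%g ", V); // 3 7 30 70
      std::printf("\n");
      return 0;
    }
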
- if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || - (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && + if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, LHS, RHS); return SDValue(); @@ -27327,7 +27327,7 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, /// element that is extracted from a vector and then truncated, and it is /// diffcult to do this optimization based on them. static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT OutVT = N->getValueType(0); if (!OutVT.isVector()) return SDValue(); @@ -27342,7 +27342,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on // SSE2, and we need to take care of it specially. // AVX512 provides vpmovdb. - if (!Subtarget->hasSSE2() || Subtarget->hasAVX2()) + if (!Subtarget.hasSSE2() || Subtarget.hasAVX2()) return SDValue(); EVT OutSVT = OutVT.getVectorElementType(); @@ -27353,7 +27353,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, return SDValue(); // SSSE3's pshufb results in less instructions in the cases below. - if (Subtarget->hasSSSE3() && NumElems == 8 && + if (Subtarget.hasSSSE3() && NumElems == 8 && ((OutSVT == MVT::i8 && InSVT != MVT::i64) || (InSVT == MVT::i32 && OutSVT == MVT::i16))) return SDValue(); @@ -27373,7 +27373,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to // truncate 2 x v4i32 to v8i16. - if (Subtarget->hasSSE41() || OutSVT == MVT::i8) + if (Subtarget.hasSSE41() || OutSVT == MVT::i8) return combineVectorTruncationWithPACKUS(N, DAG, SubVec); else if (InSVT == MVT::i32) return combineVectorTruncationWithPACKSS(N, DAG, SubVec); @@ -27382,7 +27382,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, } static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // Try to detect AVG pattern first. SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG, Subtarget, SDLoc(N)); @@ -27394,7 +27394,7 @@ static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG, /// Do target-specific dag combines on floating point negations. static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); EVT SVT = VT.getScalarType(); SDValue Arg = N->getOperand(0); @@ -27408,7 +27408,7 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, // use of a constant by performing (-0 - A*B) instead. // FIXME: Check rounding control flags as well once it becomes available. 
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) && - Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) { + Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) { SDValue Zero = DAG.getConstantFP(0.0, DL, VT); return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Zero); @@ -27436,9 +27436,9 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, } static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); - if (VT.is512BitVector() && !Subtarget->hasDQI()) { + if (VT.is512BitVector() && !Subtarget.hasDQI()) { // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention. // These logic operations may be executed in the integer domain. SDLoc dl(N); @@ -27462,7 +27462,7 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, } /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); // F[X]OR(0.0, x) -> x @@ -27500,8 +27500,8 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { } static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - if (Subtarget->useSoftFloat()) + const X86Subtarget &Subtarget) { + if (Subtarget.useSoftFloat()) return SDValue(); // TODO: Check for global or instruction-level "nnan". In that case, we @@ -27510,9 +27510,9 @@ static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG, // should be an optional swap and FMAX/FMIN. EVT VT = N->getValueType(0); - if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || - (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || - (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64)))) + if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || + (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || + (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64)))) return SDValue(); // This takes at least 3 instructions, so favor a library call when operating @@ -27557,7 +27557,7 @@ static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG, /// Do target-specific dag combines on X86ISD::FAND nodes. 
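
Regarding the FNEG combine earlier in this hunk, which rewrites fneg(fmul(A, B)) as X86ISD::FNMSUB(A, B, 0) so no -0.0 constant has to be materialized: FNMSUB computes -(A*B) - C, so with C = +0.0 the result is -(A*B), which is safe here because the hasNoSignedZeros check has already been made. A scalar sketch of that identity, using std::fma to stand in for the fused operation (not backend code):

    #include <cmath>
    #include <cstdio>

    // FNMSUB(A, B, C) = -(A*B) - C, which equals -(A*B + C), i.e. the negation
    // of an ordinary fused multiply-add. With C = +0.0 this is just -(A*B).
    static double FNMSub(double A, double B, double C) {
      return -std::fma(A, B, C);
    }

    int main() {
      double A = 3.0, B = -7.0;
      std::printf("%g %g\n", -(A * B), FNMSub(A, B, 0.0)); // 21 21
      return 0;
    }
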
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -27573,7 +27573,7 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, /// Do target-specific dag combines on X86ISD::FANDN nodes static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -27620,7 +27620,7 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { } static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); if (!VT.isVector()) return SDValue(); @@ -27641,7 +27641,7 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, // EXTLOAD has a better solution on AVX2, // it may be replaced with X86ISD::VSEXT node. - if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256()) + if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256()) if (!ISD::isNormalLoad(N00.getNode())) return SDValue(); @@ -27659,7 +27659,7 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, /// to combine math ops, use an LEA, or use a complex addressing mode. This can /// eliminate extend, add, and shift instructions. static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // TODO: This should be valid for other integer types. EVT VT = Sext->getValueType(0); if (VT != MVT::i64) @@ -27733,7 +27733,7 @@ static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) { static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SVT = VT.getScalarType(); @@ -27754,7 +27754,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - if (VT.isVector() && Subtarget->hasSSE2()) { + if (VT.isVector() && Subtarget.hasSSE2()) { auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) { EVT InVT = N.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), @@ -27790,7 +27790,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, // On pre-AVX2 targets, split into 128-bit nodes of // ISD::SIGN_EXTEND_VECTOR_INREG. 
- if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) && + if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128) && (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { unsigned NumVecs = VT.getSizeInBits() / 128; @@ -27811,7 +27811,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, } } - if (Subtarget->hasAVX() && VT.is256BitVector()) + if (Subtarget.hasAVX() && VT.is256BitVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; @@ -27822,7 +27822,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, } static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget* Subtarget) { + const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -27831,7 +27831,7 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA()) + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) return SDValue(); SDValue A = N->getOperand(0); @@ -27862,7 +27862,7 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) // This eliminates the zext. This transformation is necessary because @@ -27910,7 +27910,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, // Optimize x == -y --> x+y == 0 // x != -y --> x+y != 0 static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget* Subtarget) { + const X86Subtarget &Subtarget) { ISD::CondCode CC = cast(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -28003,7 +28003,7 @@ static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG, // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc DL(N); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); @@ -28044,7 +28044,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, // static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDLoc DL(N); SDValue Chain = N->getOperand(0); SDValue Dest = N->getOperand(1); @@ -28107,7 +28107,7 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, } static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); @@ -28132,7 +28132,7 @@ static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, } static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. 
   if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
@@ -28156,7 +28156,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
 
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
-  if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
+  if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
     LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
     EVT LdVT = Ld->getValueType(0);
 
@@ -28166,8 +28166,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
 
     if (!Ld->isVolatile() && !VT.isVector() &&
         ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
-        !Subtarget->is64Bit() && LdVT == MVT::i64) {
-      SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
+        !Subtarget.is64Bit() && LdVT == MVT::i64) {
+      SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
           SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
       DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
       return FILDChain;
@@ -28245,14 +28245,14 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
 
 /// PerformADDCombine - Do target-specific dag combines on integer adds.
 static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
 
   // Try to synthesize horizontal adds from adds of shuffles.
-  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+  if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+       (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
 
@@ -28260,7 +28260,7 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
 }
 
 static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget *Subtarget) {
+                                 const X86Subtarget &Subtarget) {
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
 
@@ -28284,8 +28284,8 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
 
   // Try to synthesize horizontal adds from adds of shuffles.
   EVT VT = N->getValueType(0);
-  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+  if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+       (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
 
@@ -28295,7 +28295,7 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
 /// performVZEXTCombine - Performs build vector combines
 static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
-                                   const X86Subtarget *Subtarget) {
+                                   const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   MVT VT = N->getSimpleValueType(0);
   SDValue Op = N->getOperand(0);
@@ -28738,13 +28738,13 @@ TargetLowering::ConstraintWeight
       weight = CW_SpecificReg;
     break;
   case 'y':
-    if (type->isX86_MMXTy() && Subtarget->hasMMX())
+    if (type->isX86_MMXTy() && Subtarget.hasMMX())
       weight = CW_SpecificReg;
     break;
   case 'x':
   case 'Y':
-    if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
-        ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
+    if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+        ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
       weight = CW_Register;
     break;
   case 'I':
@@ -28814,9 +28814,9 @@ LowerXConstraint(EVT ConstraintVT) const {
   // FP X constraints get lowered to SSE1/2 registers if available, otherwise
   // 'f' like normal targets.
   if (ConstraintVT.isFloatingPoint()) {
-    if (Subtarget->hasSSE2())
+    if (Subtarget.hasSSE2())
      return "Y";
-    if (Subtarget->hasSSE1())
+    if (Subtarget.hasSSE1())
       return "x";
   }
 
@@ -28867,7 +28867,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'L':
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
       if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
-          (Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) {
+          (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
        Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
@@ -28940,7 +28940,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
     // In any sort of PIC mode addresses need to be computed at runtime by
     // adding in a register or some sort of table lookup. These can't
     // be used as immediates.
-    if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+    if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
       return;
 
     // If we are in non-pic codegen mode, we allow the address of a global (with
@@ -28975,7 +28975,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       // If we require an extra load to get this address, as in PIC mode, we
       // can't accept it.
       if (isGlobalStubReference(
-              Subtarget->ClassifyGlobalReference(GV, DAG.getTarget())))
+              Subtarget.ClassifyGlobalReference(GV, DAG.getTarget())))
         return;
 
       Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
@@ -29005,7 +29005,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       // RIP in the class. Do they matter any more here than they do
       // in the normal allocation?
     case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
-      if (Subtarget->is64Bit()) {
+      if (Subtarget.is64Bit()) {
         if (VT == MVT::i32 || VT == MVT::f32)
           return std::make_pair(0U, &X86::GR32RegClass);
         if (VT == MVT::i16)
@@ -29033,7 +29033,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
         return std::make_pair(0U, &X86::GR8RegClass);
       if (VT == MVT::i16)
         return std::make_pair(0U, &X86::GR16RegClass);
-      if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
+      if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
        return std::make_pair(0U, &X86::GR32RegClass);
      return std::make_pair(0U, &X86::GR64RegClass);
    case 'R':   // LEGACY_REGS
@@ -29041,7 +29041,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
         return std::make_pair(0U, &X86::GR8_NOREXRegClass);
       if (VT == MVT::i16)
         return std::make_pair(0U, &X86::GR16_NOREXRegClass);
-      if (VT == MVT::i32 || !Subtarget->is64Bit())
+      if (VT == MVT::i32 || !Subtarget.is64Bit())
        return std::make_pair(0U, &X86::GR32_NOREXRegClass);
      return std::make_pair(0U, &X86::GR64_NOREXRegClass);
    case 'f':   // FP Stack registers.
@@ -29053,13 +29053,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
         return std::make_pair(0U, &X86::RFP64RegClass);
       return std::make_pair(0U, &X86::RFP80RegClass);
     case 'y':   // MMX_REGS if MMX allowed.
-      if (!Subtarget->hasMMX()) break;
+      if (!Subtarget.hasMMX()) break;
       return std::make_pair(0U, &X86::VR64RegClass);
     case 'Y':   // SSE_REGS if SSE2 allowed
-      if (!Subtarget->hasSSE2()) break;
+      if (!Subtarget.hasSSE2()) break;
       // FALL THROUGH.
     case 'x':   // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
-      if (!Subtarget->hasSSE1()) break;
+      if (!Subtarget.hasSSE1()) break;
 
       switch (VT.SimpleTy) {
       default: break;
@@ -29242,7 +29242,7 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
 }
 
 void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
-  if (!Subtarget->is64Bit())
+  if (!Subtarget.is64Bit())
     return;
 
   // Update IsSplitCSR in X86MachineFunctionInfo.
@@ -29254,12 +29254,12 @@ void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
 void X86TargetLowering::insertCopiesSplitCSR(
     MachineBasicBlock *Entry,
     const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
-  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
   if (!IStart)
     return;
 
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
   MachineBasicBlock::iterator MBBI = Entry->begin();
   for (const MCPhysReg *I = IStart; *I; ++I) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 6e48a2d..be135fc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -955,9 +955,9 @@ namespace llvm {
                                  MVT VT) const override;
 
   private:
-    /// Keep a pointer to the X86Subtarget around so that we can
+    /// Keep a reference to the X86Subtarget around so that we can
     /// make the right decision when generating code for different targets.
-    const X86Subtarget *Subtarget;
+    const X86Subtarget &Subtarget;
 
     /// Select between SSE or x87 floating point ops.
     /// When SSE is available, use it for f32 operations.
-- 
2.7.4