//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                       MVT::i32, Custom);

    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                       MVT::i32, Custom);
  } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
  } else {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
    } else {
      setOperationAction(ISD::MUL, MVT::i64, Custom);
    }
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
  } else {
    if (Subtarget.is64Bit()) {
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                         {MVT::i8, MVT::i16, MVT::i32}, Custom);
    }
  }

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
      Subtarget.hasStdExtZbkb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, XLenVT, Custom);

    // BSWAP i8 doesn't exist.
    setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);

    setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i16, Custom);

    if (Subtarget.is64Bit())
      setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i32, Custom);
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
                           ? Legal
                           : Expand);
    // Zbkb can use rev8+brev8 to implement bitreverse.
    setOperationAction(ISD::BITREVERSE, XLenVT,
                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
  }
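
  // Illustrative sketch of the GREVI mapping above (informational only, from
  // the draft Bitmanip spec rather than this file): on RV64 with Zbp,
  //   grevi rd, rs, 56   ; bswap (the rev8 pseudo)
  //   grevi rd, rs, 7    ; per-byte bit reverse (brev8 in Zbkb)
  //   grevi rd, rs, 63   ; full 64-bit bitreverse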

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);

    if (Subtarget.is64Bit())
      setOperationAction(
          {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
          MVT::i32, Custom);
  } else {
    setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
  }

  if (Subtarget.is64Bit())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction({ISD::FSHL, ISD::FSHR}, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit())
      setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
      ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
      ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
      ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction({ISD::FREM, ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT,
                        ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC,
                        ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN,
                        ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG,
                        ISD::FLOG2, ISD::FLOG10},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit())
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);

  if (Subtarget.hasStdExtF()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);

    setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
        ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
        ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
        ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
        ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,
        ISD::VP_FMUL,        ISD::VP_FDIV,
        ISD::VP_FNEG,        ISD::VP_FMA,
        ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
        ISD::VP_MERGE,       ISD::VP_SELECT,
        ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC,       ISD::VP_FP_ROUND,
        ISD::VP_FP_EXTEND};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
          Expand);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(
          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
          VT, Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(OtherVT, VT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
                         VT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);
      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);

      setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP, ISD::BSWAP}, VT,
                         Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(
          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
          VT, Custom);
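
      // E.g. a sign-extending nxv2i8 -> nxv2f64 conversion spans more than
      // one power-of-two, so it is lowered in stages, roughly:
      //   nxv2i8 --sign_extend--> nxv2i32 --sint_to_fp--> nxv2f64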

      setOperationAction(
          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(
          {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
                         VT, Expand);
      }

      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
      // type that can represent the value exactly.
      if (VT.getVectorElementType() != MVT::i64) {
        MVT FloatEltVT =
            VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
        EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                             Custom);
        }
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction(
          {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
          VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
                           OtherVT, VT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
                           Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                            ISD::FP_TO_UINT},
                           VT, Custom);

        // Operations below are different between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
                              ISD::OR, ISD::XOR},
                             VT, Custom);

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},
                             VT, Custom);
          continue;
        }

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(
            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
                           VT, Custom);

        setOperationAction(
            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

        setOperationAction(
            {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
            Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(
            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},
                           VT, Custom);

        setOperationAction(IntegerVPOps, VT, Custom);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
        // type that can represent the value exactly.
        if (VT.getVectorElementType() != MVT::i64) {
          MVT FloatEltVT =
              VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
          EVT FloatVT =
              MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
          if (isTypeLegal(FloatVT))
            setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                               Custom);
        }
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
                            ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                            ISD::MGATHER, ISD::MSCATTER},
                           VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM},
                           VT, Custom);

        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);

        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
                           VT, Custom);

        setCondCodeAction(VFPCCToExpand, VT, Expand);

        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(FloatingPointVecReduceOps, VT, Custom);

        setOperationAction(FloatingPointVPOps, VT, Custom);
      }

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
                         Custom);
      if (Subtarget.hasStdExtZfh())
        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      if (Subtarget.hasStdExtF())
        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      if (Subtarget.hasStdExtD())
        setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
                       ISD::OR, ISD::XOR, ISD::SETCC});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtF())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbp())
    setTargetDAGCombine({ISD::ROTL, ISD::ROTR});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtF())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::riscv_masked_strided_load:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT = getValueType(DL, I.getType()->getScalarType());
    Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::riscv_masked_strided_store:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(1);
    Info.memVT =
        getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
    Info.align = Align(
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
        8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOStore;
    return true;
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(0);
    Info.memVT =
        getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
    Info.align =
        Align(DL.getTypeSizeInBits(
                  I.getType()->getStructElementType(0)->getScalarType()) /
              8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |= MachineMemOperand::MOLoad;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // RVV instructions only support register addressing.
  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}
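
// Example for isLegalAddressingMode above: `lw a0, 16(a1)` ("r+i") is legal,
// while an "r+r" access is not and must materialise the address separately:
//   add a2, a1, a0
//   lw  a0, 0(a2)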

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.
  if (VT.isVector())
    return false;

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
          Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);
}

bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  auto *C = dyn_cast<ConstantSDNode>(Y);
  return C && C->getAPIntValue().ule(10);
}
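
// Example for hasBitTest above: testing bit 5 lowers to
//   andi a0, a0, 32
//   snez a0
// Positions above 10 would push the ANDI immediate (1 << Pos) past the 12-bit
// signed immediate range, hence the ule(10) cut-off.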

bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                            Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())
    return true;

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a
  // constant pool?

  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
  return Seq.size() <= Subtarget.getMaxBuildIntsCost();
}

bool RISCVTargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.
  return !XC;
}

/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  auto IsSinker = [&](Instruction *I, int Operand) {
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::Mul:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::ICmp:
    case Instruction::FCmp:
      return true;
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      return Operand == 1;
    case Instruction::Call:
      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:
        case Intrinsic::vp_fma:
          return Operand == 0 || Operand == 1;
        // FIXME: Our patterns can only match vx/vf instructions when the splat
        // is on the RHS, because TableGen doesn't recognize our VP operations
        // as commutative.
        case Intrinsic::vp_add:
        case Intrinsic::vp_mul:
        case Intrinsic::vp_and:
        case Intrinsic::vp_or:
        case Intrinsic::vp_xor:
        case Intrinsic::vp_fadd:
        case Intrinsic::vp_fmul:
        case Intrinsic::vp_shl:
        case Intrinsic::vp_lshr:
        case Intrinsic::vp_ashr:
        case Intrinsic::vp_udiv:
        case Intrinsic::vp_sdiv:
        case Intrinsic::vp_urem:
        case Intrinsic::vp_srem:
          return Operand == 1;
        // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
        // explicit patterns for both LHS and RHS (as 'vr' versions).
        case Intrinsic::vp_sub:
        case Intrinsic::vp_fsub:
        case Intrinsic::vp_fdiv:
          return Operand == 0 || Operand == 1;
        default:
          return false;
        }
      }
      return false;
    default:
      return false;
    }
  };

  for (auto OpIdx : enumerate(I->operands())) {
    if (!IsSinker(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand.
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers.
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!IsSinker(Insn, U.getOperandNo()))
        return false;
    }

    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}
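
// Example for shouldSinkOperands above: given
//   %ins = insertelement <4 x i32> undef, i32 %x, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   %r = add <4 x i32> %v, %splat
// the insertelement/shufflevector pair is sunk next to the add so that isel
// can fold the scalar %x directly into a vadd.vx.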

bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

bool RISCVTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
    return MVT::f32;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
  // We might still end up using a GPR but that will be decided based on ABI.
  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) {
      CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
      unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}
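
// Example for translateSetCCForBranch above: on RV64 the single-bit test
// (X & (1 << 20)) == 0 can't use ANDI (the mask doesn't fit in 12 bits), so
// it becomes (X << 43) >= 0, shifting the tested bit into the sign bit.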

RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVII::VLMUL::LMUL_F8;
  case 16:
    return RISCVII::VLMUL::LMUL_F4;
  case 32:
    return RISCVII::VLMUL::LMUL_F2;
  case 64:
    return RISCVII::VLMUL::LMUL_1;
  case 128:
    return RISCVII::VLMUL::LMUL_2;
  case 256:
    return RISCVII::VLMUL::LMUL_4;
  case 512:
    return RISCVII::VLMUL::LMUL_8;
  }
}
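
// Example for getLMUL above: nxv4i32 has a known minimum size of 128 bits,
// i.e. two 64-bit blocks, so it maps to LMUL_2, while nxv1i32 (32 bits) maps
// to the fractional LMUL_F2.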

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVII::VLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVII::VLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
      LMUL == RISCVII::VLMUL::LMUL_F4 ||
      LMUL == RISCVII::VLMUL::LMUL_F2 ||
      LMUL == RISCVII::VLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
  return !Subtarget.useRVVForFixedLengthVectors() ||
         (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}

bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
  if (ScalarTy->isPointerTy())
    return true;

  if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
      ScalarTy->isIntegerTy(32))
    return true;

  if (ScalarTy->isIntegerTy(64))
    return Subtarget.hasVInstructionsI64();

  if (ScalarTy->isHalfTy())
    return Subtarget.hasVInstructionsF16();
  if (ScalarTy->isFloatTy())
    return Subtarget.hasVInstructionsF32();
  if (ScalarTy->isDoubleTy())
    return Subtarget.hasVInstructionsF64();

  return false;
}

static SDValue getVLOperand(SDValue Op) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");
  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasVLOperand())
    return SDValue();
  return Op.getOperand(II->VLOperand + 1 + HasChain);
}

static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with a consistent maximum fixed size
  // across all supported vector element types to avoid legalization issues.
  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
  // fixed-length vector type we support is 1024 bytes.
  if (VT.getFixedSizeInBits() > 1024 * 8)
    return false;

  unsigned MinVLen = Subtarget.getRealMinVLen();

  MVT EltVT = VT.getVectorElementType();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (EltVT.SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  case MVT::i64:
    if (!Subtarget.hasVInstructionsI64())
      return false;
    break;
  case MVT::f16:
    if (!Subtarget.hasVInstructionsF16())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasVInstructionsF32())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasVInstructionsF64())
      return false;
    break;
  }

  // Reject elements larger than ELEN.
  if (EltVT.getSizeInBits() > Subtarget.getELEN())
    return false;

  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}
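
// Example for useRVVForFixedLengthVectorVT above: with a guaranteed VLEN of
// 128, v8i32 is 256 bits and needs LMul = 2, so it is only used when the
// fixed-length LMUL limit is at least 2; v4i32 fits a single register
// (LMul = 1).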

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}

// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are setup.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getRealMinVLen();
  unsigned MaxELen = Subtarget.getELEN();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
    unsigned NumElts =
        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
    return MVT::getScalableVectorVT(EltVT, NumElts);
  }
  }
}
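
// Example for the container computation above: with MinVLen = 128 and
// MaxELen = 64, v4i32 gives NumElts = (4 * 64) / 128 = 2, i.e. container
// nxv2i32 -- exactly one vector register at VLEN = 128.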

static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
                                          Subtarget);
}

MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}

// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable)
/// length.
static MVT getMaskTypeFor(MVT VecVT) {
  assert(VecVT.isVector());
  ElementCount EC = VecVT.getVectorElementCount();
  return MVT::getVectorVT(MVT::i1, EC);
}

1746 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
1747 /// vector length VL.
1748 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
1749 SelectionDAG &DAG) {
1750 MVT MaskVT = getMaskTypeFor(VecVT);
1751 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1754 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1755 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1756 // the vector type that it is contained in.
1757 static std::pair<SDValue, SDValue>
1758 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1759 const RISCVSubtarget &Subtarget) {
1760 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1761 MVT XLenVT = Subtarget.getXLenVT();
1762 SDValue VL = VecVT.isFixedLengthVector()
1763 ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1764 : DAG.getRegister(RISCV::X0, XLenVT);
1765 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1769 // As above but assuming the given type is a scalable vector type.
1770 static std::pair<SDValue, SDValue>
1771 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1772 const RISCVSubtarget &Subtarget) {
1773 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1774 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1777 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
1778 // little of either is (currently) supported. This can get us into an infinite
1779 // loop where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR, and so on.
1781 // Until either (or both) of these can reliably lower any node, reporting that
1782 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1783 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1784 // which is not desirable.
1785 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1786 EVT VT, unsigned DefinedValues) const {
1790 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
1791 const RISCVSubtarget &Subtarget) {
1792 // RISCV FP-to-int conversions saturate to the destination register size, but
1793 // don't produce 0 for nan. We can use a conversion instruction and fix the
1794 // nan case with a compare and a select.
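// For instance, for a signed f32 -> i32 saturate (a sketch of the idea, not
// exact output): fcvt.w.s with RTZ already clamps out-of-range inputs, and a
// (Src == Src) compare-and-select replaces the NaN result with 0.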
1795 SDValue Src = Op.getOperand(0);
1797 EVT DstVT = Op.getValueType();
1798 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1800 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1803 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1804 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1805 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1808 // FIXME: Support other SatVTs by clamping before or after the conversion.
1811 SDValue FpToInt = DAG.getNode(
1812 Opc, DL, DstVT, Src,
1813 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1815 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1816 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1819 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
1820 // and back, taking care to avoid converting values that are nan or already integers.
1822 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
1823 // have FRM dependencies modeled yet.
1824 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
1825 const RISCVSubtarget &Subtarget) {
1826 MVT VT = Op.getSimpleValueType();
1827 assert(VT.isVector() && "Unexpected type");
1831 SDValue Src = Op.getOperand(0);
1833 MVT ContainerVT = VT;
1834 if (VT.isFixedLengthVector()) {
1835 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1836 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1839 SDValue TrueMask, VL;
1840 std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1842 // Freeze the source since we are increasing the number of uses.
1843 Src = DAG.getFreeze(Src);
1845 // We do the conversion on the absolute value and fix the sign at the end.
1847 DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, TrueMask, VL);
1849 // Determine the largest integer that can be represented exactly. This and
1850 // values larger than it don't have any fractional bits so don't need to be converted.
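// (For f32, whose precision is 24 bits, this bound is 2^23: every value with
// magnitude >= 2^23 is already an integer.)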
1852 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
1853 unsigned Precision = APFloat::semanticsPrecision(FltSem);
1854 APFloat MaxVal = APFloat(FltSem);
1855 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1856 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1857 SDValue MaxValNode =
1858 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
1859 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1860 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
1862 // If abs(Src) was larger than MaxVal or nan, keep it.
1863 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1864 SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
1865 DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
1867 // Truncate to integer and convert back to FP.
1868 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
1870 DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
1871 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
1874 if (Op.getOpcode() == ISD::FCEIL) {
1875 // If the truncated value is greater than or equal to the original
1876 // value, we've computed the ceil. Otherwise, we went the wrong way and
1877 // need to increase by 1.
1878 // FIXME: This should use a masked operation. Handle here or in isel?
1880 DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
1881 SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1882 DAG.getUNDEF(ContainerVT), SplatVal, VL);
1883 SDValue NeedAdjust =
1884 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Truncated, Src,
1885 DAG.getCondCode(ISD::SETOLT), Mask, VL);
1886 Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
1887 Splat, Truncated, NeedAdjust, VL);
1888 } else if (Op.getOpcode() == ISD::FFLOOR) {
1889 // If the truncated value is less than or equal to the original value,
1890 // we've computed the floor. Otherwise, we went the wrong way and need to decrease by 1.
1892 // FIXME: This should use a masked operation. Handle here or in isel?
1894 DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
1895 SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1896 DAG.getUNDEF(ContainerVT), SplatVal, VL);
1897 SDValue NeedAdjust =
1898 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Src, Truncated,
1899 DAG.getCondCode(ISD::SETOLT), Mask, VL);
1900 Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
1901 Splat, Truncated, NeedAdjust, VL);
1904 // Restore the original sign so that -0.0 is preserved.
1905 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
1906 Src, Src, Mask, VL);
1908 if (!VT.isFixedLengthVector())
1911 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
1914 // ISD::FROUND is defined to round to nearest with ties rounding away from 0.
1915 // This mode isn't supported in vector hardware on RISCV. But as long as we
1916 // aren't compiling with trapping math, we can emulate this with
1917 // floor(X + copysign(nextafter(0.5, 0.0), X)).
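// A sketch of why the nextafter matters: in f32, 2.5 + 0.49999997 rounds (in
// round-to-nearest) up to exactly 3.0, so the floor gives 3 as desired, while
// 0.49999997 + 0.49999997 stays below 1.0 and floors to 0. Adding a plain 0.5
// would incorrectly round values like 0.49999997 up to 1.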
1918 // FIXME: Could be shorter by changing rounding mode, but we don't have FRM
1919 // dependencies modeled yet.
1920 static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG,
1921 const RISCVSubtarget &Subtarget) {
1922 MVT VT = Op.getSimpleValueType();
1923 assert(VT.isVector() && "Unexpected type");
1927 SDValue Src = Op.getOperand(0);
1929 MVT ContainerVT = VT;
1930 if (VT.isFixedLengthVector()) {
1931 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1932 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1935 SDValue TrueMask, VL;
1936 std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1938 // Freeze the source since we are increasing the number of uses.
1939 Src = DAG.getFreeze(Src);
1941 // We do the conversion on the absolute value and fix the sign at the end.
1943 DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, TrueMask, VL);
1945 // Determine the largest integer that can be represented exactly. This and
1946 // values larger than it don't have any fractional bits so don't need to be converted.
1948 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
1949 unsigned Precision = APFloat::semanticsPrecision(FltSem);
1950 APFloat MaxVal = APFloat(FltSem);
1951 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1952 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1953 SDValue MaxValNode =
1954 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
1955 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1956 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
1958 // If abs(Src) was larger than MaxVal or nan, keep it.
1959 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1960 SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, Abs, MaxValSplat,
1961 DAG.getCondCode(ISD::SETOLT), TrueMask, VL);
1964 APFloat Point5Pred = APFloat(0.5f);
1965 Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
1966 Point5Pred.next(/*nextDown*/ true);
1968 DAG.getConstantFP(Point5Pred, DL, ContainerVT.getVectorElementType());
1969 SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1970 DAG.getUNDEF(ContainerVT), SplatVal, VL);
1972 // Add the adjustment.
1973 SDValue Adjust = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Abs, Splat,
1974 DAG.getUNDEF(ContainerVT), Mask, VL);
1976 // Truncate to integer and convert back to fp.
1977 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
1979 DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Adjust, Mask, VL);
1981 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
1984 // Restore the original sign and merge the original source into masked-off lanes.
1986 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
1987 Src, Src, Mask, VL);
1989 if (!VT.isFixedLengthVector())
1992 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
1995 struct VIDSequence {
1996 int64_t StepNumerator;
1997 unsigned StepDenominator;
2001 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2002 // to the (non-zero) step S and start value X. This can then be lowered as the
2003 // RVV sequence (VID * S) + X, for example.
2004 // The step S is represented as an integer numerator divided by a positive
2005 // denominator. Note that the implementation currently only identifies
2006 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
2007 // cannot detect 2/3, for example.
2008 // Note that this method will also match potentially unappealing index
2009 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
2010 // determine whether this is worth generating code for.
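// For example (illustrative): <0,2,4,6> yields {StepNumerator=2,
// StepDenominator=1, Addend=0}, and <1,1,2,2> yields {1, 2, 1}, i.e.
// X = 1 and S = 1/2.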
2011 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2012 unsigned NumElts = Op.getNumOperands();
2013 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2014 if (!Op.getValueType().isInteger())
2017 Optional<unsigned> SeqStepDenom;
2018 Optional<int64_t> SeqStepNum, SeqAddend;
2019 Optional<std::pair<uint64_t, unsigned>> PrevElt;
2020 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2021 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2022 // Assume undef elements match the sequence; we just have to be careful
2023 // when interpolating across them.
2024 if (Op.getOperand(Idx).isUndef())
2026 // The BUILD_VECTOR must be all constants.
2027 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2030 uint64_t Val = Op.getConstantOperandVal(Idx) &
2031 maskTrailingOnes<uint64_t>(EltSizeInBits);
2034 // Calculate the step since the last non-undef element, and ensure
2035 // it's consistent across the entire sequence.
2036 unsigned IdxDiff = Idx - PrevElt->second;
2037 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2039 // A zero value difference means that we're somewhere in the middle
2040 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2041 // step change before evaluating the sequence.
2045 int64_t Remainder = ValDiff % IdxDiff;
2046 // Normalize the step if it's greater than 1.
2047 if (Remainder != ValDiff) {
2048 // The difference must cleanly divide the element span.
2056 SeqStepNum = ValDiff;
2057 else if (ValDiff != SeqStepNum)
2061 SeqStepDenom = IdxDiff;
2062 else if (IdxDiff != *SeqStepDenom)
2066 // Record this non-undef element for later.
2067 if (!PrevElt || PrevElt->first != Val)
2068 PrevElt = std::make_pair(Val, Idx);
2071 // We need to have logged a step for this to count as a legal index sequence.
2072 if (!SeqStepNum || !SeqStepDenom)
2075 // Loop back through the sequence and validate elements we might have skipped
2076 // while waiting for a valid step. While doing this, log any sequence addend.
2077 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2078 if (Op.getOperand(Idx).isUndef())
2080 uint64_t Val = Op.getConstantOperandVal(Idx) &
2081 maskTrailingOnes<uint64_t>(EltSizeInBits);
2082 uint64_t ExpectedVal =
2083 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
2084 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
2087 else if (Addend != SeqAddend)
2091 assert(SeqAddend && "Must have an addend if we have a step");
2093 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
2096 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
2097 // and lower it as a VRGATHER_VX_VL from the source vector.
2098 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
2100 const RISCVSubtarget &Subtarget) {
2101 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2103 SDValue Vec = SplatVal.getOperand(0);
2104 // Only perform this optimization on vectors of the same size for simplicity.
2105 // Don't perform this optimization for i1 vectors.
2106 // FIXME: Support i1 vectors, maybe by promoting to i8?
2107 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
2109 SDValue Idx = SplatVal.getOperand(1);
2110 // The index must be a legal type.
2111 if (Idx.getValueType() != Subtarget.getXLenVT())
2114 MVT ContainerVT = VT;
2115 if (VT.isFixedLengthVector()) {
2116 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2117 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2121 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2123 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
2124 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
2126 if (!VT.isFixedLengthVector())
2129 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2132 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
2133 const RISCVSubtarget &Subtarget) {
2134 MVT VT = Op.getSimpleValueType();
2135 assert(VT.isFixedLengthVector() && "Unexpected vector!");
2137 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2141 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2143 MVT XLenVT = Subtarget.getXLenVT();
2144 unsigned NumElts = Op.getNumOperands();
2146 if (VT.getVectorElementType() == MVT::i1) {
2147 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2148 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2149 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2152 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2153 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2154 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
2157 // Lower constant mask BUILD_VECTORs via an integer vector type, in
2158 // scalar integer chunks whose bit-width depends on the number of mask bits.
2160 // First, determine the most appropriate scalar integer type to use. This
2161 // is at most XLenVT, but may be shrunk to a smaller vector element type
2162 // according to the size of the final vector - use i8 chunks rather than
2163 // XLenVT if we're producing a v8i1. This results in more consistent
2164 // codegen across RV32 and RV64.
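// For example (an illustration): a constant v16i1 mask is packed into a
// single i16 chunk, built as a v1i16, and bitcast back to v16i1.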
2165 unsigned NumViaIntegerBits =
2166 std::min(std::max(NumElts, 8u), Subtarget.getXLen());
2167 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
2168 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
2169 // If we have to use more than one INSERT_VECTOR_ELT then this
2170 // optimization is likely to increase code size; avoid performing it in
2171 // such a case. We can use a load from a constant pool in this case.
2172 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
2174 // Now we can create our integer vector type. Note that it may be larger
2175 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
2176 MVT IntegerViaVecVT =
2177 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2178 divideCeil(NumElts, NumViaIntegerBits));
2181 unsigned BitPos = 0, IntegerEltIdx = 0;
2182 SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2184 for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2185 // Once we accumulate enough bits to fill our scalar type, insert into
2186 // our vector and clear our accumulated data.
2187 if (I != 0 && I % NumViaIntegerBits == 0) {
2188 if (NumViaIntegerBits <= 32)
2189 Bits = SignExtend64<32>(Bits);
2190 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2191 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2192 Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2197 SDValue V = Op.getOperand(I);
2198 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2199 Bits |= ((uint64_t)BitValue << BitPos);
2202 // Insert the (remaining) scalar value into position in our integer vector.
2204 if (NumViaIntegerBits <= 32)
2205 Bits = SignExtend64<32>(Bits);
2206 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2207 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2208 DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2210 if (NumElts < NumViaIntegerBits) {
2211 // If we're producing a smaller vector than our minimum legal integer
2212 // type, bitcast to the equivalent (known-legal) mask type, and extract the subvector we need.
2214 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2215 Vec = DAG.getBitcast(MVT::v8i1, Vec);
2216 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2217 DAG.getConstant(0, DL, XLenVT));
2219 // Else we must have produced an integer type with the same size as the
2220 // mask type; bitcast for the final result.
2221 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2222 Vec = DAG.getBitcast(VT, Vec);
2228 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2229 // vector type, we have a legal equivalently-sized i8 type, so we can use that.
2231 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2232 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2235 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2236 // For a splat, perform a scalar truncate before creating the wider vector.
2238 assert(Splat.getValueType() == XLenVT &&
2239 "Unexpected type for i1 splat value");
2240 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2241 DAG.getConstant(1, DL, XLenVT));
2242 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2244 SmallVector<SDValue, 8> Ops(Op->op_values());
2245 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2246 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2247 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2250 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2253 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2254 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2256 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2257 : RISCVISD::VMV_V_X_VL;
2259 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2260 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2263 // Try and match index sequences, which we can lower to the vid instruction
2264 // with optional modifications. An all-undef vector is matched by
2265 // getSplatValue, above.
2266 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2267 int64_t StepNumerator = SimpleVID->StepNumerator;
2268 unsigned StepDenominator = SimpleVID->StepDenominator;
2269 int64_t Addend = SimpleVID->Addend;
2271 assert(StepNumerator != 0 && "Invalid step");
2272 bool Negate = false;
2273 int64_t SplatStepVal = StepNumerator;
2274 unsigned StepOpcode = ISD::MUL;
2275 if (StepNumerator != 1) {
2276 if (isPowerOf2_64(std::abs(StepNumerator))) {
2277 Negate = StepNumerator < 0;
2278 StepOpcode = ISD::SHL;
2279 SplatStepVal = Log2_64(std::abs(StepNumerator));
2283 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2284 // threshold since it's the immediate value many RVV instructions accept.
2285 // There is no vmul.vi instruction so ensure the multiply constant can fit in
2286 // a single addi instruction.
2287 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2288 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2289 isPowerOf2_32(StepDenominator) &&
2290 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2291 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2292 // Convert right out of the scalable type so we can use standard ISD
2293 // nodes for the rest of the computation. If we used scalable types with
2294 // these, we'd lose the fixed-length vector info and generate worse code.
2296 VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2297 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2298 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2299 SDValue SplatStep = DAG.getSplatBuildVector(
2300 VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2301 VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2303 if (StepDenominator != 1) {
2304 SDValue SplatStep = DAG.getSplatBuildVector(
2305 VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2306 VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2308 if (Addend != 0 || Negate) {
2309 SDValue SplatAddend = DAG.getSplatBuildVector(
2310 VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2311 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2317 // Attempt to detect "hidden" splats, which only reveal themselves as splats
2318 // when re-interpreted as a vector with a larger element type. For example,
2319 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2320 // could be instead splat as
2321 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2322 // TODO: This optimization could also work on non-constant splats, but it
2323 // would require bit-manipulation instructions to construct the splat value.
2324 SmallVector<SDValue> Sequence;
2325 unsigned EltBitSize = VT.getScalarSizeInBits();
2326 const auto *BV = cast<BuildVectorSDNode>(Op);
2327 if (VT.isInteger() && EltBitSize < 64 &&
2328 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2329 BV->getRepeatedSequence(Sequence) &&
2330 (Sequence.size() * EltBitSize) <= 64) {
2331 unsigned SeqLen = Sequence.size();
2332 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2333 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2334 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2335 ViaIntVT == MVT::i64) &&
2336 "Unexpected sequence type");
2338 unsigned EltIdx = 0;
2339 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2340 uint64_t SplatValue = 0;
2341 // Construct the amalgamated value which can be splatted as this larger vector type.
2343 for (const auto &SeqV : Sequence) {
2344 if (!SeqV.isUndef())
2345 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2346 << (EltIdx * EltBitSize));
2350 // On RV64, sign-extend from 32 to 64 bits where possible in order to
2351 // achieve better constant materialization.
2352 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2353 SplatValue = SignExtend64<32>(SplatValue);
2355 // Since we can't introduce illegal i64 types at this stage, we can only
2356 // perform an i64 splat on RV32 if it is its own sign-extended value. That
2357 // way we can use RVV instructions to splat.
2358 assert((ViaIntVT.bitsLE(XLenVT) ||
2359 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2360 "Unexpected bitcast sequence");
2361 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2363 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2364 MVT ViaContainerVT =
2365 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2367 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2368 DAG.getUNDEF(ViaContainerVT),
2369 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2370 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2371 return DAG.getBitcast(VT, Splat);
2375 // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2376 // which constitute a large proportion of the elements. In such cases we can
2377 // splat a vector with the dominant element and make up the shortfall with
2378 // INSERT_VECTOR_ELTs.
2379 // Note that this includes vectors of 2 elements by association. The
2380 // upper-most element is the "dominant" one, allowing us to use a splat to
2381 // "insert" the upper element, and an insert of the lower element at position
2382 // 0, which improves codegen.
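// For example (an illustration): <2,2,2,2,3,2,2,2> can be built as a splat
// of 2 followed by a single INSERT_VECTOR_ELT of 3 at index 4.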
2383 SDValue DominantValue;
2384 unsigned MostCommonCount = 0;
2385 DenseMap<SDValue, unsigned> ValueCounts;
2386 unsigned NumUndefElts =
2387 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2389 // Track the number of scalar loads we know we'd be inserting, estimated as
2390 // any non-zero floating-point constant. Other kinds of element are either
2391 // already in registers or are materialized on demand. The threshold at which
2392 // a vector load is more desirable than several scalar materializations and
2393 // vector-insertion instructions is not known.
2394 unsigned NumScalarLoads = 0;
2396 for (SDValue V : Op->op_values()) {
2400 ValueCounts.insert(std::make_pair(V, 0));
2401 unsigned &Count = ValueCounts[V];
2403 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2404 NumScalarLoads += !CFP->isExactlyValue(+0.0);
2406 // Is this value dominant? In case of a tie, prefer the highest element as
2407 // it's cheaper to insert near the beginning of a vector than it is at the end.
2409 if (++Count >= MostCommonCount) {
2411 MostCommonCount = Count;
2415 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2416 unsigned NumDefElts = NumElts - NumUndefElts;
2417 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2419 // Don't perform this optimization when optimizing for size, since
2420 // materializing elements and inserting them tends to cause code bloat.
2421 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2422 ((MostCommonCount > DominantValueCountThreshold) ||
2423 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2424 // Start by splatting the most common element.
2425 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2427 DenseSet<SDValue> Processed{DominantValue};
2428 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2429 for (const auto &OpIdx : enumerate(Op->ops())) {
2430 const SDValue &V = OpIdx.value();
2431 if (V.isUndef() || !Processed.insert(V).second)
2433 if (ValueCounts[V] == 1) {
2434 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2435 DAG.getConstant(OpIdx.index(), DL, XLenVT));
2437 // Blend in all instances of this value using a VSELECT, using a
2438 // mask where each bit signals whether that element is the one we are blending in.
2440 SmallVector<SDValue> Ops;
2441 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2442 return DAG.getConstant(V == V1, DL, XLenVT);
2444 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2445 DAG.getBuildVector(SelMaskTy, DL, Ops),
2446 DAG.getSplatBuildVector(VT, DL, V), Vec);
2456 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2457 SDValue Lo, SDValue Hi, SDValue VL,
2458 SelectionDAG &DAG) {
2460 Passthru = DAG.getUNDEF(VT);
2461 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2462 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2463 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2464 // If Hi is the sign-extension of Lo (all of Hi's bits equal Lo's sign bit),
2465 // lower this as a custom node in order to try and match RVV vector/scalar instructions.
2466 if ((LoC >> 31) == HiC)
2467 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2469 // If VL is VLMAX (encoded as all-ones) and the Hi constant is equal to Lo,
2470 // we can use a vmv.v.x whose EEW = 32 to lower it.
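// For example (a sketch): splatting the i64 0x0000000100000001 on RV32 with
// an all-ones VL becomes a single EEW=32 vmv.v.x of 1 followed by a bitcast,
// since both halves are equal.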
2471 auto *Const = dyn_cast<ConstantSDNode>(VL);
2472 if (LoC == HiC && Const && Const->isAllOnesValue()) {
2473 MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2474 // TODO: if vl <= min(VLMAX), we can also do this. But we cannot
2475 // access the subtarget from here.
2476 auto InterVec = DAG.getNode(
2477 RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2478 DAG.getRegister(RISCV::X0, MVT::i32));
2479 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2483 // Fall back to a stack store and stride x0 vector load.
2484 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2488 // Called by type legalization to handle splat of i64 on RV32.
2490 // FIXME: We can optimize this when the type has sign or zero bits in one of the halves.
2491 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2492 SDValue Scalar, SDValue VL,
2493 SelectionDAG &DAG) {
2494 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2495 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2496 DAG.getConstant(0, DL, MVT::i32));
2497 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2498 DAG.getConstant(1, DL, MVT::i32));
2499 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2502 // This function lowers a splat of a scalar operand Scalar with the vector
2503 // length VL. It ensures the final sequence is type legal, which is useful when
2504 // lowering a splat after type legalization.
2505 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2506 MVT VT, SDLoc DL, SelectionDAG &DAG,
2507 const RISCVSubtarget &Subtarget) {
2508 bool HasPassthru = Passthru && !Passthru.isUndef();
2509 if (!HasPassthru && !Passthru)
2510 Passthru = DAG.getUNDEF(VT);
2511 if (VT.isFloatingPoint()) {
2512 // If VL is 1, we could use vfmv.s.f.
2513 if (isOneConstant(VL))
2514 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2515 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2518 MVT XLenVT = Subtarget.getXLenVT();
2520 // Simplest case is that the operand needs to be promoted to XLenVT.
2521 if (Scalar.getValueType().bitsLE(XLenVT)) {
2522 // If the operand is a constant, sign extend to increase our chances
2523 // of being able to use a .vi instruction. ANY_EXTEND would become
2524 // a zero extend and the simm5 check in isel would fail.
2525 // FIXME: Should we ignore the upper bits in isel instead?
2527 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2528 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2529 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2530 // If VL is 1 and the scalar value won't benefit from an immediate, we can use vmv.s.x.
2532 if (isOneConstant(VL) &&
2533 (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2534 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2535 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2538 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2539 "Unexpected scalar for splat lowering!");
2541 if (isOneConstant(VL) && isNullConstant(Scalar))
2542 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2543 DAG.getConstant(0, DL, XLenVT), VL);
2545 // Otherwise use the more complicated splatting algorithm.
2546 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
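// (Illustration) An interleave shuffle takes the even result elements from
// the low half of one source and the odd ones from the low half of the other;
// e.g. with two v8i8 sources, mask <0,8,1,9,2,10,3,11> interleaves lo(V1) and
// lo(V2).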
2549 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2550 const RISCVSubtarget &Subtarget) {
2551 // We need to be able to widen elements to the next larger integer type.
2552 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
2555 int Size = Mask.size();
2556 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2558 int Srcs[] = {-1, -1};
2559 for (int i = 0; i != Size; ++i) {
2560 // Ignore undef elements.
2564 // Is this an even or odd element.
2567 // Ensure we consistently use the same source for this element polarity.
2568 int Src = Mask[i] / Size;
2571 if (Srcs[Pol] != Src)
2574 // Make sure the element within the source is appropriate for this element
2575 // in the destination.
2576 int Elt = Mask[i] % Size;
2581 // We need to find a source for each polarity and they can't be the same.
2582 if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2585 // Swap the sources if the second source was in the even polarity.
2586 SwapSources = Srcs[0] > Srcs[1];
2591 /// Match shuffles that concatenate two vectors, rotate the concatenation,
2592 /// and then extract the original number of elements from the rotated result.
2593 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
2594 /// returned rotation amount is for a rotate right, where elements move from
2595 /// higher elements to lower elements. \p LoSrc indicates the first source
2596 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
2597 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
2598 /// 0 or 1 if a rotation is found.
2600 /// NOTE: We talk about rotate to the right which matches how bit shift and
2601 /// rotate instructions are described where LSBs are on the right, but LLVM IR
2602 /// and the table below write vectors with the lowest elements on the left.
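/// For example (illustrative), with two v8 inputs the mask
/// [11, 12, 13, 14, 15, 0, 1, 2] is a rotation by 3 with LoSrc = 0 and
/// HiSrc = 1.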
2603 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
2604 int Size = Mask.size();
2606 // We need to detect various ways of spelling a rotation:
2607 // [11, 12, 13, 14, 15, 0, 1, 2]
2608 // [-1, 12, 13, 14, -1, -1, 1, -1]
2609 // [-1, -1, -1, -1, -1, -1, 1, 2]
2610 // [ 3, 4, 5, 6, 7, 8, 9, 10]
2611 // [-1, 4, 5, 6, -1, -1, 9, -1]
2612 // [-1, 4, 5, 6, -1, -1, -1, -1]
2616 for (int i = 0; i != Size; ++i) {
2621 // Determine where a rotate vector would have started.
2622 int StartIdx = i - (M % Size);
2623 // The identity rotation isn't interesting, stop.
2627 // If we found the tail of a vector the rotation must be the missing
2628 // front. If we found the head of a vector, it must be how much of the
2630 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
2633 Rotation = CandidateRotation;
2634 else if (Rotation != CandidateRotation)
2635 // The rotations don't match, so we can't match this mask.
2638 // Compute which value this mask is pointing at.
2639 int MaskSrc = M < Size ? 0 : 1;
2641 // Compute which of the two target values this index should be assigned to.
2642 // This reflects whether the high elements or the low elements are remaining.
2644 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
2646 // Either set up this value if we've not encountered it before, or check
2647 // that it remains consistent.
2649 TargetSrc = MaskSrc;
2650 else if (TargetSrc != MaskSrc)
2651 // This may be a rotation, but it pulls from the inputs in some
2652 // unsupported interleaving.
2656 // Check that we successfully analyzed the mask, and normalize the results.
2657 assert(Rotation != 0 && "Failed to locate a viable rotation!");
2658 assert((LoSrc >= 0 || HiSrc >= 0) &&
2659 "Failed to find a rotated input vector!");
2664 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2665 const RISCVSubtarget &Subtarget) {
2666 SDValue V1 = Op.getOperand(0);
2667 SDValue V2 = Op.getOperand(1);
2669 MVT XLenVT = Subtarget.getXLenVT();
2670 MVT VT = Op.getSimpleValueType();
2671 unsigned NumElts = VT.getVectorNumElements();
2672 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2674 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2676 SDValue TrueMask, VL;
2677 std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2679 if (SVN->isSplat()) {
2680 const int Lane = SVN->getSplatIndex();
2682 MVT SVT = VT.getVectorElementType();
2684 // Turn splatted vector load into a strided load with an X0 stride.
2686 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2688 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2690 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2692 V.getOperand(0).getSimpleValueType().getVectorNumElements();
2693 V = V.getOperand(Offset / OpElements);
2694 Offset %= OpElements;
2697 // We need to ensure the load isn't atomic or volatile.
2698 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2699 auto *Ld = cast<LoadSDNode>(V);
2700 Offset *= SVT.getStoreSize();
2701 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2702 TypeSize::Fixed(Offset), DL);
2704 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2705 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2706 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2708 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2709 SDValue Ops[] = {Ld->getChain(),
2711 DAG.getUNDEF(ContainerVT),
2713 DAG.getRegister(RISCV::X0, XLenVT),
2715 SDValue NewLoad = DAG.getMemIntrinsicNode(
2716 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2717 DAG.getMachineFunction().getMachineMemOperand(
2718 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2719 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2720 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2723 // Otherwise use a scalar load and splat. This will give the best
2724 // opportunity to fold a splat into the operation. ISel can turn it into
2725 // the x0 strided load if we aren't able to fold away the select.
2726 if (SVT.isFloatingPoint())
2727 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2728 Ld->getPointerInfo().getWithOffset(Offset),
2729 Ld->getOriginalAlign(),
2730 Ld->getMemOperand()->getFlags());
2732 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2733 Ld->getPointerInfo().getWithOffset(Offset), SVT,
2734 Ld->getOriginalAlign(),
2735 Ld->getMemOperand()->getFlags());
2736 DAG.makeEquivalentMemoryOrdering(Ld, V);
2739 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2741 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
2742 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2745 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2746 assert(Lane < (int)NumElts && "Unexpected lane!");
2747 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
2748 V1, DAG.getConstant(Lane, DL, XLenVT),
2749 DAG.getUNDEF(ContainerVT), TrueMask, VL);
2750 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2754 ArrayRef<int> Mask = SVN->getMask();
2756 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
2757 // be undef, which can be handled with a single SLIDEDOWN/UP.
2759 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
2763 LoV = LoSrc == 0 ? V1 : V2;
2764 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
2767 HiV = HiSrc == 0 ? V1 : V2;
2768 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
2771 // We found a rotation. We need to slide HiV down by Rotation. Then we need
2772 // to slide LoV up by (NumElts - Rotation).
2773 unsigned InvRotate = NumElts - Rotation;
2775 SDValue Res = DAG.getUNDEF(ContainerVT);
2777 // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
2778 // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
2779 // causes multiple vsetvlis in some test cases such as lowering
2781 SDValue DownVL = VL;
2783 DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
2785 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
2786 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
2789 Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
2790 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
2792 return convertFromScalableVector(VT, Res, DAG, Subtarget);
2795 // Detect an interleave shuffle and lower to
2796 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
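// Why this works (a sketch): the vwaddu produces V1 + V2 zero-extended into
// double-width elements, and the multiply-accumulate adds (2^eltbits - 1) * V2,
// so each wide element holds V1 + (V2 << eltbits), i.e. V2 in its high half
// and V1 in its low half -- exactly the interleaved pair when narrowed back.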
2798 if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
2799 // Swap sources if needed.
2803 // Extract the lower half of the vectors.
2804 MVT HalfVT = VT.getHalfNumVectorElementsVT();
2805 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
2806 DAG.getConstant(0, DL, XLenVT));
2807 V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
2808 DAG.getConstant(0, DL, XLenVT));
2810 // Double the element width and halve the number of elements in an int type.
2811 unsigned EltBits = VT.getScalarSizeInBits();
2812 MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2814 MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
2815 // Convert this to a scalable vector. We need to base this on the
2816 // destination size to ensure there's always a type with a smaller LMUL.
2817 MVT WideIntContainerVT =
2818 getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
2820 // Convert sources to scalable vectors with the same element count as the wide container type.
2822 MVT HalfContainerVT = MVT::getVectorVT(
2823 VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
2824 V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
2825 V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
2827 // Cast sources to integer.
2828 MVT IntEltVT = MVT::getIntegerVT(EltBits);
2830 MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
2831 V1 = DAG.getBitcast(IntHalfVT, V1);
2832 V2 = DAG.getBitcast(IntHalfVT, V2);
2834 // Freeze V2 since we use it twice and we need to be sure that the add and
2835 // multiply see the same value.
2836 V2 = DAG.getFreeze(V2);
2838 // Recreate TrueMask using the widened type's element count.
2839 TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
2841 // Widen V1 and V2 with 0s and add one copy of V2 to V1.
2843 DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1, V2,
2844 DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
2845 // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
2846 SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
2847 DAG.getUNDEF(IntHalfVT),
2848 DAG.getAllOnesConstant(DL, XLenVT));
2850 DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT, V2, Multiplier,
2851 DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
2852 // Add the new copies to our previous addition giving us 2^eltbits copies of
2853 // V2. This is equivalent to shifting V2 left by eltbits. This should
2854 // combine with the vwmulu.vv above to form vwmaccu.vv.
2855 Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
2856 DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
2857 // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
2858 // WideIntContainerVT has a larger fractional LMUL than implied by the fixed-length type.
2861 MVT::getVectorVT(VT.getVectorElementType(),
2862 WideIntContainerVT.getVectorElementCount() * 2);
2863 Add = DAG.getBitcast(ContainerVT, Add);
2864 return convertFromScalableVector(VT, Add, DAG, Subtarget);
2867 // Detect shuffles which can be re-expressed as vector selects; these are
2868 // shuffles in which each element in the destination is taken from an element
2869 // at the corresponding index in either source vector.
2870 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
2871 int MaskIndex = MaskIdx.value();
2872 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2875 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2877 SmallVector<SDValue> MaskVals;
2878 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2879 // merged with a second vrgather.
2880 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2882 // By default we preserve the original operand order, and use a mask to
2883 // select LHS as true and RHS as false. However, since RVV vector selects may
2884 // feature splats but only on the LHS, we may choose to invert our mask and
2885 // instead select between RHS and LHS.
2886 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2887 bool InvertMask = IsSelect == SwapOps;
2889 // Keep track of which non-undef indices are used by each LHS/RHS shuffle
2891 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2893 // Now construct the mask that will be used by the vselect or blended
2894 // vrgather operation. For vrgathers, construct the appropriate indices into each vector.
2896 for (int MaskIndex : Mask) {
2897 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2898 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2900 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2901 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2902 ? DAG.getConstant(MaskIndex, DL, XLenVT)
2903 : DAG.getUNDEF(XLenVT));
2904 GatherIndicesRHS.push_back(
2905 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2906 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2907 if (IsLHSOrUndefIndex && MaskIndex >= 0)
2908 ++LHSIndexCounts[MaskIndex];
2909 if (!IsLHSOrUndefIndex)
2910 ++RHSIndexCounts[MaskIndex - NumElts];
2916 std::swap(GatherIndicesLHS, GatherIndicesRHS);
2919 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2920 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2921 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2924 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2926 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2927 // On such a large vector we're unable to use i8 as the index type.
2928 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2929 // may involve vector splitting if we're already at LMUL=8, or our
2930 // user-supplied maximum fixed-length LMUL.
2934 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2935 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2936 MVT IndexVT = VT.changeTypeToInteger();
2937 // Since we can't introduce illegal index types at this stage, use i16 and
2938 // vrgatherei16 if the corresponding index type for plain vrgather is greater
2940 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2941 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2942 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2945 MVT IndexContainerVT =
2946 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2949 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2950 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2951 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2952 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
2955 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2956 // If only one index is used, we can use a "splat" vrgather.
2957 // TODO: We can splat the most-common index and fix-up any stragglers, if
2958 // that's beneficial.
2959 if (LHSIndexCounts.size() == 1) {
2960 int SplatIndex = LHSIndexCounts.begin()->getFirst();
2961 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2962 DAG.getConstant(SplatIndex, DL, XLenVT),
2963 DAG.getUNDEF(ContainerVT), TrueMask, VL);
2965 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2967 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2969 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2970 DAG.getUNDEF(ContainerVT), TrueMask, VL);
2974 // If a second vector operand is used by this shuffle, blend it in with an
2975 // additional vrgather.
2976 if (!V2.isUndef()) {
2977 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2979 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2981 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2983 // If only one index is used, we can use a "splat" vrgather.
2984 // TODO: We can splat the most-common index and fix-up any stragglers, if
2985 // that's beneficial.
2986 if (RHSIndexCounts.size() == 1) {
2987 int SplatIndex = RHSIndexCounts.begin()->getFirst();
2988 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2989 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
2992 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2994 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2995 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
3000 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3003 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
3004 // Support splats for any type. These should type legalize well.
3005 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
3008 // Only support legal VTs for other shuffles for now.
3009 if (!isTypeLegal(VT))
3012 MVT SVT = VT.getSimpleVT();
3016 return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
3017 isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
3020 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting the exponent.
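// Worked example (a sketch): CTTZ_ZERO_UNDEF of x = 8 with i16 elements first
// isolates the low set bit with x & -x = 8, converts to f32 (8.0 has a biased
// exponent field of 127 + 3), shifts the exponent down to the LSB, and
// subtracts the bias 127 to give 3.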
3022 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
3023 MVT VT = Op.getSimpleValueType();
3024 unsigned EltSize = VT.getScalarSizeInBits();
3025 SDValue Src = Op.getOperand(0);
3028 // We need an FP type that can represent the value.
3029 // TODO: Use f16 for i8 when possible?
3030 MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
3031 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
3033 // Legal types should have been checked in the RISCVTargetLowering constructor.
3035 // TODO: Splitting may make sense in some cases.
3036 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
3037 "Expected legal float type!");
3039 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
3040 // The trailing zero count is equal to log2 of this single bit value.
3041 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
3043 DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
3044 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
3047 // We have a legal FP type, convert to it.
3048 SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
3049 // Bitcast to integer and shift the exponent to the LSB.
3050 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
3051 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
3052 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
3053 SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
3054 DAG.getConstant(ShiftAmt, DL, IntVT));
3055 // Truncate back to original type to allow vnsrl.
3056 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
3057 // The exponent contains log2 of the value in biased form.
3058 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
3060 // For trailing zeros, we just need to subtract the bias.
3061 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
3062 return DAG.getNode(ISD::SUB, DL, VT, Trunc,
3063 DAG.getConstant(ExponentBias, DL, VT));
3065 // For leading zeros, we need to remove the bias and convert from log2 to
3066 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
3067 unsigned Adjust = ExponentBias + (EltSize - 1);
3068 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
3071 // While RVV has alignment restrictions, we should always be able to load as a
3072 // legal equivalently-sized byte-typed vector instead. This method is
3073 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
3074 // the load is already correctly-aligned, it returns SDValue().
3075 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
3076 SelectionDAG &DAG) const {
3077 auto *Load = cast<LoadSDNode>(Op);
3078 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
3080 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
3081 Load->getMemoryVT(),
3082 *Load->getMemOperand()))
3086 MVT VT = Op.getSimpleValueType();
3087 unsigned EltSizeBits = VT.getScalarSizeInBits();
3088 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
3089 "Unexpected unaligned RVV load type");
3091 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
3092 assert(NewVT.isValid() &&
3093 "Expecting equally-sized RVV vector types to be legal");
3094 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
3095 Load->getPointerInfo(), Load->getOriginalAlign(),
3096 Load->getMemOperand()->getFlags());
3097 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
3100 // While RVV has alignment restrictions, we should always be able to store as a
3101 // legal equivalently-sized byte-typed vector instead. This method is
3102 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
3103 // returns SDValue() if the store is already correctly aligned.
3104 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
3105 SelectionDAG &DAG) const {
3106 auto *Store = cast<StoreSDNode>(Op);
3107 assert(Store && Store->getValue().getValueType().isVector() &&
3108 "Expected vector store");
3110 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
3111 Store->getMemoryVT(),
3112 *Store->getMemOperand()))
3116 SDValue StoredVal = Store->getValue();
3117 MVT VT = StoredVal.getSimpleValueType();
3118 unsigned EltSizeBits = VT.getScalarSizeInBits();
3119 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
3120 "Unexpected unaligned RVV store type");
3122 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
3123 assert(NewVT.isValid() &&
3124 "Expecting equally-sized RVV vector types to be legal");
3125 StoredVal = DAG.getBitcast(NewVT, StoredVal);
3126 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
3127 Store->getPointerInfo(), Store->getOriginalAlign(),
3128 Store->getMemOperand()->getFlags());
3131 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
3132 const RISCVSubtarget &Subtarget) {
3133 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
3135 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
3137 // All simm32 constants should be handled by isel.
3138 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
3139 // this check redundant, but small immediates are common, so checking them
3140 // first improves compile time.
3144 // We only need to cost the immediate if constant pool lowering is enabled.
3145 if (!Subtarget.useConstantPoolForLargeInts())
3148 RISCVMatInt::InstSeq Seq =
3149 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
3150 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
3153 // Expand to a constant pool using the default expansion code.
3157 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
3158 SelectionDAG &DAG) const {
3159 switch (Op.getOpcode()) {
3161 report_fatal_error("unimplemented operand");
3162 case ISD::GlobalAddress:
3163 return lowerGlobalAddress(Op, DAG);
3164 case ISD::BlockAddress:
3165 return lowerBlockAddress(Op, DAG);
3166 case ISD::ConstantPool:
3167 return lowerConstantPool(Op, DAG);
3168 case ISD::JumpTable:
3169 return lowerJumpTable(Op, DAG);
3170 case ISD::GlobalTLSAddress:
3171 return lowerGlobalTLSAddress(Op, DAG);
3173 return lowerConstant(Op, DAG, Subtarget);
3175 return lowerSELECT(Op, DAG);
3177 return lowerBRCOND(Op, DAG);
3179 return lowerVASTART(Op, DAG);
3180 case ISD::FRAMEADDR:
3181 return lowerFRAMEADDR(Op, DAG);
3182 case ISD::RETURNADDR:
3183 return lowerRETURNADDR(Op, DAG);
3184 case ISD::SHL_PARTS:
3185 return lowerShiftLeftParts(Op, DAG);
3186 case ISD::SRA_PARTS:
3187 return lowerShiftRightParts(Op, DAG, true);
3188 case ISD::SRL_PARTS:
3189 return lowerShiftRightParts(Op, DAG, false);
3190 case ISD::BITCAST: {
3192 EVT VT = Op.getValueType();
3193 SDValue Op0 = Op.getOperand(0);
3194 EVT Op0VT = Op0.getValueType();
3195 MVT XLenVT = Subtarget.getXLenVT();
3196 if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
3197 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
3198 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
3199 return FPConv;
3200 }
3201 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
3202 Subtarget.hasStdExtF()) {
3203 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3204 SDValue FPConv =
3205 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
3206 return FPConv;
3207 }
3209 // Consider other scalar<->scalar casts as legal if the types are legal.
3210 // Otherwise expand them.
3211 if (!VT.isVector() && !Op0VT.isVector()) {
3212 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
3213 return Op;
3214 return SDValue();
3215 }
3217 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
3218 "Unexpected types");
3220 if (VT.isFixedLengthVector()) {
3221 // We can handle fixed length vector bitcasts with a simple replacement
3222 // in isel.
3223 if (Op0VT.isFixedLengthVector())
3224 return Op;
3225 // When bitcasting from scalar to fixed-length vector, insert the scalar
3226 // into a one-element vector of the result type, and perform a vector
3227 // bitcast.
3228 if (!Op0VT.isVector()) {
3229 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
3230 if (!isTypeLegal(BVT))
3231 return SDValue();
3232 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
3233 DAG.getUNDEF(BVT), Op0,
3234 DAG.getConstant(0, DL, XLenVT)));
3235 }
3236 return SDValue();
3237 }
3238 // Custom-legalize bitcasts from fixed-length vector types to scalar types
3239 // thus: bitcast the vector to a one-element vector type whose element type
3240 // is the same as the result type, and extract the first element.
3241 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
3242 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
3243 if (!isTypeLegal(BVT))
3244 return SDValue();
3245 SDValue BVec = DAG.getBitcast(BVT, Op0);
3246 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
3247 DAG.getConstant(0, DL, XLenVT));
3248 }
3249 return SDValue();
3250 }
3251 case ISD::INTRINSIC_WO_CHAIN:
3252 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3253 case ISD::INTRINSIC_W_CHAIN:
3254 return LowerINTRINSIC_W_CHAIN(Op, DAG);
3255 case ISD::INTRINSIC_VOID:
3256 return LowerINTRINSIC_VOID(Op, DAG);
3257 case ISD::BSWAP:
3258 case ISD::BITREVERSE: {
3259 MVT VT = Op.getSimpleValueType();
3260 SDLoc DL(Op);
3261 if (Subtarget.hasStdExtZbp()) {
3262 // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
3263 // Start with the maximum immediate value which is the bitwidth - 1.
3264 unsigned Imm = VT.getSizeInBits() - 1;
3265 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
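// (Illustrative: for i32, BITREVERSE uses imm 31 while BSWAP clears the low
// three bits to get imm 24, i.e. rev8.)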
3266 if (Op.getOpcode() == ISD::BSWAP)
3267 Imm &= ~0x7U;
3268 return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
3269 DAG.getConstant(Imm, DL, VT));
3270 }
3271 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
3272 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
3273 // Expand bitreverse to a bswap(rev8) followed by brev8.
3274 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
3275 // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
3276 // as brev8 by an isel pattern.
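// (e.g. for i32: rev8 swaps the four bytes, then brev8 reverses the bits
// within each byte, which together is a full 32-bit bit reversal.)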
3277 return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
3278 DAG.getConstant(7, DL, VT));
3279 }
3280 case ISD::FSHL:
3281 case ISD::FSHR: {
3282 MVT VT = Op.getSimpleValueType();
3283 assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
3284 SDLoc DL(Op);
3285 // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
3286 // use log2(XLen) bits. Mask the shift amount accordingly to prevent
3287 // accidentally setting the extra bit.
3288 unsigned ShAmtWidth = Subtarget.getXLen() - 1;
3289 SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
3290 DAG.getConstant(ShAmtWidth, DL, VT));
3291 // fshl and fshr concatenate their operands in the same order. fsr and fsl
3292 // instructions use different orders. fshl will return its first operand for
3293 // shift of zero, fshr will return its second operand. fsl and fsr both
3294 // return rs1 so the ISD nodes need to have different operand orders.
3295 // Shift amount is in rs2.
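// (e.g. (fshr a, b, 0) == b, and fsr returns rs1 for a zero shift, so FSHR
// becomes (fsr b, a, shamt) via the operand swap below.)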
3296 SDValue Op0 = Op.getOperand(0);
3297 SDValue Op1 = Op.getOperand(1);
3298 unsigned Opc = RISCVISD::FSL;
3299 if (Op.getOpcode() == ISD::FSHR) {
3300 std::swap(Op0, Op1);
3301 Opc = RISCVISD::FSR;
3303 return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
3304 }
3305 case ISD::TRUNCATE:
3306 // Only custom-lower vector truncates
3307 if (!Op.getSimpleValueType().isVector())
3308 return Op;
3309 return lowerVectorTruncLike(Op, DAG);
3310 case ISD::ANY_EXTEND:
3311 case ISD::ZERO_EXTEND:
3312 if (Op.getOperand(0).getValueType().isVector() &&
3313 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3314 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3315 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3316 case ISD::SIGN_EXTEND:
3317 if (Op.getOperand(0).getValueType().isVector() &&
3318 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3319 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3320 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3321 case ISD::SPLAT_VECTOR_PARTS:
3322 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3323 case ISD::INSERT_VECTOR_ELT:
3324 return lowerINSERT_VECTOR_ELT(Op, DAG);
3325 case ISD::EXTRACT_VECTOR_ELT:
3326 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3327 case ISD::VSCALE: {
3328 MVT VT = Op.getSimpleValueType();
3329 SDLoc DL(Op);
3330 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3331 // We define our scalable vector types for lmul=1 to use a 64 bit known
3332 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3333 // vscale as VLENB / 8.
3334 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3335 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
3336 report_fatal_error("Support for VLEN==32 is incomplete.");
3337 // We assume VLENB is a multiple of 8. We manually choose the best shift
3338 // here because SimplifyDemandedBits isn't always able to simplify it.
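// (For example, with Val == 4: vscale * 4 == (VLENB / 8) * 4 == VLENB >> 1,
// so a single SRL by 3 - Log2(4) == 1 suffices.)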
3339 uint64_t Val = Op.getConstantOperandVal(0);
3340 if (isPowerOf2_64(Val)) {
3341 uint64_t Log2 = Log2_64(Val);
3342 if (Log2 < 3)
3343 return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3344 DAG.getConstant(3 - Log2, DL, VT));
3345 if (Log2 > 3)
3346 return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3347 DAG.getConstant(Log2 - 3, DL, VT));
3348 return VLENB;
3349 }
3350 // If the multiplier is a multiple of 8, scale it down to avoid needing
3351 // to shift the VLENB value.
3352 if ((Val % 8) == 0)
3353 return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3354 DAG.getConstant(Val / 8, DL, VT));
3356 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3357 DAG.getConstant(3, DL, VT));
3358 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3361 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3362 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3363 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3364 Op.getOperand(1).getValueType() == MVT::i32) {
3366 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3368 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3369 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3370 DAG.getIntPtrConstant(0, DL));
3374 case ISD::FP_EXTEND:
3375 case ISD::FP_ROUND:
3376 if (!Op.getValueType().isVector())
3377 return Op;
3378 return lowerVectorFPExtendOrRoundLike(Op, DAG);
3379 case ISD::FP_TO_SINT:
3380 case ISD::FP_TO_UINT:
3381 case ISD::SINT_TO_FP:
3382 case ISD::UINT_TO_FP: {
3383 // RVV can only do fp<->int conversions to types half/double the size of
3384 // the source. We custom-lower any conversions that do two hops into
3385 // multiple legal conversions.
3386 MVT VT = Op.getSimpleValueType();
3390 SDValue Src = Op.getOperand(0);
3391 MVT EltVT = VT.getVectorElementType();
3392 MVT SrcVT = Src.getSimpleValueType();
3393 MVT SrcEltVT = SrcVT.getVectorElementType();
3394 unsigned EltSize = EltVT.getSizeInBits();
3395 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3396 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3397 "Unexpected vector element types");
3399 bool IsInt2FP = SrcEltVT.isInteger();
3400 // Widening conversions
3401 if (EltSize > (2 * SrcEltSize)) {
3403 // Do a regular integer sign/zero extension then convert to float.
3404 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
3405 VT.getVectorElementCount());
3406 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3409 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3410 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3413 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3414 // Do one doubling fp_extend then complete the operation by converting
3416 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3417 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3418 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3421 // Narrowing conversions
3422 if (SrcEltSize > (2 * EltSize)) {
3424 // One narrowing int_to_fp, then an fp_round.
3425 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3426 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3427 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3428 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3431 // One narrowing fp_to_int, then truncate the integer. If the float isn't
3432 // representable by the integer, the result is poison.
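// (Illustrative: v4f64 -> v4i8 becomes a narrowing fp_to_int to v4i32
// followed by a v4i32 -> v4i8 truncate.)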
3433 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
3434 VT.getVectorElementCount());
3435 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3436 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3439 // Scalable vectors can exit here. Patterns will handle equally-sized
3440 // conversions and halving/doubling ones.
3441 if (!VT.isFixedLengthVector())
3442 return Op;
3444 // For fixed-length vectors we lower to a custom "VL" node.
3445 unsigned RVVOpc = 0;
3446 switch (Op.getOpcode()) {
3448 llvm_unreachable("Impossible opcode");
3449 case ISD::FP_TO_SINT:
3450 RVVOpc = RISCVISD::FP_TO_SINT_VL;
3451 break;
3452 case ISD::FP_TO_UINT:
3453 RVVOpc = RISCVISD::FP_TO_UINT_VL;
3454 break;
3455 case ISD::SINT_TO_FP:
3456 RVVOpc = RISCVISD::SINT_TO_FP_VL;
3457 break;
3458 case ISD::UINT_TO_FP:
3459 RVVOpc = RISCVISD::UINT_TO_FP_VL;
3460 break;
3461 }
3463 MVT ContainerVT, SrcContainerVT;
3464 // Derive the reference container type from the larger vector type.
3465 if (SrcEltSize > EltSize) {
3466 SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3467 ContainerVT =
3468 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3469 } else {
3470 ContainerVT = getContainerForFixedLengthVector(VT);
3471 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
3472 }
3474 SDValue Mask, VL;
3475 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3477 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3478 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3479 return convertFromScalableVector(VT, Src, DAG, Subtarget);
3481 case ISD::FP_TO_SINT_SAT:
3482 case ISD::FP_TO_UINT_SAT:
3483 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3484 case ISD::FTRUNC:
3485 case ISD::FCEIL:
3486 case ISD::FFLOOR:
3487 return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG, Subtarget);
3488 case ISD::FROUND:
3489 return lowerFROUND(Op, DAG, Subtarget);
3490 case ISD::VECREDUCE_ADD:
3491 case ISD::VECREDUCE_UMAX:
3492 case ISD::VECREDUCE_SMAX:
3493 case ISD::VECREDUCE_UMIN:
3494 case ISD::VECREDUCE_SMIN:
3495 return lowerVECREDUCE(Op, DAG);
3496 case ISD::VECREDUCE_AND:
3497 case ISD::VECREDUCE_OR:
3498 case ISD::VECREDUCE_XOR:
3499 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3500 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3501 return lowerVECREDUCE(Op, DAG);
3502 case ISD::VECREDUCE_FADD:
3503 case ISD::VECREDUCE_SEQ_FADD:
3504 case ISD::VECREDUCE_FMIN:
3505 case ISD::VECREDUCE_FMAX:
3506 return lowerFPVECREDUCE(Op, DAG);
3507 case ISD::VP_REDUCE_ADD:
3508 case ISD::VP_REDUCE_UMAX:
3509 case ISD::VP_REDUCE_SMAX:
3510 case ISD::VP_REDUCE_UMIN:
3511 case ISD::VP_REDUCE_SMIN:
3512 case ISD::VP_REDUCE_FADD:
3513 case ISD::VP_REDUCE_SEQ_FADD:
3514 case ISD::VP_REDUCE_FMIN:
3515 case ISD::VP_REDUCE_FMAX:
3516 return lowerVPREDUCE(Op, DAG);
3517 case ISD::VP_REDUCE_AND:
3518 case ISD::VP_REDUCE_OR:
3519 case ISD::VP_REDUCE_XOR:
3520 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3521 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3522 return lowerVPREDUCE(Op, DAG);
3523 case ISD::INSERT_SUBVECTOR:
3524 return lowerINSERT_SUBVECTOR(Op, DAG);
3525 case ISD::EXTRACT_SUBVECTOR:
3526 return lowerEXTRACT_SUBVECTOR(Op, DAG);
3527 case ISD::STEP_VECTOR:
3528 return lowerSTEP_VECTOR(Op, DAG);
3529 case ISD::VECTOR_REVERSE:
3530 return lowerVECTOR_REVERSE(Op, DAG);
3531 case ISD::VECTOR_SPLICE:
3532 return lowerVECTOR_SPLICE(Op, DAG);
3533 case ISD::BUILD_VECTOR:
3534 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3535 case ISD::SPLAT_VECTOR:
3536 if (Op.getValueType().getVectorElementType() == MVT::i1)
3537 return lowerVectorMaskSplat(Op, DAG);
3539 case ISD::VECTOR_SHUFFLE:
3540 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3541 case ISD::CONCAT_VECTORS: {
3542 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3543 // better than going through the stack, as the default expansion does.
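// (e.g. (concat_vectors v8i32:a, v8i32:b) becomes
//  (insert_subvector (insert_subvector undef, a, 0), b, 8).)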
3545 MVT VT = Op.getSimpleValueType();
3546 unsigned NumOpElts =
3547 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
3548 SDValue Vec = DAG.getUNDEF(VT);
3549 for (const auto &OpIdx : enumerate(Op->ops())) {
3550 SDValue SubVec = OpIdx.value();
3551 // Don't insert undef subvectors.
3552 if (SubVec.isUndef())
3553 continue;
3554 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3555 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3556 }
3557 return Vec;
3558 }
3559 case ISD::LOAD:
3560 if (auto V = expandUnalignedRVVLoad(Op, DAG))
3561 return V;
3562 if (Op.getValueType().isFixedLengthVector())
3563 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3564 return Op;
3565 case ISD::STORE:
3566 if (auto V = expandUnalignedRVVStore(Op, DAG))
3567 return V;
3568 if (Op.getOperand(1).getValueType().isFixedLengthVector())
3569 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3570 return Op;
3571 case ISD::MLOAD:
3572 case ISD::VP_LOAD:
3573 return lowerMaskedLoad(Op, DAG);
3574 case ISD::MSTORE:
3575 case ISD::VP_STORE:
3576 return lowerMaskedStore(Op, DAG);
3577 case ISD::SETCC:
3578 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3580 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
3582 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
3584 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
3586 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL, /*HasMergeOp*/ true);
3588 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL, /*HasMergeOp*/ true);
3590 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3593 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3596 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3599 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
3601 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
3603 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
3605 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
3609 if (Op.getSimpleValueType().isFixedLengthVector())
3610 return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3611 // This can be called for an i32 shift amount that needs to be promoted.
3612 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
3613 "Unexpected custom legalisation");
3616 return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL,
3617 /*HasMergeOp*/ true);
3619 return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL,
3620 /*HasMergeOp*/ true);
3622 return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL,
3623 /*HasMergeOp*/ true);
3625 return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL,
3626 /*HasMergeOp*/ true);
3628 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
3630 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
3632 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
3634 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
3636 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
3638 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
3640 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
3642 return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
3644 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
3646 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
3648 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
3650 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
3652 return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL,
3653 /*HasMergeOp*/ true);
3655 return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL,
3656 /*HasMergeOp*/ true);
3658 return lowerABS(Op, DAG);
3659 case ISD::CTLZ_ZERO_UNDEF:
3660 case ISD::CTTZ_ZERO_UNDEF:
3661 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
3663 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
3664 case ISD::FCOPYSIGN:
3665 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
3667 case ISD::VP_GATHER:
3668 return lowerMaskedGather(Op, DAG);
3670 case ISD::VP_SCATTER:
3671 return lowerMaskedScatter(Op, DAG);
3672 case ISD::FLT_ROUNDS_:
3673 return lowerGET_ROUNDING(Op, DAG);
3674 case ISD::SET_ROUNDING:
3675 return lowerSET_ROUNDING(Op, DAG);
3676 case ISD::EH_DWARF_CFA:
3677 return lowerEH_DWARF_CFA(Op, DAG);
3678 case ISD::VP_SELECT:
3679 return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
3681 return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
3683 return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
3685 return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
3687 return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
3689 return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
3691 return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
3693 return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
3695 return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
3697 return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
3699 return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
3701 return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
3703 return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
3705 return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
3707 return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
3709 return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
3711 return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
3713 return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
3715 return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
3717 return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
3719 return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
3720 case ISD::VP_SIGN_EXTEND:
3721 case ISD::VP_ZERO_EXTEND:
3722 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
3723 return lowerVPExtMaskOp(Op, DAG);
3724 return lowerVPOp(Op, DAG,
3725 Op.getOpcode() == ISD::VP_SIGN_EXTEND
3726 ? RISCVISD::VSEXT_VL
3727 : RISCVISD::VZEXT_VL);
3728 case ISD::VP_TRUNCATE:
3729 return lowerVectorTruncLike(Op, DAG);
3730 case ISD::VP_FP_EXTEND:
3731 case ISD::VP_FP_ROUND:
3732 return lowerVectorFPExtendOrRoundLike(Op, DAG);
3733 case ISD::VP_FP_TO_SINT:
3734 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
3735 case ISD::VP_FP_TO_UINT:
3736 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL);
3737 case ISD::VP_SINT_TO_FP:
3738 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
3739 case ISD::VP_UINT_TO_FP:
3740 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
3742 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
3743 return lowerVPSetCCMaskOp(Op, DAG);
3744 return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
3745 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
3746 return lowerVPStridedLoad(Op, DAG);
3747 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
3748 return lowerVPStridedStore(Op, DAG);
3752 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3753 SelectionDAG &DAG, unsigned Flags) {
3754 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3757 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3758 SelectionDAG &DAG, unsigned Flags) {
3759 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3760 Flags);
3761 }
3763 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3764 SelectionDAG &DAG, unsigned Flags) {
3765 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3766 N->getOffset(), Flags);
3769 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3770 SelectionDAG &DAG, unsigned Flags) {
3771 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3774 template <class NodeTy>
3775 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3776 bool IsLocal) const {
3777 SDLoc DL(N);
3778 EVT Ty = getPointerTy(DAG.getDataLayout());
3780 if (isPositionIndependent()) {
3781 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3782 if (IsLocal)
3783 // Use PC-relative addressing to access the symbol. This generates the
3784 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
3785 // %pcrel_lo(auipc)).
3786 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
3788 // Use PC-relative addressing to access the GOT for this symbol, then load
3789 // the address from the GOT. This generates the pattern (PseudoLA sym),
3790 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
3791 MachineFunction &MF = DAG.getMachineFunction();
3792 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3793 MachinePointerInfo::getGOT(MF),
3794 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3795 MachineMemOperand::MOInvariant,
3796 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3797 SDValue Load =
3798 DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other),
3799 {DAG.getEntryNode(), Addr}, Ty, MemOp);
3800 return Load;
3801 }
3803 switch (getTargetMachine().getCodeModel()) {
3804 default:
3805 report_fatal_error("Unsupported code model for lowering");
3806 case CodeModel::Small: {
3807 // Generate a sequence for accessing addresses within the first 2 GiB of
3808 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
3809 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
3810 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
3811 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
3812 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
3814 case CodeModel::Medium: {
3815 // Generate a sequence for accessing addresses within any 2 GiB range within
3816 // the address space. This generates the pattern (PseudoLLA sym), which
3817 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3818 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3819 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
3824 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3825 SelectionDAG &DAG) const {
3827 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3828 assert(N->getOffset() == 0 && "unexpected offset in global node");
3829 return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
3832 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3833 SelectionDAG &DAG) const {
3834 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3836 return getAddr(N, DAG);
3839 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3840 SelectionDAG &DAG) const {
3841 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3843 return getAddr(N, DAG);
3846 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3847 SelectionDAG &DAG) const {
3848 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3850 return getAddr(N, DAG);
3853 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3854 SelectionDAG &DAG,
3855 bool UseGOT) const {
3856 SDLoc DL(N);
3857 EVT Ty = getPointerTy(DAG.getDataLayout());
3858 const GlobalValue *GV = N->getGlobal();
3859 MVT XLenVT = Subtarget.getXLenVT();
3861 if (UseGOT) {
3862 // Use PC-relative addressing to access the GOT for this TLS symbol, then
3863 // load the address from the GOT and add the thread pointer. This generates
3864 // the pattern (PseudoLA_TLS_IE sym), which expands to
3865 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3866 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3867 MachineFunction &MF = DAG.getMachineFunction();
3868 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3869 MachinePointerInfo::getGOT(MF),
3870 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3871 MachineMemOperand::MOInvariant,
3872 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3873 SDValue Load = DAG.getMemIntrinsicNode(
3874 RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
3875 {DAG.getEntryNode(), Addr}, Ty, MemOp);
3877 // Add the thread pointer.
3878 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3879 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3880 }
3882 // Generate a sequence for accessing the address relative to the thread
3883 // pointer, with the appropriate adjustment for the thread pointer offset.
3884 // This generates the pattern
3885 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3886 SDValue AddrHi =
3887 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3888 SDValue AddrAdd =
3889 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3890 SDValue AddrLo =
3891 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3893 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
3894 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3895 SDValue MNAdd =
3896 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
3897 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
3900 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3901 SelectionDAG &DAG) const {
3902 SDLoc DL(N);
3903 EVT Ty = getPointerTy(DAG.getDataLayout());
3904 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3905 const GlobalValue *GV = N->getGlobal();
3907 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3908 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3909 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3910 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3911 SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
3913 // Prepare argument list to generate call.
3914 ArgListTy Args;
3915 ArgListEntry Entry;
3916 Entry.Node = Load;
3917 Entry.Ty = CallTy;
3918 Args.push_back(Entry);
3920 // Setup call to __tls_get_addr.
3921 TargetLowering::CallLoweringInfo CLI(DAG);
3922 CLI.setDebugLoc(DL)
3923 .setChain(DAG.getEntryNode())
3924 .setLibCallee(CallingConv::C, CallTy,
3925 DAG.getExternalSymbol("__tls_get_addr", Ty),
3926 std::move(Args));
3928 return LowerCallTo(CLI).first;
3931 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3932 SelectionDAG &DAG) const {
3934 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3935 assert(N->getOffset() == 0 && "unexpected offset in global node");
3937 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3939 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3940 CallingConv::GHC)
3941 report_fatal_error("In GHC calling convention TLS is not supported");
3943 SDValue Addr;
3944 switch (Model) {
3945 case TLSModel::LocalExec:
3946 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3947 break;
3948 case TLSModel::InitialExec:
3949 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3950 break;
3951 case TLSModel::LocalDynamic:
3952 case TLSModel::GeneralDynamic:
3953 Addr = getDynamicTLSAddr(N, DAG);
3954 break;
3955 }
3957 return Addr;
3958 }
3960 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3961 SDValue CondV = Op.getOperand(0);
3962 SDValue TrueV = Op.getOperand(1);
3963 SDValue FalseV = Op.getOperand(2);
3964 SDLoc DL(Op);
3965 MVT VT = Op.getSimpleValueType();
3966 MVT XLenVT = Subtarget.getXLenVT();
3968 // Lower vector SELECTs to VSELECTs by splatting the condition.
3969 if (VT.isVector()) {
3970 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3971 SDValue CondSplat = VT.isScalableVector()
3972 ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3973 : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3974 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3977 // If the result type is XLenVT and CondV is the output of a SETCC node
3978 // which also operated on XLenVT inputs, then merge the SETCC node into the
3979 // lowered RISCVISD::SELECT_CC to take advantage of the integer
3980 // compare+branch instructions. i.e.:
3981 // (select (setcc lhs, rhs, cc), truev, falsev)
3982 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3983 if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3984 CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3985 SDValue LHS = CondV.getOperand(0);
3986 SDValue RHS = CondV.getOperand(1);
3987 const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3988 ISD::CondCode CCVal = CC->get();
3990 // Special case for a select of 2 constants that have a difference of 1.
3991 // Normally this is done by DAGCombine, but if the select is introduced by
3992 // type legalization or op legalization, we miss it. Restricting to SETLT
3993 // case for now because that is what signed saturating add/sub need.
3994 // FIXME: We don't need the condition to be SETLT or even a SETCC,
3995 // but we would probably want to swap the true/false values if the condition
3996 // is SETGE/SETLE to avoid an XORI.
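// (e.g. (select cond, 5, 4) becomes (add cond, 4) and
//  (select cond, 4, 5) becomes (sub 5, cond).)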
3997 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3998 CCVal == ISD::SETLT) {
3999 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
4000 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
4001 if (TrueVal - 1 == FalseVal)
4002 return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
4003 if (TrueVal + 1 == FalseVal)
4004 return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
4007 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4009 SDValue TargetCC = DAG.getCondCode(CCVal);
4010 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
4011 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
4015 // (select condv, truev, falsev)
4016 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
4017 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4018 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
4020 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
4022 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
4025 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
4026 SDValue CondV = Op.getOperand(1);
4027 SDLoc DL(Op);
4028 MVT XLenVT = Subtarget.getXLenVT();
4030 if (CondV.getOpcode() == ISD::SETCC &&
4031 CondV.getOperand(0).getValueType() == XLenVT) {
4032 SDValue LHS = CondV.getOperand(0);
4033 SDValue RHS = CondV.getOperand(1);
4034 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
4036 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
4038 SDValue TargetCC = DAG.getCondCode(CCVal);
4039 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
4040 LHS, RHS, TargetCC, Op.getOperand(2));
4043 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
4044 CondV, DAG.getConstant(0, DL, XLenVT),
4045 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
4048 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4049 MachineFunction &MF = DAG.getMachineFunction();
4050 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
4053 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4054 getPointerTy(MF.getDataLayout()));
4056 // vastart just stores the address of the VarArgsFrameIndex slot into the
4057 // memory location argument.
4058 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4059 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4060 MachinePointerInfo(SV));
4063 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
4064 SelectionDAG &DAG) const {
4065 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
4066 MachineFunction &MF = DAG.getMachineFunction();
4067 MachineFrameInfo &MFI = MF.getFrameInfo();
4068 MFI.setFrameAddressIsTaken(true);
4069 Register FrameReg = RI.getFrameRegister(MF);
4070 int XLenInBytes = Subtarget.getXLen() / 8;
4072 EVT VT = Op.getValueType();
4073 SDLoc DL(Op);
4074 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4075 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4076 while (Depth--) {
4077 int Offset = -(XLenInBytes * 2);
4078 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4079 DAG.getIntPtrConstant(Offset, DL));
4080 FrameAddr =
4081 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4082 }
4083 return FrameAddr;
4084 }
4086 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
4087 SelectionDAG &DAG) const {
4088 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
4089 MachineFunction &MF = DAG.getMachineFunction();
4090 MachineFrameInfo &MFI = MF.getFrameInfo();
4091 MFI.setReturnAddressIsTaken(true);
4092 MVT XLenVT = Subtarget.getXLenVT();
4093 int XLenInBytes = Subtarget.getXLen() / 8;
4095 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4096 return SDValue();
4098 EVT VT = Op.getValueType();
4099 SDLoc DL(Op);
4100 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4101 if (Depth) {
4102 int Off = -XLenInBytes;
4103 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4104 SDValue Offset = DAG.getConstant(Off, DL, VT);
4105 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4106 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4107 MachinePointerInfo());
4108 }
4110 // Return the value of the return address register, marking it an implicit
4111 // live-in.
4112 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
4113 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
4116 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
4117 SelectionDAG &DAG) const {
4118 SDLoc DL(Op);
4119 SDValue Lo = Op.getOperand(0);
4120 SDValue Hi = Op.getOperand(1);
4121 SDValue Shamt = Op.getOperand(2);
4122 EVT VT = Lo.getValueType();
4124 // if Shamt-XLEN < 0: // Shamt < XLEN
4126 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
4129 // Hi = Lo << (Shamt-XLEN)
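// Note: since Shamt < XLEN in the true branch, (XLEN-1 ^ Shamt) equals
// (XLEN-1) - Shamt; the XOR form is used because XORI takes XLEN-1 as an
// immediate, whereas SUB would need the constant in a register.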
4131 SDValue Zero = DAG.getConstant(0, DL, VT);
4132 SDValue One = DAG.getConstant(1, DL, VT);
4133 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
4134 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
4135 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
4136 SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
4138 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4139 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4140 SDValue ShiftRightLo =
4141 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
4142 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4143 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4144 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
4146 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
4148 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4149 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4151 SDValue Parts[2] = {Lo, Hi};
4152 return DAG.getMergeValues(Parts, DL);
4155 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
4156 bool IsSRA) const {
4157 SDLoc DL(Op);
4158 SDValue Lo = Op.getOperand(0);
4159 SDValue Hi = Op.getOperand(1);
4160 SDValue Shamt = Op.getOperand(2);
4161 EVT VT = Lo.getValueType();
4164 // if Shamt-XLEN < 0: // Shamt < XLEN
4165 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
4166 // Hi = Hi >>s Shamt
4168 // Lo = Hi >>s (Shamt-XLEN);
4169 // Hi = Hi >>s (XLEN-1)
4172 // if Shamt-XLEN < 0: // Shamt < XLEN
4173 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
4174 // Hi = Hi >>u Shamt
4176 // Lo = Hi >>u (Shamt-XLEN);
4179 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4181 SDValue Zero = DAG.getConstant(0, DL, VT);
4182 SDValue One = DAG.getConstant(1, DL, VT);
4183 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
4184 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
4185 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
4186 SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
4188 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4189 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4190 SDValue ShiftLeftHi =
4191 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
4192 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4193 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4194 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
4196 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
4198 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
4200 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4201 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4203 SDValue Parts[2] = {Lo, Hi};
4204 return DAG.getMergeValues(Parts, DL);
4207 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
4208 // legal equivalently-sized i8 type, so we can use that as a go-between.
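// (e.g. an i1 splat of %x is lowered as (setcc (splat (and %x, 1)), 0, ne)
//  over the equivalently-sized i8 vector type.)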
4209 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
4210 SelectionDAG &DAG) const {
4212 MVT VT = Op.getSimpleValueType();
4213 SDValue SplatVal = Op.getOperand(0);
4214 // All-zeros or all-ones splats are handled specially.
4215 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
4216 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
4217 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
4219 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
4220 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
4221 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
4223 MVT XLenVT = Subtarget.getXLenVT();
4224 assert(SplatVal.getValueType() == XLenVT &&
4225 "Unexpected type for i1 splat value");
4226 MVT InterVT = VT.changeVectorElementType(MVT::i8);
4227 SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
4228 DAG.getConstant(1, DL, XLenVT));
4229 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
4230 SDValue Zero = DAG.getConstant(0, DL, InterVT);
4231 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
4234 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
4235 // illegal (currently only vXi64 RV32).
4236 // FIXME: We could also catch non-constant sign-extended i32 values and lower
4237 // them to VMV_V_X_VL.
4238 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
4239 SelectionDAG &DAG) const {
4241 MVT VecVT = Op.getSimpleValueType();
4242 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
4243 "Unexpected SPLAT_VECTOR_PARTS lowering");
4245 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
4246 SDValue Lo = Op.getOperand(0);
4247 SDValue Hi = Op.getOperand(1);
4249 if (VecVT.isFixedLengthVector()) {
4250 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4252 SDValue Mask, VL;
4253 std::tie(Mask, VL) =
4254 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4256 SDValue Res =
4257 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
4258 return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
4261 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4262 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4263 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4264 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4265 // node in order to try and match RVV vector/scalar instructions.
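// (e.g. Lo == -2, Hi == -1: (LoC >> 31) == -1 == HiC, so a single vmv.v.x
//  of the sign-extended 32-bit value suffices.)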
4266 if ((LoC >> 31) == HiC)
4267 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
4268 Lo, DAG.getRegister(RISCV::X0, MVT::i32));
4271 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4272 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4273 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4274 Hi.getConstantOperandVal(1) == 31)
4275 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
4276 DAG.getRegister(RISCV::X0, MVT::i32));
4278 // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
4279 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
4280 DAG.getUNDEF(VecVT), Lo, Hi,
4281 DAG.getRegister(RISCV::X0, MVT::i32));
4284 // Custom-lower extensions from mask vectors by using a vselect either with 1
4285 // for zero/any-extension or -1 for sign-extension:
4286 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
4287 // Note that any-extension is lowered identically to zero-extension.
4288 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
4289 int64_t ExtTrueVal) const {
4291 MVT VecVT = Op.getSimpleValueType();
4292 SDValue Src = Op.getOperand(0);
4293 // Only custom-lower extensions from mask types
4294 assert(Src.getValueType().isVector() &&
4295 Src.getValueType().getVectorElementType() == MVT::i1);
4297 if (VecVT.isScalableVector()) {
4298 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
4299 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
4300 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
4303 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4304 MVT I1ContainerVT =
4305 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4307 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
4309 SDValue Mask, VL;
4310 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4312 MVT XLenVT = Subtarget.getXLenVT();
4313 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
4314 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
4316 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4317 DAG.getUNDEF(ContainerVT), SplatZero, VL);
4318 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4319 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
4320 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
4321 SplatTrueVal, SplatZero, VL);
4323 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
4326 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
4327 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
4328 MVT ExtVT = Op.getSimpleValueType();
4329 // Only custom-lower extensions from fixed-length vector types.
4330 if (!ExtVT.isFixedLengthVector())
4331 return Op;
4332 MVT VT = Op.getOperand(0).getSimpleValueType();
4333 // Grab the canonical container type for the extended type. Infer the smaller
4334 // type from that to ensure the same number of vector elements, as we know
4335 // the LMUL will be sufficient to hold the smaller type.
4336 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
4337 // Get the extended container type manually to ensure the same number of
4338 // vector elements between source and dest.
4339 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
4340 ContainerExtVT.getVectorElementCount());
4342 SDValue Op1 =
4343 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
4345 SDLoc DL(Op);
4346 SDValue Mask, VL;
4347 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4349 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
4351 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
4354 // Custom-lower truncations from vectors to mask vectors by using a mask and a
4355 // setcc operation:
4356 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
4357 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
4358 SelectionDAG &DAG) const {
4359 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
4361 EVT MaskVT = Op.getValueType();
4362 // Only expect to custom-lower truncations to mask types
4363 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
4364 "Unexpected type for vector mask lowering");
4365 SDValue Src = Op.getOperand(0);
4366 MVT VecVT = Src.getSimpleValueType();
4367 SDValue Mask, VL;
4368 if (IsVPTrunc) {
4369 Mask = Op.getOperand(1);
4370 VL = Op.getOperand(2);
4371 }
4372 // If this is a fixed vector, we need to convert it to a scalable vector.
4373 MVT ContainerVT = VecVT;
4375 if (VecVT.isFixedLengthVector()) {
4376 ContainerVT = getContainerForFixedLengthVector(VecVT);
4377 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4378 if (IsVPTrunc) {
4379 MVT MaskContainerVT =
4380 getContainerForFixedLengthVector(Mask.getSimpleValueType());
4381 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
4382 }
4383 }
4385 if (!IsVPTrunc) {
4386 std::tie(Mask, VL) =
4387 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4388 }
4390 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
4391 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4393 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4394 DAG.getUNDEF(ContainerVT), SplatOne, VL);
4395 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4396 DAG.getUNDEF(ContainerVT), SplatZero, VL);
4398 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4399 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
4400 DAG.getUNDEF(ContainerVT), Mask, VL);
4401 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
4402 DAG.getCondCode(ISD::SETNE), Mask, VL);
4403 if (MaskVT.isFixedLengthVector())
4404 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
4405 return Trunc;
4406 }
4408 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
4409 SelectionDAG &DAG) const {
4410 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
4413 MVT VT = Op.getSimpleValueType();
4414 // Only custom-lower vector truncates
4415 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
4417 // Truncates to mask types are handled differently
4418 if (VT.getVectorElementType() == MVT::i1)
4419 return lowerVectorMaskTruncLike(Op, DAG);
4421 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
4422 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
4423 // truncate by one power of two at a time.
4424 MVT DstEltVT = VT.getVectorElementType();
4426 SDValue Src = Op.getOperand(0);
4427 MVT SrcVT = Src.getSimpleValueType();
4428 MVT SrcEltVT = SrcVT.getVectorElementType();
4430 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
4431 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
4432 "Unexpected vector truncate lowering");
4434 MVT ContainerVT = SrcVT;
4435 SDValue Mask, VL;
4436 if (IsVPTrunc) {
4437 Mask = Op.getOperand(1);
4438 VL = Op.getOperand(2);
4439 }
4440 if (SrcVT.isFixedLengthVector()) {
4441 ContainerVT = getContainerForFixedLengthVector(SrcVT);
4442 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4443 if (IsVPTrunc) {
4444 MVT MaskVT = getMaskTypeFor(ContainerVT);
4445 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4446 }
4447 }
4449 SDValue Result = Src;
4450 if (!IsVPTrunc) {
4451 std::tie(Mask, VL) =
4452 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4453 }
4455 LLVMContext &Context = *DAG.getContext();
4456 const ElementCount Count = ContainerVT.getVectorElementCount();
4457 do {
4458 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
4459 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
4460 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
4461 Mask, VL);
4462 } while (SrcEltVT != DstEltVT);
4464 if (SrcVT.isFixedLengthVector())
4465 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
4471 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
4472 SelectionDAG &DAG) const {
4473 bool IsVP =
4474 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
4475 bool IsExtend =
4476 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
4477 // RVV can only truncate fp to types half the size of the source. We
4478 // custom-lower f64->f16 rounds via RVV's round-to-odd float
4479 // conversion instruction.
4481 MVT VT = Op.getSimpleValueType();
4483 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
4485 SDValue Src = Op.getOperand(0);
4486 MVT SrcVT = Src.getSimpleValueType();
4488 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
4489 SrcVT.getVectorElementType() != MVT::f16);
4490 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
4491 SrcVT.getVectorElementType() != MVT::f64);
4493 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
4495 // Prepare any fixed-length vector operands.
4496 MVT ContainerVT = VT;
4497 SDValue Mask, VL;
4498 if (IsVP) {
4499 Mask = Op.getOperand(1);
4500 VL = Op.getOperand(2);
4501 }
4502 if (VT.isFixedLengthVector()) {
4503 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
4504 ContainerVT =
4505 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
4506 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4507 if (IsVP) {
4508 MVT MaskVT = getMaskTypeFor(ContainerVT);
4509 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
4510 }
4511 }
4513 if (!IsVP) {
4514 std::tie(Mask, VL) =
4515 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4516 }
4517 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
4519 if (IsDirectConv) {
4520 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
4521 if (VT.isFixedLengthVector())
4522 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
4523 return Src;
4524 }
4526 unsigned InterConvOpc =
4527 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
4529 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
4530 SDValue IntermediateConv =
4531 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
4532 SDValue Result =
4533 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
4534 if (VT.isFixedLengthVector())
4535 return convertFromScalableVector(VT, Result, DAG, Subtarget);
4536 return Result;
4537 }
4539 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
4540 // first position of a vector, and that vector is slid up to the insert index.
4541 // By limiting the active vector length to index+1 and merging with the
4542 // original vector (with an undisturbed tail policy for elements >= VL), we
4543 // achieve the desired result of leaving all elements untouched except the one
4544 // at VL-1, which is replaced with the desired value.
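// For example (illustrative), inserting at index 2 places the value at
// element 0 of a temporary vector, then slides it up by 2 with VL == 3,
// leaving elements 0-1 and all elements past index 2 untouched.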
4545 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4546 SelectionDAG &DAG) const {
4548 MVT VecVT = Op.getSimpleValueType();
4549 SDValue Vec = Op.getOperand(0);
4550 SDValue Val = Op.getOperand(1);
4551 SDValue Idx = Op.getOperand(2);
4553 if (VecVT.getVectorElementType() == MVT::i1) {
4554 // FIXME: For now we just promote to an i8 vector and insert into that,
4555 // but this is probably not optimal.
4556 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
4557 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
4558 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
4559 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
4562 MVT ContainerVT = VecVT;
4563 // If the operand is a fixed-length vector, convert to a scalable one.
4564 if (VecVT.isFixedLengthVector()) {
4565 ContainerVT = getContainerForFixedLengthVector(VecVT);
4566 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4569 MVT XLenVT = Subtarget.getXLenVT();
4571 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
4572 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
4573 // Even i64-element vectors on RV32 can be lowered without scalar
4574 // legalization if the most-significant 32 bits of the value are not affected
4575 // by the sign-extension of the lower 32 bits.
4576 // TODO: We could also catch sign extensions of a 32-bit value.
4577 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
4578 const auto *CVal = cast<ConstantSDNode>(Val);
4579 if (isInt<32>(CVal->getSExtValue())) {
4580 IsLegalInsert = true;
4581 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
4582 }
4583 }
4585 SDValue Mask, VL;
4586 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4588 SDValue ValInVec;
4590 if (IsLegalInsert) {
4591 unsigned Opc =
4592 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4593 if (isNullConstant(Idx)) {
4594 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
4595 if (!VecVT.isFixedLengthVector())
4596 return Vec;
4597 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
4598 }
4599 ValInVec =
4600 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
4601 } else {
4602 // On RV32, i64-element vectors must be specially handled to place the
4603 // value at element 0, by using two vslide1up instructions in sequence on
4604 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
4605 // this purpose.
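// (Illustrative: after sliding in Hi and then Lo, the i32 elements read
//  <Lo, Hi, 0, ...>, i.e. the full i64 value sits in element 0 after the
//  bitcast below.)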
4606 SDValue One = DAG.getConstant(1, DL, XLenVT);
4607 SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
4608 SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
4609 MVT I32ContainerVT =
4610 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
4611 SDValue I32Mask =
4612 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
4613 // Limit the active VL to two.
4614 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
4615 // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied
4616 // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
4617 ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT,
4618 DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL);
4619 // First slide in the hi value, then the lo in underneath it.
4620 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
4621 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
4622 I32Mask, InsertI64VL);
4623 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
4624 DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
4625 I32Mask, InsertI64VL);
4626 // Bitcast back to the right container type.
4627 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
4628 }
4630 // Now that the value is in a vector, slide it into position.
4631 SDValue InsertVL =
4632 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
4633 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
4634 ValInVec, Idx, Mask, InsertVL);
4635 if (!VecVT.isFixedLengthVector())
4636 return Slideup;
4637 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
4640 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
4641 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
4642 // types this is done using VMV_X_S to allow us to glean information about the
4643 // sign bits of the result.
4644 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4645 SelectionDAG &DAG) const {
4647 SDValue Idx = Op.getOperand(1);
4648 SDValue Vec = Op.getOperand(0);
4649 EVT EltVT = Op.getValueType();
4650 MVT VecVT = Vec.getSimpleValueType();
4651 MVT XLenVT = Subtarget.getXLenVT();
4653 if (VecVT.getVectorElementType() == MVT::i1) {
4654 if (VecVT.isFixedLengthVector()) {
4655 unsigned NumElts = VecVT.getVectorNumElements();
4657 MVT WideEltVT;
4658 unsigned WidenVecLen;
4659 SDValue ExtractElementIdx;
4660 SDValue ExtractBitIdx;
4661 unsigned MaxEEW = Subtarget.getELEN();
4662 MVT LargestEltVT = MVT::getIntegerVT(
4663 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
4664 if (NumElts <= LargestEltVT.getSizeInBits()) {
4665 assert(isPowerOf2_32(NumElts) &&
4666 "the number of elements should be power of 2");
4667 WideEltVT = MVT::getIntegerVT(NumElts);
4668 WidenVecLen = 1;
4669 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
4670 ExtractBitIdx = Idx;
4671 } else {
4672 WideEltVT = LargestEltVT;
4673 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
4674 // extract element index = index / element width
4675 ExtractElementIdx = DAG.getNode(
4676 ISD::SRL, DL, XLenVT, Idx,
4677 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
4678 // mask bit index = index % element width
4679 ExtractBitIdx = DAG.getNode(
4680 ISD::AND, DL, XLenVT, Idx,
4681 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
4682 }
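// (Worked example, illustrative: extracting bit 37 of a v64i1 mask with
//  ELEN == 32 gives WidenVecLen == 2, element index 37 >> 5 == 1 and bit
//  index 37 & 31 == 5.)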
4683 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
4684 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
4685 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
4686 Vec, ExtractElementIdx);
4687 // Extract the bit from GPR.
4688 SDValue ShiftRight =
4689 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
4690 return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
4691 DAG.getConstant(1, DL, XLenVT));
4694 // Otherwise, promote to an i8 vector and extract from that.
4695 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
4696 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
4697 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
4700 // If this is a fixed vector, we need to convert it to a scalable vector.
4701 MVT ContainerVT = VecVT;
4702 if (VecVT.isFixedLengthVector()) {
4703 ContainerVT = getContainerForFixedLengthVector(VecVT);
4704 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4707 // If the index is 0, the vector is already in the right position.
4708 if (!isNullConstant(Idx)) {
4709 // Use a VL of 1 to avoid processing more elements than we need.
4710 SDValue VL = DAG.getConstant(1, DL, XLenVT);
4711 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
4712 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
4713 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
4716 if (!EltVT.isInteger()) {
4717 // Floating-point extracts are handled in TableGen.
4718 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
4719 DAG.getConstant(0, DL, XLenVT));
4720 }
4722 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
4723 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
4724 }
4726 // Some RVV intrinsics may claim that they want an integer operand to be
4727 // promoted or expanded.
4728 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
4729 const RISCVSubtarget &Subtarget) {
4730 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4731 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
4732 "Unexpected opcode");
4734 if (!Subtarget.hasVInstructions())
4735 return SDValue();
4737 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
4738 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
4739 SDLoc DL(Op);
4741 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
4742 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
4743 if (!II || !II->hasScalarOperand())
4744 return SDValue();
4746 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
4747 assert(SplatOp < Op.getNumOperands());
4749 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
4750 SDValue &ScalarOp = Operands[SplatOp];
4751 MVT OpVT = ScalarOp.getSimpleValueType();
4752 MVT XLenVT = Subtarget.getXLenVT();
4754 // If this isn't a scalar, or its type is XLenVT we're done.
4755 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
4756 return SDValue();
4758 // Simplest case is that the operand needs to be promoted to XLenVT.
4759 if (OpVT.bitsLT(XLenVT)) {
4760 // If the operand is a constant, sign extend to increase our chances
4761 // of being able to use a .vi instruction. ANY_EXTEND would become a
4762 // zero extend and the simm5 check in isel would fail.
4763 // FIXME: Should we ignore the upper bits in isel instead?
4764 unsigned ExtOpc =
4765 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4766 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
4767 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4768 }
4770 // Use the previous operand to get the vXi64 VT. The result might be a mask
4771 // VT for compares. Using the previous operand assumes that the previous
4772 // operand will never have a smaller element size than a scalar operand and
4773 // that a widening operation never uses SEW=64.
4774 // NOTE: If this fails the below assert, we can probably just find the
4775 // element count from any operand or result and use it to construct the VT.
4776 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
4777 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
4779 // The more complex case is when the scalar is larger than XLenVT.
4780 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
4781 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
4783 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
4784 // instruction to sign-extend since SEW>XLEN.
4785 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
4786 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
4787 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4788 }
4790 switch (IntNo) {
4791 case Intrinsic::riscv_vslide1up:
4792 case Intrinsic::riscv_vslide1down:
4793 case Intrinsic::riscv_vslide1up_mask:
4794 case Intrinsic::riscv_vslide1down_mask: {
4795 // We need to special case these when the scalar is larger than XLen.
4796 unsigned NumOps = Op.getNumOperands();
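// With INTRINSIC_WO_CHAIN the masked vslide1up/vslide1down form carries
// (ID, maskedoff, vec, scalar, mask, VL, policy), i.e. 7 operands; the
// unmasked form is shorter.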
4797 bool IsMasked = NumOps == 7;
4799 // Convert the vector source to the equivalent nxvXi32 vector.
4800 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4801 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
4803 SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
4804 DAG.getConstant(0, DL, XLenVT));
4805 SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
4806 DAG.getConstant(1, DL, XLenVT));
4808 // Double the VL since we halved SEW.
4809 SDValue AVL = getVLOperand(Op);
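// Halving SEW from 64 to 32 doubles the number of elements covered by the
// same vector bits, so the i32-typed VL computed below is twice the AVL.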
4811 SDValue I32VL;
4812 // Optimize for constant AVL
4813 if (isa<ConstantSDNode>(AVL)) {
4814 unsigned EltSize = VT.getScalarSizeInBits();
4815 unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
4817 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
4818 unsigned MaxVLMAX =
4819 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
4821 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
4822 unsigned MinVLMAX =
4823 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
4825 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
4826 if (AVLInt <= MinVLMAX) {
4827 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
4828 } else if (AVLInt >= 2 * MaxVLMAX) {
4829 // Just set vl to VLMAX in this situation
4830 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
4831 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
4832 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
4833 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
4834 SDValue SETVLMAX = DAG.getTargetConstant(
4835 Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32);
4836 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
4837 LMUL);
4838 } else {
4839 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
4840 // is related to the hardware implementation.
4841 // So let the following code handle it.
4842 }
4843 }
4844 if (!I32VL) {
4845 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
4846 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
4847 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
4848 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
4849 SDValue SETVL =
4850 DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
4851 // Use the vsetvli instruction to get the actual VL, which depends on
4852 // the hardware implementation.
4853 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
4854 SEW, LMUL);
4855 I32VL =
4856 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
4857 }
4859 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
4861 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
4862 // instructions.
4863 SDValue Passthru;
4864 if (IsMasked)
4865 Passthru = DAG.getUNDEF(I32VT);
4866 else
4867 Passthru = DAG.getBitcast(I32VT, Operands[1]);
4869 if (IntNo == Intrinsic::riscv_vslide1up ||
4870 IntNo == Intrinsic::riscv_vslide1up_mask) {
4871 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
4872 ScalarHi, I32Mask, I32VL);
4873 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
4874 ScalarLo, I32Mask, I32VL);
4875 } else {
4876 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
4877 ScalarLo, I32Mask, I32VL);
4878 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
4879 ScalarHi, I32Mask, I32VL);
4880 }
4882 // Convert back to nxvXi64.
4883 Vec = DAG.getBitcast(VT, Vec);
4885 if (!IsMasked)
4886 return Vec;
4887 // Apply mask after the operation.
4888 SDValue Mask = Operands[NumOps - 3];
4889 SDValue MaskedOff = Operands[1];
4890 // Assume Policy operand is the last operand.
4891 uint64_t Policy =
4892 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
4893 // We don't need to select maskedoff if it's undef.
4894 if (MaskedOff.isUndef())
4895 return Vec;
4897 if (Policy == RISCVII::TAIL_AGNOSTIC)
4898 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
4899 VL);
4900 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
4901 // It's fine because vmerge does not care about mask policy.
4902 return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
4903 VL);
4904 }
4905 }
4907 // We need to convert the scalar to a splat vector.
4908 SDValue VL = getVLOperand(Op);
4909 assert(VL.getValueType() == XLenVT);
4910 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
4911 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
4912 }
4914 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4915 SelectionDAG &DAG) const {
4916 unsigned IntNo = Op.getConstantOperandVal(0);
4917 SDLoc DL(Op);
4918 MVT XLenVT = Subtarget.getXLenVT();
4920 switch (IntNo) {
4921 default:
4922 break; // Don't custom lower most intrinsics.
4923 case Intrinsic::thread_pointer: {
4924 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4925 return DAG.getRegister(RISCV::X4, PtrVT);
4926 }
4927 case Intrinsic::riscv_orc_b:
4928 case Intrinsic::riscv_brev8: {
4929 // Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8.
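// With an immediate of 7 the generalized reverse/or-combine stays within
// each byte: GREV(x, 7) is a per-byte bit reverse (brev8) and GORC(x, 7)
// or-combines the bits of each byte (orc.b).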
4930 unsigned Opc =
4931 IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC;
4932 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
4933 DAG.getConstant(7, DL, XLenVT));
4934 }
4935 case Intrinsic::riscv_grev:
4936 case Intrinsic::riscv_gorc: {
4937 unsigned Opc =
4938 IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
4939 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4940 }
4941 case Intrinsic::riscv_zip:
4942 case Intrinsic::riscv_unzip: {
4943 // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
4944 // For i32 the immediate is 15. For i64 the immediate is 31.
4945 unsigned Opc =
4946 IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4947 unsigned BitWidth = Op.getValueSizeInBits();
4948 assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width");
4949 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
4950 DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT));
4951 }
4952 case Intrinsic::riscv_shfl:
4953 case Intrinsic::riscv_unshfl: {
4954 unsigned Opc =
4955 IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
4956 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4957 }
4958 case Intrinsic::riscv_bcompress:
4959 case Intrinsic::riscv_bdecompress: {
4960 unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
4961 : RISCVISD::BDECOMPRESS;
4962 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
4963 }
4964 case Intrinsic::riscv_bfp:
4965 return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
4966 Op.getOperand(2));
4967 case Intrinsic::riscv_fsl:
4968 return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
4969 Op.getOperand(2), Op.getOperand(3));
4970 case Intrinsic::riscv_fsr:
4971 return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
4972 Op.getOperand(2), Op.getOperand(3));
4973 case Intrinsic::riscv_vmv_x_s:
4974 assert(Op.getValueType() == XLenVT && "Unexpected VT!");
4975 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
4976 Op.getOperand(1));
4977 case Intrinsic::riscv_vmv_v_x:
4978 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
4979 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
4980 Subtarget);
4981 case Intrinsic::riscv_vfmv_v_f:
4982 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
4983 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4984 case Intrinsic::riscv_vmv_s_x: {
4985 SDValue Scalar = Op.getOperand(2);
4987 if (Scalar.getValueType().bitsLE(XLenVT)) {
4988 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
4989 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
4990 Op.getOperand(1), Scalar, Op.getOperand(3));
4991 }
4993 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
4995 // This is an i64 value that lives in two scalar registers. We have to
4996 // insert this in a convoluted way. First we build vXi64 splat containing
4997 // the two values that we assemble using some bit math. Next we'll use
4998 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
4999 // to merge element 0 from our splat into the source vector.
5000 // FIXME: This is probably not the best way to do this, but it is
5001 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
5002 // point.
5003 // sw lo, (a0)
5004 // sw hi, 4(a0)
5005 // vlse vX, (a0)
5006 //
5007 // vid.v vVid
5008 // vmseq.vx mMask, vVid, 0
5009 // vmerge.vvm vDest, vSrc, vVal, mMask
5010 MVT VT = Op.getSimpleValueType();
5011 SDValue Vec = Op.getOperand(1);
5012 SDValue VL = getVLOperand(Op);
5014 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
5015 if (Op.getOperand(1).isUndef())
5016 return SplattedVal;
5017 SDValue SplattedIdx =
5018 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
5019 DAG.getConstant(0, DL, MVT::i32), VL);
5021 MVT MaskVT = getMaskTypeFor(VT);
5022 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
5023 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
5024 SDValue SelectCond =
5025 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
5026 DAG.getCondCode(ISD::SETEQ), Mask, VL);
5027 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
5028 Vec, VL);
5029 }
5030 }
5032 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
5033 }
5035 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
5036 SelectionDAG &DAG) const {
5037 unsigned IntNo = Op.getConstantOperandVal(1);
5038 switch (IntNo) {
5039 default:
5040 break; // Don't custom lower most intrinsics.
5041 case Intrinsic::riscv_masked_strided_load: {
5042 SDLoc DL(Op);
5043 MVT XLenVT = Subtarget.getXLenVT();
5045 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5046 // the selection of the masked intrinsics doesn't do this for us.
5047 SDValue Mask = Op.getOperand(5);
5048 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5050 MVT VT = Op->getSimpleValueType(0);
5051 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5053 SDValue PassThru = Op.getOperand(2);
5054 if (!IsUnmasked) {
5055 MVT MaskVT = getMaskTypeFor(ContainerVT);
5056 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5057 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
5058 }
5060 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
5062 SDValue IntID = DAG.getTargetConstant(
5063 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
5064 XLenVT);
5066 auto *Load = cast<MemIntrinsicSDNode>(Op);
5067 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
5068 if (IsUnmasked)
5069 Ops.push_back(DAG.getUNDEF(ContainerVT));
5070 else
5071 Ops.push_back(PassThru);
5072 Ops.push_back(Op.getOperand(3)); // Ptr
5073 Ops.push_back(Op.getOperand(4)); // Stride
5074 if (!IsUnmasked)
5075 Ops.push_back(Mask);
5076 Ops.push_back(VL);
5077 if (!IsUnmasked) {
5078 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
5079 Ops.push_back(Policy);
5080 }
5082 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5083 SDValue Result =
5084 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
5085 Load->getMemoryVT(), Load->getMemOperand());
5086 SDValue Chain = Result.getValue(1);
5087 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
5088 return DAG.getMergeValues({Result, Chain}, DL);
5089 }
5090 case Intrinsic::riscv_seg2_load:
5091 case Intrinsic::riscv_seg3_load:
5092 case Intrinsic::riscv_seg4_load:
5093 case Intrinsic::riscv_seg5_load:
5094 case Intrinsic::riscv_seg6_load:
5095 case Intrinsic::riscv_seg7_load:
5096 case Intrinsic::riscv_seg8_load: {
5097 SDLoc DL(Op);
5098 static const Intrinsic::ID VlsegInts[7] = {
5099 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
5100 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
5101 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
5102 Intrinsic::riscv_vlseg8};
5103 unsigned NF = Op->getNumValues() - 1;
5104 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
5105 MVT XLenVT = Subtarget.getXLenVT();
5106 MVT VT = Op->getSimpleValueType(0);
5107 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5109 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
5110 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
5111 auto *Load = cast<MemIntrinsicSDNode>(Op);
5112 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
5113 ContainerVTs.push_back(MVT::Other);
5114 SDVTList VTs = DAG.getVTList(ContainerVTs);
5115 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
5116 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
5117 Ops.push_back(Op.getOperand(2));
5118 Ops.push_back(VL);
5119 SDValue Result =
5120 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
5121 Load->getMemoryVT(), Load->getMemOperand());
5122 SmallVector<SDValue, 9> Results;
5123 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
5124 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
5125 DAG, Subtarget));
5126 Results.push_back(Result.getValue(NF));
5127 return DAG.getMergeValues(Results, DL);
5128 }
5129 }
5131 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
5132 }
5134 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
5135 SelectionDAG &DAG) const {
5136 unsigned IntNo = Op.getConstantOperandVal(1);
5137 switch (IntNo) {
5138 default:
5139 break; // Don't custom lower most intrinsics.
5140 case Intrinsic::riscv_masked_strided_store: {
5141 SDLoc DL(Op);
5142 MVT XLenVT = Subtarget.getXLenVT();
5144 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
5145 // the selection of the masked intrinsics doesn't do this for us.
5146 SDValue Mask = Op.getOperand(5);
5147 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
5149 SDValue Val = Op.getOperand(2);
5150 MVT VT = Val.getSimpleValueType();
5151 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5153 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
5154 if (!IsUnmasked) {
5155 MVT MaskVT = getMaskTypeFor(ContainerVT);
5156 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
5157 }
5159 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
5161 SDValue IntID = DAG.getTargetConstant(
5162 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
5163 XLenVT);
5165 auto *Store = cast<MemIntrinsicSDNode>(Op);
5166 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
5167 Ops.push_back(Val);
5168 Ops.push_back(Op.getOperand(3)); // Ptr
5169 Ops.push_back(Op.getOperand(4)); // Stride
5170 if (!IsUnmasked)
5171 Ops.push_back(Mask);
5172 Ops.push_back(VL);
5174 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
5175 Ops, Store->getMemoryVT(),
5176 Store->getMemOperand());
5177 }
5178 }
5180 return SDValue();
5181 }
5183 static MVT getLMUL1VT(MVT VT) {
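// An LMUL=1 vector type spans exactly one vector register (RVVBitsPerBlock
// bits), so its element count is bits-per-block divided by the element size.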
5184 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
5185 "Unexpected vector MVT");
5186 return MVT::getScalableVectorVT(
5187 VT.getVectorElementType(),
5188 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
5189 }
5191 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
5192 switch (ISDOpcode) {
5193 default:
5194 llvm_unreachable("Unhandled reduction");
5195 case ISD::VECREDUCE_ADD:
5196 return RISCVISD::VECREDUCE_ADD_VL;
5197 case ISD::VECREDUCE_UMAX:
5198 return RISCVISD::VECREDUCE_UMAX_VL;
5199 case ISD::VECREDUCE_SMAX:
5200 return RISCVISD::VECREDUCE_SMAX_VL;
5201 case ISD::VECREDUCE_UMIN:
5202 return RISCVISD::VECREDUCE_UMIN_VL;
5203 case ISD::VECREDUCE_SMIN:
5204 return RISCVISD::VECREDUCE_SMIN_VL;
5205 case ISD::VECREDUCE_AND:
5206 return RISCVISD::VECREDUCE_AND_VL;
5207 case ISD::VECREDUCE_OR:
5208 return RISCVISD::VECREDUCE_OR_VL;
5209 case ISD::VECREDUCE_XOR:
5210 return RISCVISD::VECREDUCE_XOR_VL;
5211 }
5212 }
5214 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
5215 SelectionDAG &DAG,
5216 bool IsVP) const {
5217 SDLoc DL(Op);
5218 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
5219 MVT VecVT = Vec.getSimpleValueType();
5220 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
5221 Op.getOpcode() == ISD::VECREDUCE_OR ||
5222 Op.getOpcode() == ISD::VECREDUCE_XOR ||
5223 Op.getOpcode() == ISD::VP_REDUCE_AND ||
5224 Op.getOpcode() == ISD::VP_REDUCE_OR ||
5225 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
5226 "Unexpected reduction lowering");
5228 MVT XLenVT = Subtarget.getXLenVT();
5229 assert(Op.getValueType() == XLenVT &&
5230 "Expected reduction output to be legalized to XLenVT");
5232 MVT ContainerVT = VecVT;
5233 if (VecVT.isFixedLengthVector()) {
5234 ContainerVT = getContainerForFixedLengthVector(VecVT);
5235 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5236 }
5238 SDValue Mask, VL;
5239 if (IsVP) {
5240 Mask = Op.getOperand(2);
5241 VL = Op.getOperand(3);
5242 } else {
5243 std::tie(Mask, VL) =
5244 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
5245 }
5247 unsigned BaseOpc;
5248 ISD::CondCode CC;
5249 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
5251 switch (Op.getOpcode()) {
5252 default:
5253 llvm_unreachable("Unhandled reduction");
5254 case ISD::VECREDUCE_AND:
5255 case ISD::VP_REDUCE_AND: {
5256 // vcpop ~x == 0
5257 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
5258 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
5259 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
5260 CC = ISD::SETEQ;
5261 BaseOpc = ISD::AND;
5262 break;
5263 }
5264 case ISD::VECREDUCE_OR:
5265 case ISD::VP_REDUCE_OR:
5266 // vcpop x != 0
5267 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
5268 CC = ISD::SETNE;
5269 BaseOpc = ISD::OR;
5270 break;
5271 case ISD::VECREDUCE_XOR:
5272 case ISD::VP_REDUCE_XOR: {
5273 // ((vcpop x) & 1) != 0
5274 SDValue One = DAG.getConstant(1, DL, XLenVT);
5275 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
5276 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
5277 CC = ISD::SETNE;
5278 BaseOpc = ISD::XOR;
5279 break;
5280 }
5281 }
5283 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
5285 if (!IsVP)
5286 return SetCC;
5288 // Now include the start value in the operation.
5289 // Note that we must return the start value when no elements are operated
5290 // upon. The vcpop instructions we've emitted in each case above will return
5291 // 0 for an inactive vector, and so we've already received the neutral value:
5292 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
5293 // can simply include the start value.
5294 return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
5295 }
5297 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
5298 SelectionDAG &DAG) const {
5299 SDLoc DL(Op);
5300 SDValue Vec = Op.getOperand(0);
5301 EVT VecEVT = Vec.getValueType();
5303 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
5305 // Due to ordering in legalize types we may have a vector type that needs to
5306 // be split. Do that manually so we can get down to a legal type.
5307 while (getTypeAction(*DAG.getContext(), VecEVT) ==
5308 TargetLowering::TypeSplitVector) {
5309 SDValue Lo, Hi;
5310 std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
5311 VecEVT = Lo.getValueType();
5312 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
5313 }
5315 // TODO: The type may need to be widened rather than split. Or widened before
5316 // splitting.
5317 if (!isTypeLegal(VecEVT))
5318 return SDValue();
5320 MVT VecVT = VecEVT.getSimpleVT();
5321 MVT VecEltVT = VecVT.getVectorElementType();
5322 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
5324 MVT ContainerVT = VecVT;
5325 if (VecVT.isFixedLengthVector()) {
5326 ContainerVT = getContainerForFixedLengthVector(VecVT);
5327 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5328 }
5330 MVT M1VT = getLMUL1VT(ContainerVT);
5331 MVT XLenVT = Subtarget.getXLenVT();
5333 SDValue Mask, VL;
5334 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
5336 SDValue NeutralElem =
5337 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
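// The neutral element is e.g. 0 for add/or/xor and all-ones for and, so
// lanes beyond VL cannot perturb the reduction result.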
5338 SDValue IdentitySplat =
5339 lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT),
5340 M1VT, DL, DAG, Subtarget);
5341 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
5342 IdentitySplat, Mask, VL);
5343 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
5344 DAG.getConstant(0, DL, XLenVT));
5345 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
5346 }
5348 // Given a reduction op, this function returns the matching reduction opcode,
5349 // the vector SDValue and the scalar SDValue required to lower this to a
5350 // RISCVISD node.
5351 static std::tuple<unsigned, SDValue, SDValue>
5352 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
5353 SDLoc DL(Op);
5354 auto Flags = Op->getFlags();
5355 unsigned Opcode = Op.getOpcode();
5356 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
5357 switch (Opcode) {
5358 default:
5359 llvm_unreachable("Unhandled reduction");
5360 case ISD::VECREDUCE_FADD: {
5361 // Use positive zero if we can. It is cheaper to materialize.
5362 SDValue Zero =
5363 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
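// Note: -0.0 is the true FADD identity (+0.0 + -0.0 == +0.0); starting from
// +0.0 is only safe when the no-signed-zeros flag is set.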
5364 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
5366 case ISD::VECREDUCE_SEQ_FADD:
5367 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
5368 Op.getOperand(0));
5369 case ISD::VECREDUCE_FMIN:
5370 return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
5371 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
5372 case ISD::VECREDUCE_FMAX:
5373 return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
5374 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
5375 }
5376 }
5378 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
5379 SelectionDAG &DAG) const {
5380 SDLoc DL(Op);
5381 MVT VecEltVT = Op.getSimpleValueType();
5383 unsigned RVVOpcode;
5384 SDValue VectorVal, ScalarVal;
5385 std::tie(RVVOpcode, VectorVal, ScalarVal) =
5386 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
5387 MVT VecVT = VectorVal.getSimpleValueType();
5389 MVT ContainerVT = VecVT;
5390 if (VecVT.isFixedLengthVector()) {
5391 ContainerVT = getContainerForFixedLengthVector(VecVT);
5392 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
5393 }
5395 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
5396 MVT XLenVT = Subtarget.getXLenVT();
5398 SDValue Mask, VL;
5399 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
5401 SDValue ScalarSplat =
5402 lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT),
5403 M1VT, DL, DAG, Subtarget);
5404 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
5405 VectorVal, ScalarSplat, Mask, VL);
5406 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
5407 DAG.getConstant(0, DL, XLenVT));
5408 }
5410 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
5411 switch (ISDOpcode) {
5412 default:
5413 llvm_unreachable("Unhandled reduction");
5414 case ISD::VP_REDUCE_ADD:
5415 return RISCVISD::VECREDUCE_ADD_VL;
5416 case ISD::VP_REDUCE_UMAX:
5417 return RISCVISD::VECREDUCE_UMAX_VL;
5418 case ISD::VP_REDUCE_SMAX:
5419 return RISCVISD::VECREDUCE_SMAX_VL;
5420 case ISD::VP_REDUCE_UMIN:
5421 return RISCVISD::VECREDUCE_UMIN_VL;
5422 case ISD::VP_REDUCE_SMIN:
5423 return RISCVISD::VECREDUCE_SMIN_VL;
5424 case ISD::VP_REDUCE_AND:
5425 return RISCVISD::VECREDUCE_AND_VL;
5426 case ISD::VP_REDUCE_OR:
5427 return RISCVISD::VECREDUCE_OR_VL;
5428 case ISD::VP_REDUCE_XOR:
5429 return RISCVISD::VECREDUCE_XOR_VL;
5430 case ISD::VP_REDUCE_FADD:
5431 return RISCVISD::VECREDUCE_FADD_VL;
5432 case ISD::VP_REDUCE_SEQ_FADD:
5433 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
5434 case ISD::VP_REDUCE_FMAX:
5435 return RISCVISD::VECREDUCE_FMAX_VL;
5436 case ISD::VP_REDUCE_FMIN:
5437 return RISCVISD::VECREDUCE_FMIN_VL;
5438 }
5439 }
5441 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
5442 SelectionDAG &DAG) const {
5443 SDLoc DL(Op);
5444 SDValue Vec = Op.getOperand(1);
5445 EVT VecEVT = Vec.getValueType();
5447 // TODO: The type may need to be widened rather than split. Or widened before
5448 // splitting.
5449 if (!isTypeLegal(VecEVT))
5450 return SDValue();
5452 MVT VecVT = VecEVT.getSimpleVT();
5453 MVT VecEltVT = VecVT.getVectorElementType();
5454 unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
5456 MVT ContainerVT = VecVT;
5457 if (VecVT.isFixedLengthVector()) {
5458 ContainerVT = getContainerForFixedLengthVector(VecVT);
5459 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5460 }
5462 SDValue VL = Op.getOperand(3);
5463 SDValue Mask = Op.getOperand(2);
5465 MVT M1VT = getLMUL1VT(ContainerVT);
5466 MVT XLenVT = Subtarget.getXLenVT();
5467 MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
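// E.g. an i8 VP reduction on RV64 computes at XLenVT (i64); the scalar is
// truncated back down to i8 at the end.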
5469 SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0),
5470 DAG.getConstant(1, DL, XLenVT), M1VT,
5471 DL, DAG, Subtarget);
5472 SDValue Reduction =
5473 DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
5474 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
5475 DAG.getConstant(0, DL, XLenVT));
5476 if (!VecVT.isInteger())
5477 return Elt0;
5478 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
5479 }
5481 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
5482 SelectionDAG &DAG) const {
5483 SDValue Vec = Op.getOperand(0);
5484 SDValue SubVec = Op.getOperand(1);
5485 MVT VecVT = Vec.getSimpleValueType();
5486 MVT SubVecVT = SubVec.getSimpleValueType();
5488 SDLoc DL(Op);
5489 MVT XLenVT = Subtarget.getXLenVT();
5490 unsigned OrigIdx = Op.getConstantOperandVal(2);
5491 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5493 // We don't have the ability to slide mask vectors up indexed by their i1
5494 // elements; the smallest we can do is i8. Often we are able to bitcast to
5495 // equivalent i8 vectors. Note that when inserting a fixed-length vector
5496 // into a scalable one, we might not necessarily have enough scalable
5497 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
5498 if (SubVecVT.getVectorElementType() == MVT::i1 &&
5499 (OrigIdx != 0 || !Vec.isUndef())) {
5500 if (VecVT.getVectorMinNumElements() >= 8 &&
5501 SubVecVT.getVectorMinNumElements() >= 8) {
5502 assert(OrigIdx % 8 == 0 && "Invalid index");
5503 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
5504 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
5505 "Unexpected mask vector lowering");
5506 OrigIdx /= 8;
5507 SubVecVT =
5508 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
5509 SubVecVT.isScalableVector());
5510 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
5511 VecVT.isScalableVector());
5512 Vec = DAG.getBitcast(VecVT, Vec);
5513 SubVec = DAG.getBitcast(SubVecVT, SubVec);
5514 } else {
5515 // We can't slide this mask vector up indexed by its i1 elements.
5516 // This poses a problem when we wish to insert a scalable vector which
5517 // can't be re-expressed as a larger type. Just choose the slow path and
5518 // extend to a larger type, then truncate back down.
5519 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
5520 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
5521 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
5522 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
5523 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
5524 Op.getOperand(2));
5525 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
5526 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
5527 }
5528 }
5530 // If the subvector is a fixed-length type, we cannot use subregister
5531 // manipulation to simplify the codegen; we don't know which register of a
5532 // LMUL group contains the specific subvector as we only know the minimum
5533 // register size. Therefore we must slide the vector group up the full
5534 // amount.
5535 if (SubVecVT.isFixedLengthVector()) {
5535 if (SubVecVT.isFixedLengthVector()) {
5536 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
5537 return Op;
5538 MVT ContainerVT = VecVT;
5539 if (VecVT.isFixedLengthVector()) {
5540 ContainerVT = getContainerForFixedLengthVector(VecVT);
5541 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5542 }
5543 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
5544 DAG.getUNDEF(ContainerVT), SubVec,
5545 DAG.getConstant(0, DL, XLenVT));
5546 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
5547 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
5548 return DAG.getBitcast(Op.getValueType(), SubVec);
5549 }
5550 SDValue Mask =
5551 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
5552 // Set the vector length to only the number of elements we care about. Note
5553 // that for slideup this includes the offset.
5554 SDValue VL =
5555 DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
5556 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
5557 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
5558 SubVec, SlideupAmt, Mask, VL);
5559 if (VecVT.isFixedLengthVector())
5560 Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
5561 return DAG.getBitcast(Op.getValueType(), Slideup);
5562 }
5564 unsigned SubRegIdx, RemIdx;
5565 std::tie(SubRegIdx, RemIdx) =
5566 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
5567 VecVT, SubVecVT, OrigIdx, TRI);
5569 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
5570 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
5571 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
5572 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
5574 // 1. If the Idx has been completely eliminated and this subvector's size is
5575 // a vector register or a multiple thereof, or the surrounding elements are
5576 // undef, then this is a subvector insert which naturally aligns to a vector
5577 // register. These can easily be handled using subregister manipulation.
5578 // 2. If the subvector is smaller than a vector register, then the insertion
5579 // must preserve the undisturbed elements of the register. We do this by
5580 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
5581 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
5582 // subvector within the vector register, and an INSERT_SUBVECTOR of that
5583 // LMUL=1 type back into the larger vector (resolving to another subregister
5584 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
5585 // to avoid allocating a large register group to hold our subvector.
5586 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
5587 return Op;
5589 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
5590 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
5591 // (in our case undisturbed). This means we can set up a subvector insertion
5592 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
5593 // size of the subvector.
5594 MVT InterSubVT = VecVT;
5595 SDValue AlignedExtract = Vec;
5596 unsigned AlignedIdx = OrigIdx - RemIdx;
5597 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
5598 InterSubVT = getLMUL1VT(VecVT);
5599 // Extract a subvector equal to the nearest full vector register type. This
5600 // should resolve to a EXTRACT_SUBREG instruction.
5601 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
5602 DAG.getConstant(AlignedIdx, DL, XLenVT));
5603 }
5605 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
5606 // For scalable vectors this must be further multiplied by vscale.
5607 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
5609 SDValue Mask, VL;
5610 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
5612 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
5613 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
5614 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
5615 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
5617 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
5618 DAG.getUNDEF(InterSubVT), SubVec,
5619 DAG.getConstant(0, DL, XLenVT));
5621 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
5622 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
5624 // If required, insert this subvector back into the correct vector register.
5625 // This should resolve to an INSERT_SUBREG instruction.
5626 if (VecVT.bitsGT(InterSubVT))
5627 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
5628 DAG.getConstant(AlignedIdx, DL, XLenVT));
5630 // We might have bitcast from a mask type: cast back to the original type if
5631 // necessary.
5632 return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
5633 }
5635 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
5636 SelectionDAG &DAG) const {
5637 SDValue Vec = Op.getOperand(0);
5638 MVT SubVecVT = Op.getSimpleValueType();
5639 MVT VecVT = Vec.getSimpleValueType();
5641 SDLoc DL(Op);
5642 MVT XLenVT = Subtarget.getXLenVT();
5643 unsigned OrigIdx = Op.getConstantOperandVal(1);
5644 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
5646 // We don't have the ability to slide mask vectors down indexed by their i1
5647 // elements; the smallest we can do is i8. Often we are able to bitcast to
5648 // equivalent i8 vectors. Note that when extracting a fixed-length vector
5649 // from a scalable one, we might not necessarily have enough scalable
5650 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
5651 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
5652 if (VecVT.getVectorMinNumElements() >= 8 &&
5653 SubVecVT.getVectorMinNumElements() >= 8) {
5654 assert(OrigIdx % 8 == 0 && "Invalid index");
5655 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
5656 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
5657 "Unexpected mask vector lowering");
5658 OrigIdx /= 8;
5659 SubVecVT =
5660 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
5661 SubVecVT.isScalableVector());
5662 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
5663 VecVT.isScalableVector());
5664 Vec = DAG.getBitcast(VecVT, Vec);
5665 } else {
5666 // We can't slide this mask vector down, indexed by its i1 elements.
5667 // This poses a problem when we wish to extract a scalable vector which
5668 // can't be re-expressed as a larger type. Just choose the slow path and
5669 // extend to a larger type, then truncate back down.
5670 // TODO: We could probably improve this when extracting certain fixed
5671 // from fixed, where we can extract as i8 and shift the correct element
5672 // right to reach the desired subvector?
5673 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
5674 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
5675 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
5676 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
5677 Op.getOperand(1));
5678 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
5679 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
5680 }
5681 }
5683 // If the subvector is a fixed-length type, we cannot use subregister
5684 // manipulation to simplify the codegen; we don't know which register of a
5685 // LMUL group contains the specific subvector as we only know the minimum
5686 // register size. Therefore we must slide the vector group down the full
5687 // amount.
5688 if (SubVecVT.isFixedLengthVector()) {
5689 // With an index of 0 this is a cast-like subvector, which can be performed
5690 // with subregister operations.
5691 if (OrigIdx == 0)
5692 return Op;
5693 MVT ContainerVT = VecVT;
5694 if (VecVT.isFixedLengthVector()) {
5695 ContainerVT = getContainerForFixedLengthVector(VecVT);
5696 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
5697 }
5698 SDValue Mask =
5699 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
5700 // Set the vector length to only the number of elements we care about. This
5701 // avoids sliding down elements we're going to discard straight away.
5702 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
5703 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
5704 SDValue Slidedown =
5705 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
5706 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
5707 // Now we can use a cast-like subvector extract to get the result.
5708 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
5709 DAG.getConstant(0, DL, XLenVT));
5710 return DAG.getBitcast(Op.getValueType(), Slidedown);
5711 }
5713 unsigned SubRegIdx, RemIdx;
5714 std::tie(SubRegIdx, RemIdx) =
5715 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
5716 VecVT, SubVecVT, OrigIdx, TRI);
5718 // If the Idx has been completely eliminated then this is a subvector extract
5719 // which naturally aligns to a vector register. These can easily be handled
5720 // using subregister manipulation.
5721 if (RemIdx == 0)
5722 return Op;
5724 // Else we must shift our vector register directly to extract the subvector.
5725 // Do this using VSLIDEDOWN.
5727 // If the vector type is an LMUL-group type, extract a subvector equal to the
5728 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
5729 // instruction.
5730 MVT InterSubVT = VecVT;
5731 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
5732 InterSubVT = getLMUL1VT(VecVT);
5733 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
5734 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
5735 }
5737 // Slide this vector register down by the desired number of elements in order
5738 // to place the desired subvector starting at element 0.
5739 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
5740 // For scalable vectors this must be further multiplied by vscale.
5741 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
5743 SDValue Mask, VL;
5744 std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
5745 SDValue Slidedown =
5746 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
5747 DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
5749 // Now the vector is in the right position, extract our final subvector. This
5750 // should resolve to a COPY.
5751 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
5752 DAG.getConstant(0, DL, XLenVT));
5754 // We might have bitcast from a mask type: cast back to the original type if
5755 // necessary.
5756 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
5757 }
5759 // Lower step_vector to the vid instruction. Any non-identity step value must
5760 // be accounted for by manual expansion.
5761 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
5762 SelectionDAG &DAG) const {
5763 SDLoc DL(Op);
5764 MVT VT = Op.getSimpleValueType();
5765 MVT XLenVT = Subtarget.getXLenVT();
5766 SDValue Mask, VL;
5767 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
5768 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
5769 uint64_t StepValImm = Op.getConstantOperandVal(0);
5770 if (StepValImm != 1) {
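// A power-of-two step is lowered as a vector shift below, which is cheaper
// than the splat-and-multiply fallback used for general steps.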
5771 if (isPowerOf2_64(StepValImm)) {
5772 SDValue StepVal =
5773 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
5774 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
5775 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
5776 } else {
5777 SDValue StepVal = lowerScalarSplat(
5778 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
5779 VL, VT, DL, DAG, Subtarget);
5780 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
5781 }
5782 }
5783 return StepVec;
5784 }
5786 // Implement vector_reverse using vrgather.vv with indices determined by
5787 // subtracting the id of each element from (VLMAX-1). This will convert
5788 // the indices like so:
5789 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
5790 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
5791 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
5792 SelectionDAG &DAG) const {
5793 SDLoc DL(Op);
5794 MVT VecVT = Op.getSimpleValueType();
5795 if (VecVT.getVectorElementType() == MVT::i1) {
5796 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
5797 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
5798 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
5799 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
5800 }
5801 unsigned EltSize = VecVT.getScalarSizeInBits();
5802 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
5803 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
5804 unsigned MaxVLMAX =
5805 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
5807 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
5808 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
5810 // If this is SEW=8 and VLMAX is potentially more than 256, we need
5811 // to use vrgatherei16.vv.
5812 // TODO: It's also possible to use vrgatherei16.vv for other types to
5813 // decrease register width for the index calculation.
5814 if (MaxVLMAX > 256 && EltSize == 8) {
5815 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
5816 // Reverse each half, then reassemble them in reverse order.
5817 // NOTE: It's also possible that after splitting that VLMAX no longer
5818 // requires vrgatherei16.vv.
5819 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
5820 SDValue Lo, Hi;
5821 std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
5822 EVT LoVT, HiVT;
5823 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
5824 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
5825 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
5826 // Reassemble the low and high pieces reversed.
5827 // FIXME: This is a CONCAT_VECTORS.
5828 SDValue Res =
5829 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
5830 DAG.getIntPtrConstant(0, DL));
5831 return DAG.getNode(
5832 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
5833 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
5834 }
5836 // Just promote the int type to i16 which will double the LMUL.
5837 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
5838 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
5839 }
5841 MVT XLenVT = Subtarget.getXLenVT();
5842 SDValue Mask, VL;
5843 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
5845 // Calculate VLMAX-1 for the desired SEW.
5846 unsigned MinElts = VecVT.getVectorMinNumElements();
5847 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
5848 DAG.getConstant(MinElts, DL, XLenVT));
5849 SDValue VLMinus1 =
5850 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
5852 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
5853 bool IsRV32E64 =
5854 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
5855 SDValue SplatVL;
5856 if (!IsRV32E64)
5857 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
5858 else
5859 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
5860 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
5862 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
5863 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
5864 DAG.getUNDEF(IntVT), Mask, VL);
5866 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
5867 DAG.getUNDEF(VecVT), Mask, VL);
5868 }
5870 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
5871 SelectionDAG &DAG) const {
5872 SDLoc DL(Op);
5873 SDValue V1 = Op.getOperand(0);
5874 SDValue V2 = Op.getOperand(1);
5875 MVT XLenVT = Subtarget.getXLenVT();
5876 MVT VecVT = Op.getSimpleValueType();
5878 unsigned MinElts = VecVT.getVectorMinNumElements();
5879 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
5880 DAG.getConstant(MinElts, DL, XLenVT));
5882 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
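// vector_splice conceptually concatenates V1:V2 and extracts a VLMAX-sized
// window at the immediate offset (negative offsets count back from the end
// of V1), hence the slidedown of V1 followed by a slideup of V2 below.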
5883 SDValue DownOffset, UpOffset;
5884 if (ImmValue >= 0) {
5885 // The operand is a TargetConstant, we need to rebuild it as a regular
5886 // constant.
5887 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
5888 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
5889 } else {
5890 // The operand is a TargetConstant, we need to rebuild it as a regular
5891 // constant rather than negating the original operand.
5892 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
5893 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
5894 }
5896 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
5898 SDValue SlideDown =
5899 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1,
5900 DownOffset, TrueMask, UpOffset);
5901 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset,
5902 TrueMask, DAG.getRegister(RISCV::X0, XLenVT));
5903 }
5905 SDValue
5906 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
5907 SelectionDAG &DAG) const {
5908 SDLoc DL(Op);
5909 auto *Load = cast<LoadSDNode>(Op);
5911 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5912 Load->getMemoryVT(),
5913 *Load->getMemOperand()) &&
5914 "Expecting a correctly-aligned load");
5916 MVT VT = Op.getSimpleValueType();
5917 MVT XLenVT = Subtarget.getXLenVT();
5918 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5920 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
5922 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
5923 SDValue IntID = DAG.getTargetConstant(
5924 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
5925 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
5926 if (!IsMaskOp)
5927 Ops.push_back(DAG.getUNDEF(ContainerVT));
5928 Ops.push_back(Load->getBasePtr());
5929 Ops.push_back(VL);
5930 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5931 SDValue NewLoad =
5932 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
5933 Load->getMemoryVT(), Load->getMemOperand());
5935 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5936 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
5937 }
5939 SDValue
5940 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
5941 SelectionDAG &DAG) const {
5942 SDLoc DL(Op);
5943 auto *Store = cast<StoreSDNode>(Op);
5945 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5946 Store->getMemoryVT(),
5947 *Store->getMemOperand()) &&
5948 "Expecting a correctly-aligned store");
5950 SDValue StoreVal = Store->getValue();
5951 MVT VT = StoreVal.getSimpleValueType();
5952 MVT XLenVT = Subtarget.getXLenVT();
5954 // If the size is less than a byte, we need to pad with zeros to make a byte.
5955 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
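// E.g. a v4i1 value is widened to v8i1 here so that vsm can store one whole
// byte; the padding lanes are zeroed.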
5956 VT = MVT::v8i1;
5957 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
5958 DAG.getConstant(0, DL, VT), StoreVal,
5959 DAG.getIntPtrConstant(0, DL));
5960 }
5962 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5964 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
5966 SDValue NewValue =
5967 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
5969 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
5970 SDValue IntID = DAG.getTargetConstant(
5971 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
5972 return DAG.getMemIntrinsicNode(
5973 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
5974 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
5975 Store->getMemoryVT(), Store->getMemOperand());
5976 }
5978 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
5979 SelectionDAG &DAG) const {
5980 SDLoc DL(Op);
5981 MVT VT = Op.getSimpleValueType();
5983 const auto *MemSD = cast<MemSDNode>(Op);
5984 EVT MemVT = MemSD->getMemoryVT();
5985 MachineMemOperand *MMO = MemSD->getMemOperand();
5986 SDValue Chain = MemSD->getChain();
5987 SDValue BasePtr = MemSD->getBasePtr();
5989 SDValue Mask, PassThru, VL;
5990 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
5991 Mask = VPLoad->getMask();
5992 PassThru = DAG.getUNDEF(VT);
5993 VL = VPLoad->getVectorLength();
5994 } else {
5995 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
5996 Mask = MLoad->getMask();
5997 PassThru = MLoad->getPassThru();
5998 }
6000 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
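// As with the strided forms above, a mask that is statically all ones lets
// us select the cheaper unmasked vle instead of vle_mask.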
6002 MVT XLenVT = Subtarget.getXLenVT();
6004 MVT ContainerVT = VT;
6005 if (VT.isFixedLengthVector()) {
6006 ContainerVT = getContainerForFixedLengthVector(VT);
6007 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
6008 if (!IsUnmasked) {
6009 MVT MaskVT = getMaskTypeFor(ContainerVT);
6010 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6011 }
6012 }
6014 if (!VL)
6015 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6017 unsigned IntID =
6018 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
6019 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
6020 if (IsUnmasked)
6021 Ops.push_back(DAG.getUNDEF(ContainerVT));
6022 else
6023 Ops.push_back(PassThru);
6024 Ops.push_back(BasePtr);
6025 if (!IsUnmasked)
6026 Ops.push_back(Mask);
6027 Ops.push_back(VL);
6028 if (!IsUnmasked)
6029 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
6031 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6033 SDValue Result =
6034 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
6035 Chain = Result.getValue(1);
6037 if (VT.isFixedLengthVector())
6038 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
6040 return DAG.getMergeValues({Result, Chain}, DL);
6041 }
6043 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
6044 SelectionDAG &DAG) const {
6045 SDLoc DL(Op);
6047 const auto *MemSD = cast<MemSDNode>(Op);
6048 EVT MemVT = MemSD->getMemoryVT();
6049 MachineMemOperand *MMO = MemSD->getMemOperand();
6050 SDValue Chain = MemSD->getChain();
6051 SDValue BasePtr = MemSD->getBasePtr();
6052 SDValue Val, Mask, VL;
6054 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
6055 Val = VPStore->getValue();
6056 Mask = VPStore->getMask();
6057 VL = VPStore->getVectorLength();
6058 } else {
6059 const auto *MStore = cast<MaskedStoreSDNode>(Op);
6060 Val = MStore->getValue();
6061 Mask = MStore->getMask();
6062 }
6064 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6066 MVT VT = Val.getSimpleValueType();
6067 MVT XLenVT = Subtarget.getXLenVT();
6069 MVT ContainerVT = VT;
6070 if (VT.isFixedLengthVector()) {
6071 ContainerVT = getContainerForFixedLengthVector(VT);
6073 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
6074 if (!IsUnmasked) {
6075 MVT MaskVT = getMaskTypeFor(ContainerVT);
6076 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6077 }
6078 }
6080 if (!VL)
6081 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6083 unsigned IntID =
6084 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
6085 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
6086 Ops.push_back(Val);
6087 Ops.push_back(BasePtr);
6088 if (!IsUnmasked)
6089 Ops.push_back(Mask);
6090 Ops.push_back(VL);
6092 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
6093 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
6094 }
6096 SDValue
6097 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
6098 SelectionDAG &DAG) const {
6099 MVT InVT = Op.getOperand(0).getSimpleValueType();
6100 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
6102 MVT VT = Op.getSimpleValueType();
6104 SDValue Op1 =
6105 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
6106 SDValue Op2 =
6107 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
6109 SDLoc DL(Op);
6110 SDValue VL =
6111 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
6113 MVT MaskVT = getMaskTypeFor(ContainerVT);
6114 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
6116 SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
6117 Op.getOperand(2), Mask, VL);
6119 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
6120 }
6122 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
6123 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
6124 MVT VT = Op.getSimpleValueType();
6126 if (VT.getVectorElementType() == MVT::i1)
6127 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMergeOp*/ false,
6128 /*HasMask*/ false);
6130 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMergeOp*/ true);
6131 }
6133 SDValue
6134 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
6135 SelectionDAG &DAG) const {
6136 unsigned Opc;
6137 switch (Op.getOpcode()) {
6138 default: llvm_unreachable("Unexpected opcode!");
6139 case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
6140 case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
6141 case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
6142 }
6144 return lowerToScalableOp(Op, DAG, Opc, /*HasMergeOp*/ true);
6145 }
6147 // Lower vector ABS to smax(X, sub(0, X)).
6148 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
6149 SDLoc DL(Op);
6150 MVT VT = Op.getSimpleValueType();
6151 SDValue X = Op.getOperand(0);
6153 assert(VT.isFixedLengthVector() && "Unexpected type");
6155 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6156 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6158 SDValue Mask, VL;
6159 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6161 SDValue SplatZero = DAG.getNode(
6162 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
6163 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
6164 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
6165 DAG.getUNDEF(ContainerVT), Mask, VL);
6166 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
6167 DAG.getUNDEF(ContainerVT), Mask, VL);
6169 return convertFromScalableVector(VT, Max, DAG, Subtarget);
6170 }
6172 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
6173 SDValue Op, SelectionDAG &DAG) const {
6174 SDLoc DL(Op);
6175 MVT VT = Op.getSimpleValueType();
6176 SDValue Mag = Op.getOperand(0);
6177 SDValue Sign = Op.getOperand(1);
6178 assert(Mag.getValueType() == Sign.getValueType() &&
6179 "Can only handle COPYSIGN with matching types.");
6181 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6182 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
6183 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
6185 SDValue Mask, VL;
6186 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6188 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
6189 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
6191 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
6192 }
6194 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
6195 SDValue Op, SelectionDAG &DAG) const {
6196 MVT VT = Op.getSimpleValueType();
6197 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6199 MVT I1ContainerVT =
6200 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6202 SDValue CC =
6203 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
6204 SDValue Op1 =
6205 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
6206 SDValue Op2 =
6207 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
6209 SDLoc DL(Op);
6210 SDValue Mask, VL;
6211 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6213 SDValue Select =
6214 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
6216 return convertFromScalableVector(VT, Select, DAG, Subtarget);
6217 }
6219 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
6220 unsigned NewOpc, bool HasMergeOp,
6221 bool HasMask) const {
6222 MVT VT = Op.getSimpleValueType();
6223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6225 // Create list of operands by converting existing ones to scalable types.
6226 SmallVector<SDValue, 6> Ops;
6227 for (const SDValue &V : Op->op_values()) {
6228 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
6230 // Pass through non-vector operands.
6231 if (!V.getValueType().isVector()) {
6236 // "cast" fixed length vector to a scalable vector.
6237 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
6238 "Only fixed length vectors are supported!");
6239 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
6240 }
6242 SDLoc DL(Op);
6243 SDValue Mask, VL;
6244 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6245 if (HasMergeOp)
6246 Ops.push_back(DAG.getUNDEF(ContainerVT));
6247 if (HasMask)
6248 Ops.push_back(Mask);
6249 Ops.push_back(VL);
6251 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
6252 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
6253 }
6255 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
6256 // * Operands of each node are assumed to be in the same order.
6257 // * The EVL operand is promoted from i32 to i64 on RV64.
6258 // * Fixed-length vectors are converted to their scalable-vector container
6259 //   types.
6260 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
6261 unsigned RISCVISDOpc,
6262 bool HasMergeOp) const {
6263 SDLoc DL(Op);
6264 MVT VT = Op.getSimpleValueType();
6265 SmallVector<SDValue, 4> Ops;
6267 MVT ContainerVT = VT;
6268 if (VT.isFixedLengthVector())
6269 ContainerVT = getContainerForFixedLengthVector(VT);
6271 for (const auto &OpIdx : enumerate(Op->ops())) {
6272 SDValue V = OpIdx.value();
6273 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
6274 // Add dummy merge value before the mask.
6275 if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
6276 Ops.push_back(DAG.getUNDEF(ContainerVT));
6277 // Pass through operands which aren't fixed-length vectors.
6278 if (!V.getValueType().isFixedLengthVector()) {
6279 Ops.push_back(V);
6280 continue;
6281 }
6282 // "cast" fixed length vector to a scalable vector.
6283 MVT OpVT = V.getSimpleValueType();
6284 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
6285 assert(useRVVForFixedLengthVectorVT(OpVT) &&
6286 "Only fixed length vectors are supported!");
6287 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
6288 }
6290 if (!VT.isFixedLengthVector())
6291 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
6293 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
6295 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
6296 }
6298 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
6299 SelectionDAG &DAG) const {
6300 SDLoc DL(Op);
6301 MVT VT = Op.getSimpleValueType();
6303 SDValue Src = Op.getOperand(0);
6304 // NOTE: Mask is dropped.
6305 SDValue VL = Op.getOperand(2);
6307 MVT ContainerVT = VT;
6308 if (VT.isFixedLengthVector()) {
6309 ContainerVT = getContainerForFixedLengthVector(VT);
6310 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6311 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
6312 }
6314 MVT XLenVT = Subtarget.getXLenVT();
6315 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6316 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6317 DAG.getUNDEF(ContainerVT), Zero, VL);
6319 SDValue SplatValue = DAG.getConstant(
6320 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
6321 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6322 DAG.getUNDEF(ContainerVT), SplatValue, VL);
6324 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
6325 Splat, ZeroSplat, VL);
6326 if (!VT.isFixedLengthVector())
6327 return Result;
6328 return convertFromScalableVector(VT, Result, DAG, Subtarget);
6329 }
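// A rough sketch of what the lowering above produces (illustrative, not
// verbatim output): a vp.zext from nxv2i1 to nxv2i32 becomes
//   VSELECT_VL src, splat(1), splat(0), vl
// so true lanes read the one-splat and false lanes the zero-splat, while a
// vp.sext selects splat(-1) instead so true lanes become all-ones.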
6331 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
6332 SelectionDAG &DAG) const {
6333 SDLoc DL(Op);
6334 MVT VT = Op.getSimpleValueType();
6336 SDValue Op1 = Op.getOperand(0);
6337 SDValue Op2 = Op.getOperand(1);
6338 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6339 // NOTE: Mask is dropped.
6340 SDValue VL = Op.getOperand(4);
6342 MVT ContainerVT = VT;
6343 if (VT.isFixedLengthVector()) {
6344 ContainerVT = getContainerForFixedLengthVector(VT);
6345 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
6346 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
6347 }
6349 SDValue Result;
6350 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
6352 switch (Condition) {
6353 default:
6354 break;
6355 // X != Y --> (X^Y)
6356 case ISD::SETNE:
6357 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
6358 break;
6359 // X == Y --> ~(X^Y)
6360 case ISD::SETEQ: {
6361 SDValue Temp =
6362 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
6363 Result =
6364 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
6365 break;
6366 }
6367 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
6368 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
6369 case ISD::SETGT:
6370 case ISD::SETULT: {
6371 SDValue Temp =
6372 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
6373 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
6374 break;
6375 }
6376 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
6377 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
6378 case ISD::SETLT:
6379 case ISD::SETUGT: {
6380 SDValue Temp =
6381 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
6382 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
6383 break;
6384 }
6385 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
6386 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
6387 case ISD::SETGE:
6388 case ISD::SETULE: {
6389 SDValue Temp =
6390 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
6391 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
6392 break;
6393 }
6394 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
6395 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
6396 case ISD::SETLE:
6397 case ISD::SETUGE: {
6398 SDValue Temp =
6399 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
6400 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
6401 break;
6402 }
6403 }
6405 if (!VT.isFixedLengthVector())
6406 return Result;
6407 return convertFromScalableVector(VT, Result, DAG, Subtarget);
6408 }
6410 // Lower Floating-Point/Integer Type-Convert VP SDNodes
6411 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
6412 unsigned RISCVISDOpc) const {
6413 SDLoc DL(Op);
6415 SDValue Src = Op.getOperand(0);
6416 SDValue Mask = Op.getOperand(1);
6417 SDValue VL = Op.getOperand(2);
6419 MVT DstVT = Op.getSimpleValueType();
6420 MVT SrcVT = Src.getSimpleValueType();
6421 if (DstVT.isFixedLengthVector()) {
6422 DstVT = getContainerForFixedLengthVector(DstVT);
6423 SrcVT = getContainerForFixedLengthVector(SrcVT);
6424 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
6425 MVT MaskVT = getMaskTypeFor(DstVT);
6426 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6427 }
6429 unsigned RISCVISDExtOpc = (RISCVISDOpc == RISCVISD::SINT_TO_FP_VL ||
6430 RISCVISDOpc == RISCVISD::FP_TO_SINT_VL)
6431 ? RISCVISD::VSEXT_VL
6432 : RISCVISD::VZEXT_VL;
6434 unsigned DstEltSize = DstVT.getScalarSizeInBits();
6435 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
6437 SDValue Result;
6438 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
6439 if (SrcVT.isInteger()) {
6440 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
6442 // Do we need to do any pre-widening before converting?
6443 if (SrcEltSize == 1) {
6444 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
6445 MVT XLenVT = Subtarget.getXLenVT();
6446 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6447 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
6448 DAG.getUNDEF(IntVT), Zero, VL);
6449 SDValue One = DAG.getConstant(
6450 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
6451 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
6452 DAG.getUNDEF(IntVT), One, VL);
6453 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
6454 ZeroSplat, VL);
6455 } else if (DstEltSize > (2 * SrcEltSize)) {
6456 // Widen before converting.
6457 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
6458 DstVT.getVectorElementCount());
6459 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
6460 }
6462 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
6463 } else {
6464 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
6465 "Wrong input/output vector types");
6467 // Convert f16 to f32 then convert f32 to i64.
6468 if (DstEltSize > (2 * SrcEltSize)) {
6469 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
6470 MVT InterimFVT =
6471 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
6472 Src =
6473 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
6474 }
6476 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
6477 }
6478 } else { // Narrowing + Conversion
6479 if (SrcVT.isInteger()) {
6480 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
6481 // First do a narrowing convert to an FP type half the size, then round
6482 // the FP type to a smaller FP type if needed.
6484 MVT InterimFVT = DstVT;
6485 if (SrcEltSize > (2 * DstEltSize)) {
6486 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
6487 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
6488 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
6489 }
6491 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
6493 if (InterimFVT != DstVT) {
6494 Src = Result;
6495 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
6496 }
6497 } else {
6498 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
6499 "Wrong input/output vector types");
6500 // First do a narrowing conversion to an integer half the size, then
6501 // truncate if needed.
6503 if (DstEltSize == 1) {
6504 // First convert to the same size integer, then convert to mask using
6505 // a setcc.
6506 assert(SrcEltSize >= 16 && "Unexpected FP type!");
6507 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
6508 DstVT.getVectorElementCount());
6509 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
6511 // Compare the integer result to 0. The integer should be 0 or 1/-1,
6512 // otherwise the conversion was undefined.
6513 MVT XLenVT = Subtarget.getXLenVT();
6514 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6515 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
6516 DAG.getUNDEF(InterimIVT), SplatZero, VL);
6517 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
6518 DAG.getCondCode(ISD::SETNE), Mask, VL);
6519 } else {
6520 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6521 DstVT.getVectorElementCount());
6523 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
6525 while (InterimIVT != DstVT) {
6526 SrcEltSize /= 2;
6528 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6529 DstVT.getVectorElementCount());
6530 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
6531 Result, Mask, VL);
6532 }
6533 }
6534 }
6535 }
6537 MVT VT = Op.getSimpleValueType();
6538 if (!VT.isFixedLengthVector())
6539 return Result;
6540 return convertFromScalableVector(VT, Result, DAG, Subtarget);
6541 }
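// Worked example of the narrowing path above: a VP_FP_TO_SINT from an f64
// vector to an i8 vector first converts to an i32 interim type (half the
// source element size), then emits TRUNCATE_VECTOR_VL nodes i32 -> i16 -> i8
// until InterimIVT matches DstVT.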
6543 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
6544 unsigned MaskOpc,
6545 unsigned VecOpc) const {
6546 MVT VT = Op.getSimpleValueType();
6547 if (VT.getVectorElementType() != MVT::i1)
6548 return lowerVPOp(Op, DAG, VecOpc, true);
6550 // It is safe to drop mask parameter as masked-off elements are undef.
6551 SDValue Op1 = Op->getOperand(0);
6552 SDValue Op2 = Op->getOperand(1);
6553 SDValue VL = Op->getOperand(3);
6555 MVT ContainerVT = VT;
6556 const bool IsFixed = VT.isFixedLengthVector();
6557 if (IsFixed) {
6558 ContainerVT = getContainerForFixedLengthVector(VT);
6559 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
6560 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
6561 }
6563 SDLoc DL(Op);
6564 SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
6565 if (!IsFixed)
6566 return Val;
6567 return convertFromScalableVector(VT, Val, DAG, Subtarget);
6568 }
6570 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
6571 SelectionDAG &DAG) const {
6572 SDLoc DL(Op);
6573 MVT XLenVT = Subtarget.getXLenVT();
6574 MVT VT = Op.getSimpleValueType();
6575 MVT ContainerVT = VT;
6576 if (VT.isFixedLengthVector())
6577 ContainerVT = getContainerForFixedLengthVector(VT);
6579 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6581 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
6582 // Check if the mask is known to be all ones
6583 SDValue Mask = VPNode->getMask();
6584 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6586 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
6587 : Intrinsic::riscv_vlse_mask,
6588 DL, XLenVT);
6589 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
6590 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
6591 VPNode->getStride()};
6592 if (!IsUnmasked) {
6593 if (VT.isFixedLengthVector()) {
6594 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
6595 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6596 }
6597 Ops.push_back(Mask);
6598 }
6599 Ops.push_back(VPNode->getVectorLength());
6600 if (!IsUnmasked) {
6601 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
6602 Ops.push_back(Policy);
6603 }
6605 SDValue Result =
6606 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
6607 VPNode->getMemoryVT(), VPNode->getMemOperand());
6608 SDValue Chain = Result.getValue(1);
6610 if (VT.isFixedLengthVector())
6611 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
6613 return DAG.getMergeValues({Result, Chain}, DL);
6614 }
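// For example, an unmasked llvm.experimental.vp.strided.load of v4i32 is
// emitted here as the riscv_vlse intrinsic on the fixed-length type's
// scalable container and is expected to select to a single vlse32.v, with
// the result converted back to v4i32 afterwards.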
6616 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
6617 SelectionDAG &DAG) const {
6618 SDLoc DL(Op);
6619 MVT XLenVT = Subtarget.getXLenVT();
6621 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
6622 SDValue StoreVal = VPNode->getValue();
6623 MVT VT = StoreVal.getSimpleValueType();
6624 MVT ContainerVT = VT;
6625 if (VT.isFixedLengthVector()) {
6626 ContainerVT = getContainerForFixedLengthVector(VT);
6627 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
6628 }
6630 // Check if the mask is known to be all ones
6631 SDValue Mask = VPNode->getMask();
6632 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6634 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
6635 : Intrinsic::riscv_vsse_mask,
6636 DL, XLenVT);
6637 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
6638 VPNode->getBasePtr(), VPNode->getStride()};
6639 if (!IsUnmasked) {
6640 if (VT.isFixedLengthVector()) {
6641 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
6642 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6643 }
6644 Ops.push_back(Mask);
6645 }
6646 Ops.push_back(VPNode->getVectorLength());
6648 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
6649 Ops, VPNode->getMemoryVT(),
6650 VPNode->getMemOperand());
6651 }
6653 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
6654 // matched to a RVV indexed load. The RVV indexed load instructions only
6655 // support the "unsigned unscaled" addressing mode; indices are implicitly
6656 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
6657 // signed or scaled indexing is extended to the XLEN value type and scaled
6658 // accordingly.
6659 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
6660 SelectionDAG &DAG) const {
6661 SDLoc DL(Op);
6662 MVT VT = Op.getSimpleValueType();
6664 const auto *MemSD = cast<MemSDNode>(Op.getNode());
6665 EVT MemVT = MemSD->getMemoryVT();
6666 MachineMemOperand *MMO = MemSD->getMemOperand();
6667 SDValue Chain = MemSD->getChain();
6668 SDValue BasePtr = MemSD->getBasePtr();
6670 ISD::LoadExtType LoadExtType;
6671 SDValue Index, Mask, PassThru, VL;
6673 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
6674 Index = VPGN->getIndex();
6675 Mask = VPGN->getMask();
6676 PassThru = DAG.getUNDEF(VT);
6677 VL = VPGN->getVectorLength();
6678 // VP doesn't support extending loads.
6679 LoadExtType = ISD::NON_EXTLOAD;
6680 } else {
6681 // Else it must be a MGATHER.
6682 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
6683 Index = MGN->getIndex();
6684 Mask = MGN->getMask();
6685 PassThru = MGN->getPassThru();
6686 LoadExtType = MGN->getExtensionType();
6687 }
6689 MVT IndexVT = Index.getSimpleValueType();
6690 MVT XLenVT = Subtarget.getXLenVT();
6692 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
6693 "Unexpected VTs!");
6694 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
6695 // Targets have to explicitly opt-in for extending vector loads.
6696 assert(LoadExtType == ISD::NON_EXTLOAD &&
6697 "Unexpected extending MGATHER/VP_GATHER");
6700 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
6701 // the selection of the masked intrinsics doesn't do this for us.
6702 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6704 MVT ContainerVT = VT;
6705 if (VT.isFixedLengthVector()) {
6706 ContainerVT = getContainerForFixedLengthVector(VT);
6707 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
6708 ContainerVT.getVectorElementCount());
6710 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
6712 if (!IsUnmasked) {
6713 MVT MaskVT = getMaskTypeFor(ContainerVT);
6714 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6715 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
6716 }
6717 }
6719 if (!VL)
6720 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6722 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
6723 IndexVT = IndexVT.changeVectorElementType(XLenVT);
6724 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
6725 VL);
6726 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
6727 DAG.getUNDEF(IndexVT), TrueMask, VL);
6728 }
6730 unsigned IntID =
6731 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
6732 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
6733 if (IsUnmasked)
6734 Ops.push_back(DAG.getUNDEF(ContainerVT));
6735 else
6736 Ops.push_back(PassThru);
6737 Ops.push_back(BasePtr);
6738 Ops.push_back(Index);
6739 if (!IsUnmasked)
6740 Ops.push_back(Mask);
6741 Ops.push_back(VL);
6742 if (!IsUnmasked)
6743 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
6745 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6746 SDValue Result =
6747 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
6748 Chain = Result.getValue(1);
6750 if (VT.isFixedLengthVector())
6751 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
6753 return DAG.getMergeValues({Result, Chain}, DL);
6754 }
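// For example, a gather of v4i32 with an all-ones mask takes the unmasked
// riscv_vluxei path above, while a genuinely masked gather uses
// riscv_vluxei_mask with the pass-thru operand as the merge value and a
// tail-agnostic policy operand appended last.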
6756 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
6757 // matched to a RVV indexed store. The RVV indexed store instructions only
6758 // support the "unsigned unscaled" addressing mode; indices are implicitly
6759 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
6760 // signed or scaled indexing is extended to the XLEN value type and scaled
6761 // accordingly.
6762 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
6763 SelectionDAG &DAG) const {
6764 SDLoc DL(Op);
6765 const auto *MemSD = cast<MemSDNode>(Op.getNode());
6766 EVT MemVT = MemSD->getMemoryVT();
6767 MachineMemOperand *MMO = MemSD->getMemOperand();
6768 SDValue Chain = MemSD->getChain();
6769 SDValue BasePtr = MemSD->getBasePtr();
6771 bool IsTruncatingStore = false;
6772 SDValue Index, Mask, Val, VL;
6774 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
6775 Index = VPSN->getIndex();
6776 Mask = VPSN->getMask();
6777 Val = VPSN->getValue();
6778 VL = VPSN->getVectorLength();
6779 // VP doesn't support truncating stores.
6780 IsTruncatingStore = false;
6781 } else {
6782 // Else it must be a MSCATTER.
6783 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
6784 Index = MSN->getIndex();
6785 Mask = MSN->getMask();
6786 Val = MSN->getValue();
6787 IsTruncatingStore = MSN->isTruncatingStore();
6788 }
6790 MVT VT = Val.getSimpleValueType();
6791 MVT IndexVT = Index.getSimpleValueType();
6792 MVT XLenVT = Subtarget.getXLenVT();
6794 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
6795 "Unexpected VTs!");
6796 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
6797 // Targets have to explicitly opt-in for extending vector loads and
6798 // truncating vector stores.
6799 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
6800 (void)IsTruncatingStore;
6802 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
6803 // the selection of the masked intrinsics doesn't do this for us.
6804 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6806 MVT ContainerVT = VT;
6807 if (VT.isFixedLengthVector()) {
6808 ContainerVT = getContainerForFixedLengthVector(VT);
6809 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
6810 ContainerVT.getVectorElementCount());
6812 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
6813 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
6815 if (!IsUnmasked) {
6816 MVT MaskVT = getMaskTypeFor(ContainerVT);
6817 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6818 }
6819 }
6821 if (!VL)
6822 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6824 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
6825 IndexVT = IndexVT.changeVectorElementType(XLenVT);
6826 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
6827 VL);
6828 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
6829 DAG.getUNDEF(IndexVT), TrueMask, VL);
6830 }
6832 unsigned IntID =
6833 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
6834 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
6835 Ops.push_back(Val);
6836 Ops.push_back(BasePtr);
6837 Ops.push_back(Index);
6838 if (!IsUnmasked)
6839 Ops.push_back(Mask);
6840 Ops.push_back(VL);
6842 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
6843 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
6844 }
6846 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
6847 SelectionDAG &DAG) const {
6848 const MVT XLenVT = Subtarget.getXLenVT();
6849 SDLoc DL(Op);
6850 SDValue Chain = Op->getOperand(0);
6851 SDValue SysRegNo = DAG.getTargetConstant(
6852 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
6853 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
6854 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
6856 // The encoding used for the rounding mode in RISCV differs from that used in
6857 // FLT_ROUNDS. To convert it, the RISCV rounding mode is used as an index in a
6858 // table, which consists of a sequence of 4-bit fields, each representing the
6859 // corresponding FLT_ROUNDS mode.
6860 static const int Table =
6861 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
6862 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
6863 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
6864 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
6865 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
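// For example, reading frm == RISCVFPRndMode::RTZ (1) below computes
// (Table >> (1 << 2)) & 7 == int(RoundingMode::TowardZero), which is the
// FLT_ROUNDS encoding for round-toward-zero.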
6867 SDValue Shift =
6868 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
6869 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
6870 DAG.getConstant(Table, DL, XLenVT), Shift);
6871 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
6872 DAG.getConstant(7, DL, XLenVT));
6874 return DAG.getMergeValues({Masked, Chain}, DL);
6875 }
6877 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
6878 SelectionDAG &DAG) const {
6879 const MVT XLenVT = Subtarget.getXLenVT();
6880 SDLoc DL(Op);
6881 SDValue Chain = Op->getOperand(0);
6882 SDValue RMValue = Op->getOperand(1);
6883 SDValue SysRegNo = DAG.getTargetConstant(
6884 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
6886 // The encoding used for the rounding mode in RISCV differs from that used in
6887 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index in
6888 // a table, which consists of a sequence of 4-bit fields, each representing
6889 // the corresponding RISCV mode.
6890 static const unsigned Table =
6891 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
6892 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
6893 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
6894 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
6895 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
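// For example, switching to RoundingMode::TowardNegative (3) computes
// (Table >> (3 << 2)) & 0x7 == RISCVFPRndMode::RDN (2), the frm encoding
// written to the CSR below.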
6897 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
6898 DAG.getConstant(2, DL, XLenVT));
6899 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
6900 DAG.getConstant(Table, DL, XLenVT), Shift);
6901 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
6902 DAG.getConstant(0x7, DL, XLenVT));
6903 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
6904 RMValue);
6905 }
6907 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
6908 SelectionDAG &DAG) const {
6909 MachineFunction &MF = DAG.getMachineFunction();
6911 bool isRISCV64 = Subtarget.is64Bit();
6912 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6914 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
6915 return DAG.getFrameIndex(FI, PtrVT);
6916 }
6918 static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
6919 switch (IntNo) {
6920 default:
6921 llvm_unreachable("Unexpected Intrinsic");
6922 case Intrinsic::riscv_bcompress:
6923 return RISCVISD::BCOMPRESSW;
6924 case Intrinsic::riscv_bdecompress:
6925 return RISCVISD::BDECOMPRESSW;
6926 case Intrinsic::riscv_bfp:
6927 return RISCVISD::BFPW;
6928 case Intrinsic::riscv_fsl:
6929 return RISCVISD::FSLW;
6930 case Intrinsic::riscv_fsr:
6931 return RISCVISD::FSRW;
6932 }
6933 }
6935 // Converts the given intrinsic to a i64 operation with any extension.
6936 static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
6937 unsigned IntNo) {
6938 SDLoc DL(N);
6939 RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
6940 // Deal with the Instruction Operands
6941 SmallVector<SDValue, 3> NewOps;
6942 for (SDValue Op : drop_begin(N->ops()))
6943 // Promote the operand to i64 type
6944 NewOps.push_back(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op));
6945 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOps);
6946 // ReplaceNodeResults requires we maintain the same type for the return value.
6947 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6948 }
6950 // Returns the opcode of the target-specific SDNode that implements the 32-bit
6951 // form of the given Opcode.
6952 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
6953 switch (Opcode) {
6954 default:
6955 llvm_unreachable("Unexpected opcode");
6956 case ISD::SHL:
6957 return RISCVISD::SLLW;
6958 case ISD::SRA:
6959 return RISCVISD::SRAW;
6960 case ISD::SRL:
6961 return RISCVISD::SRLW;
6962 case ISD::SDIV:
6963 return RISCVISD::DIVW;
6964 case ISD::UDIV:
6965 return RISCVISD::DIVUW;
6966 case ISD::UREM:
6967 return RISCVISD::REMUW;
6968 case ISD::ROTL:
6969 return RISCVISD::ROLW;
6970 case ISD::ROTR:
6971 return RISCVISD::RORW;
6972 }
6973 }
6975 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
6976 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
6977 // otherwise be promoted to i64, making it difficult to select the
6978 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
6979 // originally of type i8/i16/i32 is lost.
6980 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
6981 unsigned ExtOpc = ISD::ANY_EXTEND) {
6982 SDLoc DL(N);
6983 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
6984 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
6985 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
6986 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
6987 // ReplaceNodeResults requires we maintain the same type for the return value.
6988 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
6989 }
6991 // Converts the given 32-bit operation to an i64 operation with signed
6992 // extension semantics, to reduce the number of sign extension instructions.
6993 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
6994 SDLoc DL(N);
6995 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
6996 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
6997 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
6998 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
6999 DAG.getValueType(MVT::i32));
7000 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
7001 }
7003 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
7004 SmallVectorImpl<SDValue> &Results,
7005 SelectionDAG &DAG) const {
7006 SDLoc DL(N);
7007 switch (N->getOpcode()) {
7008 default:
7009 llvm_unreachable("Don't know how to custom type legalize this operation!");
7010 case ISD::STRICT_FP_TO_SINT:
7011 case ISD::STRICT_FP_TO_UINT:
7012 case ISD::FP_TO_SINT:
7013 case ISD::FP_TO_UINT: {
7014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7015 "Unexpected custom legalisation");
7016 bool IsStrict = N->isStrictFPOpcode();
7017 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
7018 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
7019 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
7020 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
7021 TargetLowering::TypeSoftenFloat) {
7022 if (!isTypeLegal(Op0.getValueType()))
7023 return;
7024 if (IsStrict) {
7025 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
7026 : RISCVISD::STRICT_FCVT_WU_RV64;
7027 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
7028 SDValue Res = DAG.getNode(
7029 Opc, DL, VTs, N->getOperand(0), Op0,
7030 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
7031 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7032 Results.push_back(Res.getValue(1));
7033 return;
7034 }
7035 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
7036 SDValue Res =
7037 DAG.getNode(Opc, DL, MVT::i64, Op0,
7038 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
7039 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7040 return;
7041 }
7042 // If the FP type needs to be softened, emit a library call using the 'si'
7043 // version. If we left it to default legalization we'd end up with 'di'. If
7044 // the FP type doesn't need to be softened just let generic type
7045 // legalization promote the result type.
7046 RTLIB::Libcall LC;
7047 if (IsSigned)
7048 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
7049 else
7050 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
7051 MakeLibCallOptions CallOptions;
7052 EVT OpVT = Op0.getValueType();
7053 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
7054 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
7055 SDValue Result;
7056 std::tie(Result, Chain) =
7057 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
7058 Results.push_back(Result);
7059 if (IsStrict)
7060 Results.push_back(Chain);
7061 break;
7062 }
7063 case ISD::READCYCLECOUNTER: {
7064 assert(!Subtarget.is64Bit() &&
7065 "READCYCLECOUNTER only has custom type legalization on riscv32");
7067 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
7068 SDValue RCW =
7069 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
7071 Results.push_back(
7072 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
7073 Results.push_back(RCW.getValue(2));
7074 break;
7075 }
7076 case ISD::MUL: {
7077 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
7078 unsigned XLen = Subtarget.getXLen();
7079 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
7080 if (Size > XLen) {
7081 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
7082 SDValue LHS = N->getOperand(0);
7083 SDValue RHS = N->getOperand(1);
7084 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
7086 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
7087 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
7088 // We need exactly one side to be unsigned.
7089 if (LHSIsU == RHSIsU)
7090 return;
7092 auto MakeMULPair = [&](SDValue S, SDValue U) {
7093 MVT XLenVT = Subtarget.getXLenVT();
7094 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
7095 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
7096 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
7097 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
7098 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
7099 };
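// Illustrative expansion: for an i128 multiply on RV64 where exactly one
// operand is unsigned, MakeMULPair(S, U) builds
//   Lo = mul S, U;  Hi = mulhsu S, U;  build_pair(Lo, Hi)
// relying on MULHSU treating its first operand as signed and its second as
// unsigned.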
7101 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
7102 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
7104 // The other operand should be signed, but still prefer MULH when
7105 // possible.
7106 if (RHSIsU && LHSIsS && !RHSIsS)
7107 Results.push_back(MakeMULPair(LHS, RHS));
7108 else if (LHSIsU && RHSIsS && !LHSIsS)
7109 Results.push_back(MakeMULPair(RHS, LHS));
7111 return;
7112 }
7113 LLVM_FALLTHROUGH;
7114 }
7115 case ISD::ADD:
7116 case ISD::SUB:
7117 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7118 "Unexpected custom legalisation");
7119 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
7120 break;
7121 case ISD::SHL:
7122 case ISD::SRA:
7123 case ISD::SRL:
7124 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7125 "Unexpected custom legalisation");
7126 if (N->getOperand(1).getOpcode() != ISD::Constant) {
7127 // If we can use a BSET instruction, allow default promotion to apply.
7128 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
7129 isOneConstant(N->getOperand(0)))
7130 break;
7131 Results.push_back(customLegalizeToWOp(N, DAG));
7132 break;
7133 }
7135 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
7136 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
7137 // shift amount.
7138 if (N->getOpcode() == ISD::SHL) {
7140 SDValue NewOp0 =
7141 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
7142 SDValue NewOp1 =
7143 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
7144 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
7145 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
7146 DAG.getValueType(MVT::i32));
7147 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
7148 }
7149 break;
7151 case ISD::ROTL:
7152 case ISD::ROTR:
7153 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7154 "Unexpected custom legalisation");
7155 Results.push_back(customLegalizeToWOp(N, DAG));
7156 break;
7157 case ISD::CTTZ:
7158 case ISD::CTTZ_ZERO_UNDEF:
7159 case ISD::CTLZ:
7160 case ISD::CTLZ_ZERO_UNDEF: {
7161 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7162 "Unexpected custom legalisation");
7164 SDValue NewOp0 =
7165 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
7166 bool IsCTZ =
7167 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
7168 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
7169 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
7170 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7171 return;
7172 }
7173 case ISD::SDIV:
7174 case ISD::UDIV:
7175 case ISD::UREM: {
7176 MVT VT = N->getSimpleValueType(0);
7177 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
7178 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
7179 "Unexpected custom legalisation");
7180 // Don't promote division/remainder by constant since we should expand those
7181 // to multiply by magic constant.
7182 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
7183 if (N->getOperand(1).getOpcode() == ISD::Constant &&
7184 !isIntDivCheap(N->getValueType(0), Attr))
7185 return;
7187 // If the input is i32, use ANY_EXTEND since the W instructions don't read
7188 // the upper 32 bits. For other types we need to sign or zero extend
7189 // based on the opcode.
7190 unsigned ExtOpc = ISD::ANY_EXTEND;
7191 if (VT != MVT::i32)
7192 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
7193 : ISD::ZERO_EXTEND;
7195 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
7196 break;
7197 }
7198 case ISD::UADDO:
7199 case ISD::USUBO: {
7200 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7201 "Unexpected custom legalisation");
7202 bool IsAdd = N->getOpcode() == ISD::UADDO;
7203 // Create an ADDW or SUBW.
7204 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
7205 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7206 SDValue Res =
7207 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
7208 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
7209 DAG.getValueType(MVT::i32));
7211 SDValue Overflow;
7212 if (IsAdd && isOneConstant(RHS)) {
7213 // Special case uaddo X, 1 overflowed if the addition result is 0.
7214 // The general case (X + C) < C is not necessarily beneficial. Although we
7215 // reduce the live range of X, we may introduce the materialization of
7216 // constant C, especially when the setcc result is used by branch. We have
7217 // no compare with constant and branch instructions.
7218 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
7219 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
7220 } else {
7221 // Sign extend the LHS and perform an unsigned compare with the ADDW
7222 // result. Since the inputs are sign extended from i32, this is equivalent
7223 // to comparing the lower 32 bits.
7224 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
7225 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
7226 IsAdd ? ISD::SETULT : ISD::SETUGT);
7227 }
7229 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7230 Results.push_back(Overflow);
7231 return;
7232 }
7233 case ISD::UADDSAT:
7234 case ISD::USUBSAT: {
7235 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7236 "Unexpected custom legalisation");
7237 if (Subtarget.hasStdExtZbb()) {
7238 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
7239 // sign extend allows overflow of the lower 32 bits to be detected on
7240 // the promoted size.
7241 SDValue LHS =
7242 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
7243 SDValue RHS =
7244 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
7245 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
7246 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7247 return;
7248 }
7250 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
7251 // promotion for UADDO/USUBO.
7252 Results.push_back(expandAddSubSat(N, DAG));
7253 return;
7254 }
7255 case ISD::ABS: {
7256 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7257 "Unexpected custom legalisation");
7259 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
7261 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
7263 // Freeze the source so we can increase its use count.
7264 Src = DAG.getFreeze(Src);
7266 // Copy sign bit to all bits using the sraiw pattern.
7267 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
7268 DAG.getValueType(MVT::i32));
7269 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
7270 DAG.getConstant(31, DL, MVT::i64));
7272 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
7273 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
7275 // NOTE: The result is only required to be anyextended, but sext is
7276 // consistent with type legalization of sub.
7277 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
7278 DAG.getValueType(MVT::i32));
7279 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
7280 break;
7281 }
7282 case ISD::BITCAST: {
7283 EVT VT = N->getValueType(0);
7284 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
7285 SDValue Op0 = N->getOperand(0);
7286 EVT Op0VT = Op0.getValueType();
7287 MVT XLenVT = Subtarget.getXLenVT();
7288 if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
7289 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
7290 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
7291 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
7292 Subtarget.hasStdExtF()) {
7293 SDValue FPConv =
7294 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
7295 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
7296 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
7297 isTypeLegal(Op0VT)) {
7298 // Custom-legalize bitcasts from fixed-length vector types to illegal
7299 // scalar types in order to improve codegen. Bitcast the vector to a
7300 // one-element vector type whose element type is the same as the result
7301 // type, and extract the first element.
7302 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7303 if (isTypeLegal(BVT)) {
7304 SDValue BVec = DAG.getBitcast(BVT, Op0);
7305 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
7306 DAG.getConstant(0, DL, XLenVT)));
7307 }
7308 }
7309 break;
7310 }
7311 case RISCVISD::GREV:
7312 case RISCVISD::GORC:
7313 case RISCVISD::SHFL: {
7314 MVT VT = N->getSimpleValueType(0);
7315 MVT XLenVT = Subtarget.getXLenVT();
7316 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
7317 "Unexpected custom legalisation");
7318 assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
7319 assert((Subtarget.hasStdExtZbp() ||
7320 (Subtarget.hasStdExtZbkb() && N->getOpcode() == RISCVISD::GREV &&
7321 N->getConstantOperandVal(1) == 7)) &&
7322 "Unexpected extension");
7323 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
7324 SDValue NewOp1 =
7325 DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
7326 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp0, NewOp1);
7327 // ReplaceNodeResults requires we maintain the same type for the return
7328 // value.
7329 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
7330 break;
7331 }
7332 case ISD::BSWAP:
7333 case ISD::BITREVERSE: {
7334 MVT VT = N->getSimpleValueType(0);
7335 MVT XLenVT = Subtarget.getXLenVT();
7336 assert((VT == MVT::i8 || VT == MVT::i16 ||
7337 (VT == MVT::i32 && Subtarget.is64Bit())) &&
7338 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
7339 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
7340 unsigned Imm = VT.getSizeInBits() - 1;
7341 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
7342 if (N->getOpcode() == ISD::BSWAP)
7343 Imm &= ~0x7U;
7344 SDValue GREVI = DAG.getNode(RISCVISD::GREV, DL, XLenVT, NewOp0,
7345 DAG.getConstant(Imm, DL, XLenVT));
7346 // ReplaceNodeResults requires we maintain the same type for the return
7347 // value.
7348 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
7349 break;
7350 }
7351 case ISD::FSHL:
7352 case ISD::FSHR: {
7353 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7354 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
7355 SDValue NewOp0 =
7356 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
7357 SDValue NewOp1 =
7358 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7359 SDValue NewShAmt =
7360 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
7361 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
7362 // Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
7363 NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
7364 DAG.getConstant(0x1f, DL, MVT::i64));
7365 // fshl and fshr concatenate their operands in the same order. fsrw and fslw
7366 // instructions use different orders. fshl will return its first operand for
7367 // shift of zero, fshr will return its second operand. fsl and fsr both
7368 // return rs1 so the ISD nodes need to have different operand orders.
7369 // Shift amount is in rs2.
7370 unsigned Opc = RISCVISD::FSLW;
7371 if (N->getOpcode() == ISD::FSHR) {
7372 std::swap(NewOp0, NewOp1);
7373 Opc = RISCVISD::FSRW;
7374 }
7375 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
7376 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
7377 break;
7378 }
7379 case ISD::EXTRACT_VECTOR_ELT: {
7380 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
7381 // type is illegal (currently only vXi64 RV32).
7382 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
7383 // transferred to the destination register. We issue two of these from the
7384 // upper- and lower- halves of the SEW-bit vector element, slid down to the
7385 // first element.
7386 SDValue Vec = N->getOperand(0);
7387 SDValue Idx = N->getOperand(1);
7389 // The vector type hasn't been legalized yet so we can't issue target
7390 // specific nodes if it needs legalization.
7391 // FIXME: We would manually legalize if it's important.
7392 if (!isTypeLegal(Vec.getValueType()))
7393 return;
7395 MVT VecVT = Vec.getSimpleValueType();
7397 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
7398 VecVT.getVectorElementType() == MVT::i64 &&
7399 "Unexpected EXTRACT_VECTOR_ELT legalization");
7401 // If this is a fixed vector, we need to convert it to a scalable vector.
7402 MVT ContainerVT = VecVT;
7403 if (VecVT.isFixedLengthVector()) {
7404 ContainerVT = getContainerForFixedLengthVector(VecVT);
7405 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7408 MVT XLenVT = Subtarget.getXLenVT();
7410 // Use a VL of 1 to avoid processing more elements than we need.
7411 SDValue VL = DAG.getConstant(1, DL, XLenVT);
7412 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
7414 // Unless the index is known to be 0, we must slide the vector down to get
7415 // the desired element into index 0.
7416 if (!isNullConstant(Idx)) {
7417 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
7418 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
7419 }
7421 // Extract the lower XLEN bits of the correct vector element.
7422 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
7424 // To extract the upper XLEN bits of the vector element, shift the first
7425 // element right by 32 bits and re-extract the lower XLEN bits.
7426 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7427 DAG.getUNDEF(ContainerVT),
7428 DAG.getConstant(32, DL, XLenVT), VL);
7429 SDValue LShr32 =
7430 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
7431 DAG.getUNDEF(ContainerVT), Mask, VL);
7433 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
7435 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
7436 break;
7437 }
7438 case ISD::INTRINSIC_WO_CHAIN: {
7439 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
7443 "Don't know how to custom type legalize this intrinsic!");
7444 case Intrinsic::riscv_grev:
7445 case Intrinsic::riscv_gorc: {
7446 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7447 "Unexpected custom legalisation");
7448 SDValue NewOp1 =
7449 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7450 SDValue NewOp2 =
7451 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
7452 unsigned Opc =
7453 IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
7454 // If the control is a constant, promote the node by clearing any extra
7455 // bits in the control. isel will form greviw/gorciw if the result is
7456 // sign extended.
7457 if (isa<ConstantSDNode>(NewOp2)) {
7458 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
7459 DAG.getConstant(0x1f, DL, MVT::i64));
7460 Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
7462 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
7463 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7464 break;
7465 }
7466 case Intrinsic::riscv_bcompress:
7467 case Intrinsic::riscv_bdecompress:
7468 case Intrinsic::riscv_bfp:
7469 case Intrinsic::riscv_fsl:
7470 case Intrinsic::riscv_fsr: {
7471 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7472 "Unexpected custom legalisation");
7473 Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
7474 break;
7475 }
7476 case Intrinsic::riscv_orc_b: {
7477 // Lower to the GORCI encoding for orc.b with the operand extended.
7478 SDValue NewOp =
7479 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7480 SDValue Res = DAG.getNode(RISCVISD::GORC, DL, MVT::i64, NewOp,
7481 DAG.getConstant(7, DL, MVT::i64));
7482 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7483 break;
7484 }
7485 case Intrinsic::riscv_shfl:
7486 case Intrinsic::riscv_unshfl: {
7487 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
7488 "Unexpected custom legalisation");
7489 SDValue NewOp1 =
7490 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
7491 SDValue NewOp2 =
7492 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
7493 unsigned Opc =
7494 IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
7495 // There is no (UN)SHFLIW. If the control word is a constant, we can use
7496 // (UN)SHFLI with bit 4 of the control word cleared. The upper 32 bit half
7497 // will be shuffled the same way as the lower 32 bit half, but the two
7498 // halves won't cross.
7499 if (isa<ConstantSDNode>(NewOp2)) {
7500 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
7501 DAG.getConstant(0xf, DL, MVT::i64));
7502 Opc =
7503 IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
7504 }
7505 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
7506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
7507 break;
7508 }
7509 case Intrinsic::riscv_vmv_x_s: {
7510 EVT VT = N->getValueType(0);
7511 MVT XLenVT = Subtarget.getXLenVT();
7512 if (VT.bitsLT(XLenVT)) {
7513 // Simple case just extract using vmv.x.s and truncate.
7514 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
7515 Subtarget.getXLenVT(), N->getOperand(1));
7516 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
7517 return;
7518 }
7520 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
7521 "Unexpected custom legalization");
7523 // We need to do the move in two steps.
7524 SDValue Vec = N->getOperand(1);
7525 MVT VecVT = Vec.getSimpleValueType();
7527 // First extract the lower XLEN bits of the element.
7528 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
7530 // To extract the upper XLEN bits of the vector element, shift the first
7531 // element right by 32 bits and re-extract the lower XLEN bits.
7532 SDValue VL = DAG.getConstant(1, DL, XLenVT);
7533 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
7535 SDValue ThirtyTwoV =
7536 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
7537 DAG.getConstant(32, DL, XLenVT), VL);
7538 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
7539 DAG.getUNDEF(VecVT), Mask, VL);
7540 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
7542 Results.push_back(
7543 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
7544 break;
7545 }
7546 }
7547 break;
7548 }
7549 case ISD::VECREDUCE_ADD:
7550 case ISD::VECREDUCE_AND:
7551 case ISD::VECREDUCE_OR:
7552 case ISD::VECREDUCE_XOR:
7553 case ISD::VECREDUCE_SMAX:
7554 case ISD::VECREDUCE_UMAX:
7555 case ISD::VECREDUCE_SMIN:
7556 case ISD::VECREDUCE_UMIN:
7557 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
7558 Results.push_back(V);
7559 break;
7560 case ISD::VP_REDUCE_ADD:
7561 case ISD::VP_REDUCE_AND:
7562 case ISD::VP_REDUCE_OR:
7563 case ISD::VP_REDUCE_XOR:
7564 case ISD::VP_REDUCE_SMAX:
7565 case ISD::VP_REDUCE_UMAX:
7566 case ISD::VP_REDUCE_SMIN:
7567 case ISD::VP_REDUCE_UMIN:
7568 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
7569 Results.push_back(V);
7570 break;
7571 case ISD::FLT_ROUNDS_: {
7572 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
7573 SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
7574 Results.push_back(Res.getValue(0));
7575 Results.push_back(Res.getValue(1));
7576 break;
7577 }
7578 }
7579 }
7581 // A structure to hold one of the bit-manipulation patterns below. Together, a
7582 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
7583 // (or (and (shl x, 1), 0xAAAAAAAA),
7584 // (and (srl x, 1), 0x55555555))
7585 struct RISCVBitmanipPat {
7586 SDValue Op;
7587 unsigned ShAmt;
7588 bool IsSHL;
7590 bool formsPairWith(const RISCVBitmanipPat &Other) const {
7591 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
7592 }
7593 };
7595 // Matches patterns of the form
7596 // (and (shl x, C2), (C1 << C2))
7597 // (and (srl x, C2), C1)
7598 // (shl (and x, C1), C2)
7599 // (srl (and x, (C1 << C2)), C2)
7600 // Where C2 is a power of 2 and C1 has at least that many leading zeroes.
7601 // The expected masks for each shift amount are specified in BitmanipMasks where
7602 // BitmanipMasks[log2(C2)] specifies the expected C1 value.
7603 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether
7604 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible
7605 // shift amount is XLen/2.
7606 static Optional<RISCVBitmanipPat>
7607 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
7608 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
7609 "Unexpected number of masks");
7610 Optional<uint64_t> Mask;
7611 // Optionally consume a mask around the shift operation.
7612 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
7613 Mask = Op.getConstantOperandVal(1);
7614 Op = Op.getOperand(0);
7615 }
7616 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
7617 return None;
7618 bool IsSHL = Op.getOpcode() == ISD::SHL;
7620 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7621 return None;
7622 uint64_t ShAmt = Op.getConstantOperandVal(1);
7624 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
7625 if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
7626 return None;
7627 // If we don't have enough masks for 64 bit, then we must be trying to
7628 // match SHFL so we're only allowed to shift 1/4 of the width.
7629 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
7630 return None;
7632 SDValue Src = Op.getOperand(0);
7634 // The expected mask is shifted left when the AND is found around SHL
7635 // patterns.
7636 // ((x >> 1) & 0x55555555)
7637 // ((x << 1) & 0xAAAAAAAA)
7638 bool SHLExpMask = IsSHL;
7640 if (!Mask) {
7641 // Sometimes LLVM keeps the mask as an operand of the shift, typically when
7642 // the mask is all ones: consume that now.
7643 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
7644 Mask = Src.getConstantOperandVal(1);
7645 Src = Src.getOperand(0);
7646 // The expected mask is now in fact shifted left for SRL, so reverse the
7647 // expectation.
7648 // ((x & 0xAAAAAAAA) >> 1)
7649 // ((x & 0x55555555) << 1)
7650 SHLExpMask = !SHLExpMask;
7651 } else {
7652 // Use a default shifted mask of all-ones if there's no AND, truncated
7653 // down to the expected width. This simplifies the logic later on.
7654 Mask = maskTrailingOnes<uint64_t>(Width);
7655 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
7656 }
7657 }
7659 unsigned MaskIdx = Log2_32(ShAmt);
7660 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
7662 if (SHLExpMask)
7663 ExpMask <<= ShAmt;
7665 if (Mask != ExpMask)
7666 return None;
7668 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
7669 }
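// For example, on RV32 (and (shl x, 1), 0xAAAAAAAA) matches with ShAmt == 1:
// the expected mask BitmanipMasks[0] == 0x55555555 is shifted left to
// 0xAAAAAAAA before the compare, giving {x, 1, IsSHL=true}. Its partner
// (and (srl x, 1), 0x55555555) gives {x, 1, IsSHL=false}; the two form a
// pair via formsPairWith, allowing a GREVI with shamt 1.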
7671 // Matches any of the following bit-manipulation patterns:
7672 // (and (shl x, 1), (0x55555555 << 1))
7673 // (and (srl x, 1), 0x55555555)
7674 // (shl (and x, 0x55555555), 1)
7675 // (srl (and x, (0x55555555 << 1)), 1)
7676 // where the shift amount and mask may vary thus:
7677 // [1] = 0x55555555 / 0xAAAAAAAA
7678 // [2] = 0x33333333 / 0xCCCCCCCC
7679 // [4] = 0x0F0F0F0F / 0xF0F0F0F0
7680 // [8] = 0x00FF00FF / 0xFF00FF00
7681 // [16] = 0x0000FFFF / 0xFFFF0000
7682 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
7683 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
7684 // These are the unshifted masks which we use to match bit-manipulation
7685 // patterns. They may be shifted left in certain circumstances.
7686 static const uint64_t BitmanipMasks[] = {
7687 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
7688 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
7690 return matchRISCVBitmanipPat(Op, BitmanipMasks);
7691 }
7693 // Try to fold (<bop> x, (reduction.<bop> vec, start))
7694 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG) {
7695 auto BinOpToRVVReduce = [](unsigned Opc) {
7698 llvm_unreachable("Unhandled binary to transfrom reduction");
7700 return RISCVISD::VECREDUCE_ADD_VL;
7702 return RISCVISD::VECREDUCE_UMAX_VL;
7704 return RISCVISD::VECREDUCE_SMAX_VL;
7706 return RISCVISD::VECREDUCE_UMIN_VL;
7708 return RISCVISD::VECREDUCE_SMIN_VL;
7710 return RISCVISD::VECREDUCE_AND_VL;
7712 return RISCVISD::VECREDUCE_OR_VL;
7714 return RISCVISD::VECREDUCE_XOR_VL;
7716 return RISCVISD::VECREDUCE_FADD_VL;
7718 return RISCVISD::VECREDUCE_FMAX_VL;
7720 return RISCVISD::VECREDUCE_FMIN_VL;
7724 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
7725 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7726 isNullConstant(V.getOperand(1)) &&
7727 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
7730 unsigned Opc = N->getOpcode();
7731 unsigned ReduceIdx;
7732 if (IsReduction(N->getOperand(0), Opc))
7733 ReduceIdx = 0;
7734 else if (IsReduction(N->getOperand(1), Opc))
7735 ReduceIdx = 1;
7736 else
7737 return SDValue();
7740 // Skip if FADD disallows reassociation but the combiner needs it.
7741 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
7742 return SDValue();
7743 SDValue Extract = N->getOperand(ReduceIdx);
7744 SDValue Reduce = Extract.getOperand(0);
7745 if (!Reduce.hasOneUse())
7746 return SDValue();
7748 SDValue ScalarV = Reduce.getOperand(2);
7750 // Make sure that ScalarV is a splat with VL=1.
7751 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
7752 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
7753 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
7754 return SDValue();
7756 if (!isOneConstant(ScalarV.getOperand(2)))
7757 return SDValue();
7759 // TODO: Deal with values other than the neutral element.
7760 auto IsRVVNeutralElement = [Opc, &DAG](SDNode *N, SDValue V) {
7761 if (Opc == ISD::FADD && N->getFlags().hasNoSignedZeros() &&
7762 isNullFPConstant(V))
7763 return true;
7764 return DAG.getNeutralElement(Opc, SDLoc(V), V.getSimpleValueType(),
7765 N->getFlags()) == V;
7766 };
7768 // Check that the scalar of ScalarV is the neutral element.
7769 if (!IsRVVNeutralElement(N, ScalarV.getOperand(1)))
7770 return SDValue();
7772 if (!ScalarV.hasOneUse())
7773 return SDValue();
7775 EVT SplatVT = ScalarV.getValueType();
7776 SDValue NewStart = N->getOperand(1 - ReduceIdx);
7777 unsigned SplatOpc = RISCVISD::VFMV_S_F_VL;
7778 if (SplatVT.isInteger()) {
7779 auto *C = dyn_cast<ConstantSDNode>(NewStart.getNode());
7780 if (!C || C->isZero() || !isInt<5>(C->getSExtValue()))
7781 SplatOpc = RISCVISD::VMV_S_X_VL;
7782 else
7783 SplatOpc = RISCVISD::VMV_V_X_VL;
7784 }
7786 SDValue NewScalarV =
7787 DAG.getNode(SplatOpc, SDLoc(N), SplatVT, ScalarV.getOperand(0), NewStart,
7788 ScalarV.getOperand(2));
7789 SDValue NewReduce =
7790 DAG.getNode(Reduce.getOpcode(), SDLoc(Reduce), Reduce.getValueType(),
7791 Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV,
7792 Reduce.getOperand(3), Reduce.getOperand(4));
7793 return DAG.getNode(Extract.getOpcode(), SDLoc(Extract),
7794 Extract.getValueType(), NewReduce, Extract.getOperand(1));
7795 }
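// For example, (add (extract_elt (VECREDUCE_ADD_VL vec, splat(0), mask, vl),
// 0), x) is rewritten so that x replaces the zero start value of the
// reduction, folding the scalar add away (assuming the start splat has VL 1
// and holds the neutral element, as checked above).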
7797 // Match the following pattern as a GREVI(W) operation
7798 // (or (BITMANIP_SHL x), (BITMANIP_SRL x))
7799 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
7800 const RISCVSubtarget &Subtarget) {
7801 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
7802 EVT VT = Op.getValueType();
7804 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
7805 auto LHS = matchGREVIPat(Op.getOperand(0));
7806 auto RHS = matchGREVIPat(Op.getOperand(1));
7807 if (LHS && RHS && LHS->formsPairWith(*RHS)) {
7808 SDLoc DL(Op);
7809 return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
7810 DAG.getConstant(LHS->ShAmt, DL, VT));
7811 }
7812 }
7813 return SDValue();
7814 }
7816 // Matches any of the following patterns as a GORCI(W) operation
7817 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2
7818 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
7819 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
7820 // Note that with the variant of 3.,
7821 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
7822 // the inner pattern will first be matched as GREVI and then the outer
7823 // pattern will be matched to GORC via the first rule above.
7824 // 4. (or (rotl/rotr x, bitwidth/2), x)
7825 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
7826 const RISCVSubtarget &Subtarget) {
7827 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
7828 EVT VT = Op.getValueType();
7830 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
7831 SDLoc DL(Op);
7832 SDValue Op0 = Op.getOperand(0);
7833 SDValue Op1 = Op.getOperand(1);
7835 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
7836 if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
7837 isa<ConstantSDNode>(Reverse.getOperand(1)) &&
7838 isPowerOf2_32(Reverse.getConstantOperandVal(1)))
7839 return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
7840 // We can also form GORCI from ROTL/ROTR by half the bitwidth.
7841 if ((Reverse.getOpcode() == ISD::ROTL ||
7842 Reverse.getOpcode() == ISD::ROTR) &&
7843 Reverse.getOperand(0) == X &&
7844 isa<ConstantSDNode>(Reverse.getOperand(1))) {
7845 uint64_t RotAmt = Reverse.getConstantOperandVal(1);
7846 if (RotAmt == (VT.getSizeInBits() / 2))
7847 return DAG.getNode(RISCVISD::GORC, DL, VT, X,
7848 DAG.getConstant(RotAmt, DL, VT));
7849 }
7850 return SDValue();
7851 };
7853 // Check for either commutable permutation of (or (GREVI x, shamt), x)
7854 if (SDValue V = MatchOROfReverse(Op0, Op1))
7855 return V;
7856 if (SDValue V = MatchOROfReverse(Op1, Op0))
7857 return V;
7859 // OR is commutable so canonicalize its OR operand to the left
7860 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
7861 std::swap(Op0, Op1);
7862 if (Op0.getOpcode() != ISD::OR)
7863 return SDValue();
7864 SDValue OrOp0 = Op0.getOperand(0);
7865 SDValue OrOp1 = Op0.getOperand(1);
7866 auto LHS = matchGREVIPat(OrOp0);
7867 // OR is commutable so swap the operands and try again: x might have been
7868 // on the left.
7869 if (!LHS) {
7870 std::swap(OrOp0, OrOp1);
7871 LHS = matchGREVIPat(OrOp0);
7872 }
7873 auto RHS = matchGREVIPat(Op1);
7874 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
7875 return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
7876 DAG.getConstant(LHS->ShAmt, DL, VT));
7877 }
7878 }
7879 return SDValue();
7880 }
7882 // Matches any of the following bit-manipulation patterns:
7883 // (and (shl x, 1), (0x22222222 << 1))
7884 // (and (srl x, 1), 0x22222222)
7885 // (shl (and x, 0x22222222), 1)
7886 // (srl (and x, (0x22222222 << 1)), 1)
7887 // where the shift amount and mask may vary thus:
7888 // [1] = 0x22222222 / 0x44444444
7889 // [2] = 0x0C0C0C0C / 0x3C3C3C3C
7890 // [4] = 0x00F000F0 / 0x0F000F00
7891 // [8] = 0x0000FF00 / 0x00FF0000
7892 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
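// For example, the [1] masks select bit 1 (0x2...) and bit 2 (0x4...) of each
// nibble, the two bits that a shamt-1 shuffle stage exchanges.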
7893 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
7894 // These are the unshifted masks which we use to match bit-manipulation
7895 // patterns. They may be shifted left in certain circumstances.
7896 static const uint64_t BitmanipMasks[] = {
7897 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
7898 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
7900 return matchRISCVBitmanipPat(Op, BitmanipMasks);
7901 }
7903 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
7904 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
7905 const RISCVSubtarget &Subtarget) {
7906 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
7907 EVT VT = Op.getValueType();
7909 if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
7910 return SDValue();
7912 SDValue Op0 = Op.getOperand(0);
7913 SDValue Op1 = Op.getOperand(1);
7915 // Or is commutable so canonicalize the second OR to the LHS.
7916 if (Op0.getOpcode() != ISD::OR)
7917 std::swap(Op0, Op1);
7918 if (Op0.getOpcode() != ISD::OR)
7919 return SDValue();
7921 // We found an inner OR, so our operands are the operands of the inner OR
7922 // and the other operand of the outer OR.
7923 SDValue A = Op0.getOperand(0);
7924 SDValue B = Op0.getOperand(1);
7925 SDValue C = Op1;
7927 auto Match1 = matchSHFLPat(A);
7928 auto Match2 = matchSHFLPat(B);
7930 // If neither matched, we failed.
7931 if (!Match1 && !Match2)
7932 return SDValue();
7934 // We had at least one match. If one failed, try the remaining C operand.
7935 if (!Match1) {
7936 std::swap(A, C);
7937 Match1 = matchSHFLPat(A);
7938 if (!Match1)
7939 return SDValue();
7940 } else if (!Match2) {
7941 std::swap(B, C);
7942 Match2 = matchSHFLPat(B);
7943 if (!Match2)
7944 return SDValue();
7945 }
7946 assert(Match1 && Match2);
7948 // Make sure our matches pair up.
7949 if (!Match1->formsPairWith(*Match2))
7950 return SDValue();
7952 // All that remains is to make sure C is an AND with the same input, that masks
7953 // out the bits that are being shuffled.
7954 if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
7955 C.getOperand(0) != Match1->Op)
7956 return SDValue();
7958 uint64_t Mask = C.getConstantOperandVal(1);
7960 static const uint64_t BitmanipMasks[] = {
7961 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
7962 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
7963 };
7965 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
7966 unsigned MaskIdx = Log2_32(Match1->ShAmt);
7967 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
7969 if (Mask != ExpMask)
7970 return SDValue();
7972 SDLoc DL(Op);
7973 return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
7974 DAG.getConstant(Match1->ShAmt, DL, VT));
7975 }
7977 // Optimize (add (shl x, c0), (shl y, c1)) ->
7978 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
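// For example, (add (shl x, 5), (shl y, 8)) becomes
// (shl (add (shl y, 3), x), 5), i.e. SH3ADD y, x followed by SLLI 5.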
7979 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
7980 const RISCVSubtarget &Subtarget) {
7981 // Perform this optimization only when the Zba extension is enabled.
7982 if (!Subtarget.hasStdExtZba())
7983 return SDValue();
7985 // Skip for vector types and larger types.
7986 EVT VT = N->getValueType(0);
7987 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
7988 return SDValue();
7990 // The two operand nodes must be SHL and have no other use.
7991 SDValue N0 = N->getOperand(0);
7992 SDValue N1 = N->getOperand(1);
7993 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
7994 !N0->hasOneUse() || !N1->hasOneUse())
7995 return SDValue();
7998 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
7999 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
8000 if (!N0C || !N1C)
8001 return SDValue();
8002 int64_t C0 = N0C->getSExtValue();
8003 int64_t C1 = N1C->getSExtValue();
8004 if (C0 <= 0 || C1 <= 0)
8005 return SDValue();
8007 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
8008 int64_t Bits = std::min(C0, C1);
8009 int64_t Diff = std::abs(C0 - C1);
8010 if (Diff != 1 && Diff != 2 && Diff != 3)
8011 return SDValue();
8013 // Build nodes.
8014 SDLoc DL(N);
8015 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
8016 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
8017 SDValue NA0 =
8018 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
8019 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
8020 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
8021 }
8023 // Combine
8024 // ROTR ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
8025 // ROTL ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
8026 // ROTR ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
8027 // ROTL ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
8028 // RORW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
8029 // ROLW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
8030 // The GREV patterns represent BSWAP.
8031 // FIXME: This can be generalized to any GREV. We just need to toggle the MSB
8032 // of the shift amount.
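// For example, on RV32 (rotr (grev x, 24), 16): grev 24 is bswap, and a rotate
// by half the width acts like grev 16, so the pair folds to grev x, 24 ^ 16,
// i.e. grev x, 8.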
8033 static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
8034 const RISCVSubtarget &Subtarget) {
8035 bool IsWInstruction =
8036 N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW;
8037 assert((N->getOpcode() == ISD::ROTR || N->getOpcode() == ISD::ROTL ||
8039 "Unexpected opcode!");
8040 SDValue Src = N->getOperand(0);
8041 EVT VT = N->getValueType(0);
8042 SDLoc DL(N);
8044 if (!Subtarget.hasStdExtZbp() || Src.getOpcode() != RISCVISD::GREV)
8045 return SDValue();
8047 if (!isa<ConstantSDNode>(N->getOperand(1)) ||
8048 !isa<ConstantSDNode>(Src.getOperand(1)))
8049 return SDValue();
8051 unsigned BitWidth = IsWInstruction ? 32 : VT.getSizeInBits();
8052 assert(isPowerOf2_32(BitWidth) && "Expected a power of 2");
8054 // Needs to be a rotate by half the bitwidth for ROTR/ROTL or by 16 for
8055 // RORW/ROLW. And the grev should be the encoding for bswap for this width.
8056 unsigned ShAmt1 = N->getConstantOperandVal(1);
8057 unsigned ShAmt2 = Src.getConstantOperandVal(1);
8058 if (BitWidth < 32 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8))
8059 return SDValue();
8061 Src = Src.getOperand(0);
8063 // Toggle the MSB of the shift amount.
8064 unsigned CombinedShAmt = ShAmt1 ^ ShAmt2;
8065 if (CombinedShAmt == 0)
8066 return SDValue();
8068 SDValue Res = DAG.getNode(
8069 RISCVISD::GREV, DL, VT, Src,
8070 DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
8071 if (!IsWInstruction)
8072 return Res;
8074 // Sign extend the result to match the behavior of the rotate. This will be
8075 // selected to GREVIW in isel.
8076 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Res,
8077 DAG.getValueType(MVT::i32));
8078 }
8080 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
8081 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
8082 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stages do
8083 // not undo themselves, but they are redundant.
8084 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
8085 bool IsGORC = N->getOpcode() == RISCVISD::GORC;
8086 assert((IsGORC || N->getOpcode() == RISCVISD::GREV) && "Unexpected opcode");
8087 SDValue Src = N->getOperand(0);
8089 if (Src.getOpcode() != N->getOpcode())
8090 return SDValue();
8092 if (!isa<ConstantSDNode>(N->getOperand(1)) ||
8093 !isa<ConstantSDNode>(Src.getOperand(1)))
8094 return SDValue();
8096 unsigned ShAmt1 = N->getConstantOperandVal(1);
8097 unsigned ShAmt2 = Src.getConstantOperandVal(1);
8098 Src = Src.getOperand(0);
8100 unsigned CombinedShAmt;
8101 if (IsGORC)
8102 CombinedShAmt = ShAmt1 | ShAmt2;
8103 else
8104 CombinedShAmt = ShAmt1 ^ ShAmt2;
8106 if (CombinedShAmt == 0)
8107 return Src;
8109 SDLoc DL(N);
8110 return DAG.getNode(
8111 N->getOpcode(), DL, N->getValueType(0), Src,
8112 DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
8113 }
8115 // Combine a constant select operand into its use:
8117 // (and (select cond, -1, c), x)
8118 // -> (select cond, x, (and x, c)) [AllOnes=1]
8119 // (or (select cond, 0, c), x)
8120 // -> (select cond, x, (or x, c)) [AllOnes=0]
8121 // (xor (select cond, 0, c), x)
8122 // -> (select cond, x, (xor x, c)) [AllOnes=0]
8123 // (add (select cond, 0, c), x)
8124 // -> (select cond, x, (add x, c)) [AllOnes=0]
8125 // (sub x, (select cond, 0, c))
8126 // -> (select cond, x, (sub x, c)) [AllOnes=0]
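// In each case the selected constant (0 or -1) is the identity of the outer
// operation, so that select arm can simply forward x unchanged.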
8127 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
8128 SelectionDAG &DAG, bool AllOnes) {
8129 EVT VT = N->getValueType(0);
8131 // Skip vectors.
8132 if (VT.isVector())
8133 return SDValue();
8135 if ((Slct.getOpcode() != ISD::SELECT &&
8136 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
8137 !Slct.hasOneUse())
8138 return SDValue();
8140 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
8141 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
8142 };
8144 bool SwapSelectOps;
8145 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
8146 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
8147 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
8148 SDValue NonConstantVal;
8149 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
8150 SwapSelectOps = false;
8151 NonConstantVal = FalseVal;
8152 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
8153 SwapSelectOps = true;
8154 NonConstantVal = TrueVal;
8155 } else
8156 return SDValue();
8158 // Slct is now known to be the desired identity constant when CC is true.
8159 TrueVal = OtherOp;
8160 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
8161 // Unless SwapSelectOps says the condition should be false.
8162 if (SwapSelectOps)
8163 std::swap(TrueVal, FalseVal);
8165 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
8166 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
8167 {Slct.getOperand(0), Slct.getOperand(1),
8168 Slct.getOperand(2), TrueVal, FalseVal});
8170 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
8171 {Slct.getOperand(0), TrueVal, FalseVal});
8172 }
8174 // Attempt combineSelectAndUse on each operand of a commutative operator N.
8175 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
8176 bool AllOnes) {
8177 SDValue N0 = N->getOperand(0);
8178 SDValue N1 = N->getOperand(1);
8179 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
8180 return Result;
8181 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
8182 return Result;
8183 return SDValue();
8184 }
8186 // Transform (add (mul x, c0), c1) ->
8187 // (add (mul (add x, c1/c0), c0), c1%c0).
8188 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
8189 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
8190 // to an infinite loop in DAGCombine if transformed.
8191 // Or transform (add (mul x, c0), c1) ->
8192 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
8193 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
8194 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
8195 // lead to an infinite loop in DAGCombine if transformed.
8196 // Or transform (add (mul x, c0), c1) ->
8197 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
8198 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
8199 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
8200 // lead to an infinite loop in DAGCombine if transformed.
8201 // Or transform (add (mul x, c0), c1) ->
8202 // (mul (add x, c1/c0), c0).
8203 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
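// For example, (add (mul x, 73), 73000) -> (mul (add x, 1000), 73), since
// 73000 is not simm12 but 1000 is, and 73000 % 73 == 0 (the last rule).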
8204 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
8205 const RISCVSubtarget &Subtarget) {
8206 // Skip for vector types and larger types.
8207 EVT VT = N->getValueType(0);
8208 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
8209 return SDValue();
8210 // The first operand node must be a MUL and have no other use.
8211 SDValue N0 = N->getOperand(0);
8212 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
8213 return SDValue();
8214 // Check if c0 and c1 match the above conditions.
8215 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
8216 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
8217 if (!N0C || !N1C)
8218 return SDValue();
8219 // If N0C has multiple uses it's possible one of the cases in
8220 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
8221 // in an infinite loop.
8222 if (!N0C->hasOneUse())
8223 return SDValue();
8224 int64_t C0 = N0C->getSExtValue();
8225 int64_t C1 = N1C->getSExtValue();
8226 int64_t CA, CB;
8227 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
8228 return SDValue();
8229 // Search for proper CA (non-zero) and CB that both are simm12.
8230 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
8231 !isInt<12>(C0 * (C1 / C0))) {
8232 CA = C1 / C0;
8233 CB = C1 % C0;
8234 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
8235 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
8236 CA = C1 / C0 + 1;
8237 CB = C1 % C0 - C0;
8238 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
8239 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
8240 CA = C1 / C0 - 1;
8241 CB = C1 % C0 + C0;
8242 } else
8243 return SDValue();
8244 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
8245 SDLoc DL(N);
8246 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
8247 DAG.getConstant(CA, DL, VT));
8248 SDValue New1 =
8249 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
8250 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
8251 }
8253 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
8254 const RISCVSubtarget &Subtarget) {
8255 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
8256 return V;
8257 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
8258 return V;
8259 if (SDValue V = combineBinOpToReduce(N, DAG))
8260 return V;
8261 // fold (add (select lhs, rhs, cc, 0, y), x) ->
8262 // (select lhs, rhs, cc, x, (add x, y))
8263 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
8264 }
8266 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
8267 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
8268 // (select lhs, rhs, cc, x, (sub x, y))
8269 SDValue N0 = N->getOperand(0);
8270 SDValue N1 = N->getOperand(1);
8271 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
8272 }
8274 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
8275 const RISCVSubtarget &Subtarget) {
8276 SDValue N0 = N->getOperand(0);
8277 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
8278 // extending X. This is safe since we only need the LSB after the shift and
8279 // shift amounts larger than 31 would produce poison. If we wait until
8280 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
8281 // to use a BEXT instruction.
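// That is, (i32 (and (srl X, Y), 1)) is rebuilt below as a 64-bit SRL/AND
// plus a truncate, a form isel can select to BEXT.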
8282 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
8283 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
8284 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
8285 N0.hasOneUse()) {
8286 SDLoc DL(N);
8287 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
8288 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
8289 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
8290 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
8291 DAG.getConstant(1, DL, MVT::i64));
8292 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
8293 }
8295 if (SDValue V = combineBinOpToReduce(N, DAG))
8296 return V;
8298 // fold (and (select lhs, rhs, cc, -1, y), x) ->
8299 // (select lhs, rhs, cc, x, (and x, y))
8300 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
8301 }
8303 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
8304 const RISCVSubtarget &Subtarget) {
8305 if (Subtarget.hasStdExtZbp()) {
8306 if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
8307 return GREV;
8308 if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
8309 return GORC;
8310 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
8311 return SHFL;
8312 }
8314 if (SDValue V = combineBinOpToReduce(N, DAG))
8315 return V;
8316 // fold (or (select cond, 0, y), x) ->
8317 // (select cond, x, (or x, y))
8318 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
8319 }
8321 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
8322 SDValue N0 = N->getOperand(0);
8323 SDValue N1 = N->getOperand(1);
8325 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
8326 // NOTE: Assumes ROL being legal means ROLW is legal.
8327 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8328 if (N0.getOpcode() == RISCVISD::SLLW &&
8329 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
8330 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
8331 SDLoc DL(N);
8332 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
8333 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
8334 }
8336 if (SDValue V = combineBinOpToReduce(N, DAG))
8337 return V;
8338 // fold (xor (select cond, 0, y), x) ->
8339 // (select cond, x, (xor x, y))
8340 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
8341 }
8343 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
8344 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
8345 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
8346 // can become a sext.w instead of a shift pair.
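// For example, C1 == 0xffffffff becomes C1' == -1, a single ADDI, whereas the
// original constant needs an extra shift to materialize.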
8347 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
8348 const RISCVSubtarget &Subtarget) {
8349 SDValue N0 = N->getOperand(0);
8350 SDValue N1 = N->getOperand(1);
8351 EVT VT = N->getValueType(0);
8352 EVT OpVT = N0.getValueType();
8354 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
8355 return SDValue();
8357 // RHS needs to be a constant.
8358 auto *N1C = dyn_cast<ConstantSDNode>(N1);
8359 if (!N1C)
8360 return SDValue();
8362 // LHS needs to be (and X, 0xffffffff).
8363 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
8364 !isa<ConstantSDNode>(N0.getOperand(1)) ||
8365 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
8366 return SDValue();
8368 // Looking for an equality compare.
8369 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
8370 if (!isIntEqualitySetCC(Cond))
8371 return SDValue();
8373 const APInt &C1 = N1C->getAPIntValue();
8375 SDLoc dl(N);
8376 // If the constant is larger than 2^32 - 1 it is impossible for both sides
8377 // to be equal.
8378 if (C1.getActiveBits() > 32)
8379 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
8381 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
8382 N0.getOperand(0), DAG.getValueType(MVT::i32));
8383 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
8384 dl, OpVT), Cond);
8385 }
8387 static SDValue
8388 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
8389 const RISCVSubtarget &Subtarget) {
8390 SDValue Src = N->getOperand(0);
8391 EVT VT = N->getValueType(0);
8393 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
8394 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
8395 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
8396 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
8397 Src.getOperand(0));
8399 // Fold (i64 (sext_inreg (abs X), i32)) ->
8400 // (i64 (smax (sext_inreg (neg X), i32), X)) if X has more than 32 sign bits.
8401 // The (sext_inreg (neg X), i32) will be selected to negw by isel. This
8402 // pattern occurs after type legalization of (i32 (abs X)) on RV64 if the user
8403 // of the (i32 (abs X)) is a sext or setcc or something else that causes type
8404 // legalization to add a sext_inreg after the abs. The (i32 (abs X)) will have
8405 // been type legalized to (i64 (abs (sext_inreg X, i32))), but the sext_inreg
8406 // may get combined into an earlier operation so we need to use
8407 // ComputeNumSignBits.
8408 // NOTE: (i64 (sext_inreg (abs X), i32)) can also be created for
8409 // (i64 (ashr (shl (abs X), 32), 32)) without any type legalization so
8410 // we can't assume that X has 33 sign bits. We must check.
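// The fold below relies on abs X == smax(X, -X); the freeze ensures both uses
// of the (possibly poison) input observe the same value.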
8411 if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit() &&
8412 Src.getOpcode() == ISD::ABS && Src.hasOneUse() && VT == MVT::i64 &&
8413 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32 &&
8414 DAG.ComputeNumSignBits(Src.getOperand(0)) > 32) {
8415 SDLoc DL(N);
8416 SDValue Freeze = DAG.getFreeze(Src.getOperand(0));
8417 SDValue Neg =
8418 DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i64), Freeze);
8419 Neg = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Neg,
8420 DAG.getValueType(MVT::i32));
8421 return DAG.getNode(ISD::SMAX, DL, MVT::i64, Freeze, Neg);
8422 }
8424 return SDValue();
8425 }
8427 // Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
8428 // vwadd(u).vv/vx or vwsub(u).vv/vx.
8429 static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
8430 bool Commute = false) {
8431 assert((N->getOpcode() == RISCVISD::ADD_VL ||
8432 N->getOpcode() == RISCVISD::SUB_VL) &&
8433 "Unexpected opcode");
8434 bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL;
8435 SDValue Op0 = N->getOperand(0);
8436 SDValue Op1 = N->getOperand(1);
8437 if (Commute)
8438 std::swap(Op0, Op1);
8440 MVT VT = N->getSimpleValueType(0);
8442 // Determine the narrow size for a widening add/sub.
8443 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
8444 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
8445 VT.getVectorElementCount());
8447 SDValue Merge = N->getOperand(2);
8448 SDValue Mask = N->getOperand(3);
8449 SDValue VL = N->getOperand(4);
8451 SDLoc DL(N);
8453 // If the RHS is a sext or zext, we can form a widening op.
8454 if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
8455 Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
8456 Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
8457 unsigned ExtOpc = Op1.getOpcode();
8458 Op1 = Op1.getOperand(0);
8459 // Re-introduce narrower extends if needed.
8460 if (Op1.getValueType() != NarrowVT)
8461 Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
8463 unsigned WOpc;
8464 if (ExtOpc == RISCVISD::VSEXT_VL)
8465 WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL;
8466 else
8467 WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL;
8469 return DAG.getNode(WOpc, DL, VT, Op0, Op1, Merge, Mask, VL);
8470 }
8472 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
8473 // splat?
8475 return SDValue();
8476 }
8478 // Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or
8479 // vwsub(u).vv/vx.
8480 static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) {
8481 SDValue Op0 = N->getOperand(0);
8482 SDValue Op1 = N->getOperand(1);
8483 SDValue Merge = N->getOperand(2);
8484 SDValue Mask = N->getOperand(3);
8485 SDValue VL = N->getOperand(4);
8487 MVT VT = N->getSimpleValueType(0);
8488 MVT NarrowVT = Op1.getSimpleValueType();
8489 unsigned NarrowSize = NarrowVT.getScalarSizeInBits();
8491 unsigned VOpc;
8492 switch (N->getOpcode()) {
8493 default: llvm_unreachable("Unexpected opcode");
8494 case RISCVISD::VWADD_W_VL: VOpc = RISCVISD::VWADD_VL; break;
8495 case RISCVISD::VWSUB_W_VL: VOpc = RISCVISD::VWSUB_VL; break;
8496 case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break;
8497 case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break;
8500 bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL ||
8501 N->getOpcode() == RISCVISD::VWSUB_W_VL;
8503 SDLoc DL(N);
8505 // If the LHS is a sext or zext, we can narrow this op to the same size as
8506 // the other operand.
8507 if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) ||
8508 (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) &&
8509 Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) {
8510 unsigned ExtOpc = Op0.getOpcode();
8511 Op0 = Op0.getOperand(0);
8512 // Re-introduce narrower extends if needed.
8513 if (Op0.getValueType() != NarrowVT)
8514 Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
8515 return DAG.getNode(VOpc, DL, VT, Op0, Op1, Merge, Mask, VL);
8516 }
8518 bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL ||
8519 N->getOpcode() == RISCVISD::VWADDU_W_VL;
8521 // Look for splats on the left hand side of a vwadd(u).wv. We might be able
8522 // to commute and use a vwadd(u).vx instead.
8523 if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&
8524 Op0.getOperand(0).isUndef() && Op0.getOperand(2) == VL) {
8525 Op0 = Op0.getOperand(1);
8527 // See if we have enough sign bits or zero bits in the scalar to use a
8528 // widening add/sub by splatting to smaller element size.
8529 unsigned EltBits = VT.getScalarSizeInBits();
8530 unsigned ScalarBits = Op0.getValueSizeInBits();
8531 // Make sure we're getting all element bits from the scalar register.
8532 // FIXME: Support implicit sign extension of vmv.v.x?
8533 if (ScalarBits < EltBits)
8534 return SDValue();
8536 if (IsSigned) {
8537 if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize))
8538 return SDValue();
8539 } else {
8540 APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
8541 if (!DAG.MaskedValueIsZero(Op0, Mask))
8542 return SDValue();
8543 }
8545 Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
8546 DAG.getUNDEF(NarrowVT), Op0, VL);
8547 return DAG.getNode(VOpc, DL, VT, Op1, Op0, Merge, Mask, VL);
8548 }
8550 return SDValue();
8551 }
8553 // Try to form VWMUL, VWMULU or VWMULSU.
8554 // TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
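// For example, (mul_vl (vsext_vl a), (vsext_vl b)), where a and b are half
// the result element width, can become (vwmul_vl a, b).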
8555 static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
8556 bool Commute) {
8557 assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
8558 SDValue Op0 = N->getOperand(0);
8559 SDValue Op1 = N->getOperand(1);
8560 if (Commute)
8561 std::swap(Op0, Op1);
8563 bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
8564 bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
8565 bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
8566 if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
8567 return SDValue();
8569 SDValue Merge = N->getOperand(2);
8570 SDValue Mask = N->getOperand(3);
8571 SDValue VL = N->getOperand(4);
8573 // Make sure the mask and VL match.
8574 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
8575 return SDValue();
8577 MVT VT = N->getSimpleValueType(0);
8579 // Determine the narrow size for a widening multiply.
8580 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
8581 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
8582 VT.getVectorElementCount());
8584 SDLoc DL(N);
8586 // See if the other operand is the same opcode.
8587 if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
8588 if (!Op1.hasOneUse())
8589 return SDValue();
8591 // Make sure the mask and VL match.
8592 if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
8593 return SDValue();
8595 Op1 = Op1.getOperand(0);
8596 } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
8597 // The operand is a splat of a scalar.
8599 // The passthru must be undef for tail agnostic.
8600 if (!Op1.getOperand(0).isUndef())
8601 return SDValue();
8602 // The VL must be the same.
8603 if (Op1.getOperand(2) != VL)
8604 return SDValue();
8606 // Get the scalar value.
8607 Op1 = Op1.getOperand(1);
8609 // See if we have enough sign bits or zero bits in the scalar to use a
8610 // widening multiply by splatting to smaller element size.
8611 unsigned EltBits = VT.getScalarSizeInBits();
8612 unsigned ScalarBits = Op1.getValueSizeInBits();
8613 // Make sure we're getting all element bits from the scalar register.
8614 // FIXME: Support implicit sign extension of vmv.v.x?
8615 if (ScalarBits < EltBits)
8616 return SDValue();
8618 // If the LHS is a sign extend, try to use vwmul.
8619 if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) {
8620 // Can use vwmul.vx.
8621 } else {
8622 // Otherwise try to use vwmulu or vwmulsu.
8623 APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
8624 if (DAG.MaskedValueIsZero(Op1, Mask))
8625 IsVWMULSU = IsSignExt;
8626 else
8627 return SDValue();
8628 }
8630 Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
8631 DAG.getUNDEF(NarrowVT), Op1, VL);
8632 } else
8633 return SDValue();
8635 Op0 = Op0.getOperand(0);
8637 // Re-introduce narrower extends if needed.
8638 unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
8639 if (Op0.getValueType() != NarrowVT)
8640 Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
8641 // vwmulsu requires second operand to be zero extended.
8642 ExtOpc = IsVWMULSU ? RISCVISD::VZEXT_VL : ExtOpc;
8643 if (Op1.getValueType() != NarrowVT)
8644 Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
8646 unsigned WMulOpc = RISCVISD::VWMULSU_VL;
8647 if (!IsVWMULSU)
8648 WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
8649 return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Merge, Mask, VL);
8650 }
8652 static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
8653 switch (Op.getOpcode()) {
8654 case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
8655 case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
8656 case ISD::FFLOOR: return RISCVFPRndMode::RDN;
8657 case ISD::FCEIL: return RISCVFPRndMode::RUP;
8658 case ISD::FROUND: return RISCVFPRndMode::RMM;
8659 }
8661 return RISCVFPRndMode::Invalid;
8662 }
8664 // Fold
8665 // (fp_to_int (froundeven X)) -> fcvt X, rne
8666 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
8667 // (fp_to_int (ffloor X)) -> fcvt X, rdn
8668 // (fp_to_int (fceil X)) -> fcvt X, rup
8669 // (fp_to_int (fround X)) -> fcvt X, rmm
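// For example, (i32 (fp_to_sint (ffloor X))) becomes a single fcvt using the
// static rounding mode rdn instead of rounding and converting separately.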
8670 static SDValue performFP_TO_INTCombine(SDNode *N,
8671 TargetLowering::DAGCombinerInfo &DCI,
8672 const RISCVSubtarget &Subtarget) {
8673 SelectionDAG &DAG = DCI.DAG;
8674 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8675 MVT XLenVT = Subtarget.getXLenVT();
8677 // Only handle XLen or i32 types. Other types narrower than XLen will
8678 // eventually be legalized to XLenVT.
8679 EVT VT = N->getValueType(0);
8680 if (VT != MVT::i32 && VT != XLenVT)
8681 return SDValue();
8683 SDValue Src = N->getOperand(0);
8685 // Ensure the FP type is also legal.
8686 if (!TLI.isTypeLegal(Src.getValueType()))
8687 return SDValue();
8689 // Don't do this for f16 with Zfhmin and not Zfh.
8690 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
8691 return SDValue();
8693 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
8694 if (FRM == RISCVFPRndMode::Invalid)
8695 return SDValue();
8697 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
8699 unsigned Opc;
8700 if (VT == XLenVT)
8701 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
8702 else
8703 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
8705 SDLoc DL(N);
8706 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
8707 DAG.getTargetConstant(FRM, DL, XLenVT));
8708 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
8709 }
8711 // Fold
8712 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
8713 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
8714 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
8715 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
8716 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
8717 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
8718 TargetLowering::DAGCombinerInfo &DCI,
8719 const RISCVSubtarget &Subtarget) {
8720 SelectionDAG &DAG = DCI.DAG;
8721 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8722 MVT XLenVT = Subtarget.getXLenVT();
8724 // Only handle XLen types. Other types narrower than XLen will eventually be
8725 // legalized to XLenVT.
8726 EVT DstVT = N->getValueType(0);
8727 if (DstVT != XLenVT)
8728 return SDValue();
8730 SDValue Src = N->getOperand(0);
8732 // Ensure the FP type is also legal.
8733 if (!TLI.isTypeLegal(Src.getValueType()))
8734 return SDValue();
8736 // Don't do this for f16 with Zfhmin and not Zfh.
8737 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
8738 return SDValue();
8740 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8742 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
8743 if (FRM == RISCVFPRndMode::Invalid)
8744 return SDValue();
8746 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
8748 unsigned Opc;
8749 if (SatVT == DstVT)
8750 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
8751 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
8752 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
8753 else
8754 return SDValue();
8755 // FIXME: Support other SatVTs by clamping before or after the conversion.
8757 Src = Src.getOperand(0);
8759 SDLoc DL(N);
8760 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
8761 DAG.getTargetConstant(FRM, DL, XLenVT));
8763 // RISCV FP-to-int conversions saturate to the destination register size, but
8764 // don't produce 0 for nan.
8765 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
8766 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
8767 }
8769 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
8770 // smaller than XLenVT.
8771 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
8772 const RISCVSubtarget &Subtarget) {
8773 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
8775 SDValue Src = N->getOperand(0);
8776 if (Src.getOpcode() != ISD::BSWAP)
8777 return SDValue();
8779 EVT VT = N->getValueType(0);
8780 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
8781 !isPowerOf2_32(VT.getSizeInBits()))
8782 return SDValue();
8784 SDLoc DL(N);
8785 return DAG.getNode(RISCVISD::GREV, DL, VT, Src.getOperand(0),
8786 DAG.getConstant(7, DL, VT));
8787 }
8789 // Convert from one FMA opcode to another based on whether we are negating the
8790 // multiply result and/or the accumulator.
8791 // NOTE: Only supports RVV operations with VL.
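// For example, negateFMAOpcode(VFMADD_VL, /*NegMul*/ true, /*NegAcc*/ false)
// yields VFNMSUB_VL, which computes -(a * b) + c.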
8792 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
8793 assert((NegMul || NegAcc) && "Not negating anything?");
8795 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
8796 if (NegMul) {
8797 switch (Opcode) {
8799 default: llvm_unreachable("Unexpected opcode");
8800 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
8801 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
8802 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
8803 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
8804 }
8805 }
8808 // Negating the accumulator changes ADD<->SUB.
8809 if (NegAcc) {
8810 switch (Opcode) {
8812 default: llvm_unreachable("Unexpected opcode");
8813 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
8814 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
8815 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
8816 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
8817 }
8818 }
8820 return Opcode;
8821 }
8824 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
8825 const RISCVSubtarget &Subtarget) {
8826 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
8828 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
8829 return SDValue();
8831 if (!isa<ConstantSDNode>(N->getOperand(1)))
8832 return SDValue();
8833 uint64_t ShAmt = N->getConstantOperandVal(1);
8834 if (ShAmt > 32)
8835 return SDValue();
8837 SDValue N0 = N->getOperand(0);
8839 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
8840 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
8841 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
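// For example, (sra (sext_inreg (shl X, 5), i32), 3) becomes
// (sra (shl X, 37), 35).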
8842 if (ShAmt < 32 &&
8843 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
8844 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
8845 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
8846 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
8847 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
8848 if (LShAmt < 32) {
8849 SDLoc ShlDL(N0.getOperand(0));
8850 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
8851 N0.getOperand(0).getOperand(0),
8852 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
8853 SDLoc DL(N);
8854 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
8855 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
8856 }
8857 }
8859 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
8860 // FIXME: Should this be a generic combine? There's a similar combine on X86.
8862 // Also try these folds where an add or sub is in the middle.
8863 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
8864 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
8865 SDValue Shl;
8866 ConstantSDNode *AddC = nullptr;
8868 // We might have an ADD or SUB between the SRA and SHL.
8869 bool IsAdd = N0.getOpcode() == ISD::ADD;
8870 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
8871 if (!N0.hasOneUse())
8872 return SDValue();
8873 // Other operand needs to be a constant we can modify.
8874 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
8875 if (!AddC)
8876 return SDValue();
8878 // AddC needs to have at least 32 trailing zeros.
8879 if (AddC->getAPIntValue().countTrailingZeros() < 32)
8880 return SDValue();
8882 Shl = N0.getOperand(IsAdd ? 0 : 1);
8883 } else {
8884 // Not an ADD or SUB.
8885 Shl = N0;
8886 }
8888 // Look for a shift left by 32.
8889 if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse() ||
8890 !isa<ConstantSDNode>(Shl.getOperand(1)) ||
8891 Shl.getConstantOperandVal(1) != 32)
8892 return SDValue();
8894 SDLoc DL(N);
8895 SDValue In = Shl.getOperand(0);
8897 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
8898 // constant.
8899 if (AddC) {
8900 SDValue ShiftedAddC =
8901 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
8902 if (IsAdd)
8903 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
8904 else
8905 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
8906 }
8908 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
8909 DAG.getValueType(MVT::i32));
8910 if (ShAmt == 32)
8911 return SExt;
8913 return DAG.getNode(
8914 ISD::SHL, DL, MVT::i64, SExt,
8915 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
8916 }
8918 // Perform common combines for BR_CC and SELECT_CC conditions.
8919 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
8920 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
8921 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
8922 if (!ISD::isIntEqualitySetCC(CCVal))
8923 return false;
8925 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
8926 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
8927 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
8928 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
8929 // If we're looking for eq 0 instead of ne 0, we need to invert the
8930 // condition.
8931 bool Invert = CCVal == ISD::SETEQ;
8932 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
8933 if (Invert)
8934 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8936 RHS = LHS.getOperand(1);
8937 LHS = LHS.getOperand(0);
8938 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8940 CC = DAG.getCondCode(CCVal);
8941 return true;
8942 }
8944 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
8945 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
8946 RHS = LHS.getOperand(1);
8947 LHS = LHS.getOperand(0);
8948 return true;
8949 }
8951 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
8952 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
8953 LHS.getOperand(1).getOpcode() == ISD::Constant) {
8954 SDValue LHS0 = LHS.getOperand(0);
8955 if (LHS0.getOpcode() == ISD::AND &&
8956 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
8957 uint64_t Mask = LHS0.getConstantOperandVal(1);
8958 uint64_t ShAmt = LHS.getConstantOperandVal(1);
8959 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
8960 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
8961 CC = DAG.getCondCode(CCVal);
8963 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
8964 LHS = LHS0.getOperand(0);
8965 if (ShAmt != 0)
8966 LHS =
8967 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
8968 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
8969 return true;
8970 }
8971 }
8972 }
8974 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
8975 // This can occur when legalizing some floating point comparisons.
8976 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
8977 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
8978 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
8979 CC = DAG.getCondCode(CCVal);
8980 RHS = DAG.getConstant(0, DL, LHS.getValueType());
8981 return true;
8982 }
8984 return false;
8985 }
8987 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
8988 DAGCombinerInfo &DCI) const {
8989 SelectionDAG &DAG = DCI.DAG;
8991 // Helper to call SimplifyDemandedBits on an operand of N where only some low
8992 // bits are demanded. N will be added to the Worklist if it was not deleted.
8993 // Caller should return SDValue(N, 0) if this returns true.
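// For example, SimplifyDemandedLowBitsHelper(1, 5) asks for only the low 5
// bits of operand 1, since the *W shifts below read a 5-bit shift amount.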
8994 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
8995 SDValue Op = N->getOperand(OpNo);
8996 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
8997 if (!SimplifyDemandedBits(Op, Mask, DCI))
8998 return false;
9000 if (N->getOpcode() != ISD::DELETED_NODE)
9001 DCI.AddToWorklist(N);
9002 return true;
9003 };
9005 switch (N->getOpcode()) {
9006 default:
9007 break;
9008 case RISCVISD::SplitF64: {
9009 SDValue Op0 = N->getOperand(0);
9010 // If the input to SplitF64 is just BuildPairF64 then the operation is
9011 // redundant. Instead, use BuildPairF64's operands directly.
9012 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
9013 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
9015 if (Op0->isUndef()) {
9016 SDValue Lo = DAG.getUNDEF(MVT::i32);
9017 SDValue Hi = DAG.getUNDEF(MVT::i32);
9018 return DCI.CombineTo(N, Lo, Hi);
9019 }
9021 SDLoc DL(N);
9023 // It's cheaper to materialise two 32-bit integers than to load a double
9024 // from the constant pool and transfer it to integer registers through the
9025 // stack.
9026 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
9027 APInt V = C->getValueAPF().bitcastToAPInt();
9028 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
9029 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
9030 return DCI.CombineTo(N, Lo, Hi);
9031 }
9033 // This is a target-specific version of a DAGCombine performed in
9034 // DAGCombiner::visitBITCAST. It performs the equivalent of:
9035 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9036 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9037 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
9038 !Op0.getNode()->hasOneUse())
9039 break;
9040 SDValue NewSplitF64 =
9041 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
9042 Op0.getOperand(0));
9043 SDValue Lo = NewSplitF64.getValue(0);
9044 SDValue Hi = NewSplitF64.getValue(1);
9045 APInt SignBit = APInt::getSignMask(32);
9046 if (Op0.getOpcode() == ISD::FNEG) {
9047 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
9048 DAG.getConstant(SignBit, DL, MVT::i32));
9049 return DCI.CombineTo(N, Lo, NewHi);
9050 }
9051 assert(Op0.getOpcode() == ISD::FABS);
9052 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
9053 DAG.getConstant(~SignBit, DL, MVT::i32));
9054 return DCI.CombineTo(N, Lo, NewHi);
9055 }
9056 case RISCVISD::SLLW:
9057 case RISCVISD::SRAW:
9058 case RISCVISD::SRLW: {
9059 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
9060 if (SimplifyDemandedLowBitsHelper(0, 32) ||
9061 SimplifyDemandedLowBitsHelper(1, 5))
9062 return SDValue(N, 0);
9064 break;
9065 }
9068 case RISCVISD::RORW:
9069 case RISCVISD::ROLW: {
9070 if (N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW) {
9071 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
9072 if (SimplifyDemandedLowBitsHelper(0, 32) ||
9073 SimplifyDemandedLowBitsHelper(1, 5))
9074 return SDValue(N, 0);
9075 }
9077 return combineROTR_ROTL_RORW_ROLW(N, DAG, Subtarget);
9078 }
9079 case RISCVISD::CLZW:
9080 case RISCVISD::CTZW: {
9081 // Only the lower 32 bits of the first operand are read
9082 if (SimplifyDemandedLowBitsHelper(0, 32))
9083 return SDValue(N, 0);
9084 break;
9085 }
9086 case RISCVISD::GREV:
9087 case RISCVISD::GORC: {
9088 // Only the lower log2(Bitwidth) bits of the shift amount are read.
9089 unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
9090 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
9091 if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
9092 return SDValue(N, 0);
9094 return combineGREVI_GORCI(N, DAG);
9095 }
9096 case RISCVISD::GREVW:
9097 case RISCVISD::GORCW: {
9098 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
9099 if (SimplifyDemandedLowBitsHelper(0, 32) ||
9100 SimplifyDemandedLowBitsHelper(1, 5))
9101 return SDValue(N, 0);
9103 break;
9104 }
9105 case RISCVISD::SHFL:
9106 case RISCVISD::UNSHFL: {
9107 // Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
9108 unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
9109 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
9110 if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
9111 return SDValue(N, 0);
9113 break;
9114 }
9115 case RISCVISD::SHFLW:
9116 case RISCVISD::UNSHFLW: {
9117 // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
9118 if (SimplifyDemandedLowBitsHelper(0, 32) ||
9119 SimplifyDemandedLowBitsHelper(1, 4))
9120 return SDValue(N, 0);
9122 break;
9123 }
9124 case RISCVISD::BCOMPRESSW:
9125 case RISCVISD::BDECOMPRESSW: {
9126 // Only the lower 32 bits of LHS and RHS are read.
9127 if (SimplifyDemandedLowBitsHelper(0, 32) ||
9128 SimplifyDemandedLowBitsHelper(1, 32))
9129 return SDValue(N, 0);
9131 break;
9132 }
9135 case RISCVISD::FSRW:
9136 case RISCVISD::FSLW: {
9137 bool IsWInstruction =
9138 N->getOpcode() == RISCVISD::FSRW || N->getOpcode() == RISCVISD::FSLW;
9139 unsigned BitWidth =
9140 IsWInstruction ? 32 : N->getSimpleValueType(0).getSizeInBits();
9141 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
9142 // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
9143 if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) + 1))
9144 return SDValue(N, 0);
9146 break;
9147 }
9148 case RISCVISD::FMV_X_ANYEXTH:
9149 case RISCVISD::FMV_X_ANYEXTW_RV64: {
9150 SDLoc DL(N);
9151 SDValue Op0 = N->getOperand(0);
9152 MVT VT = N->getSimpleValueType(0);
9153 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
9154 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
9155 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
9156 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
9157 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
9158 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
9159 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
9160 assert(Op0.getOperand(0).getValueType() == VT &&
9161 "Unexpected value type!");
9162 return Op0.getOperand(0);
9163 }
9165 // This is a target-specific version of a DAGCombine performed in
9166 // DAGCombiner::visitBITCAST. It performs the equivalent of:
9167 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9168 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9169 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
9170 !Op0.getNode()->hasOneUse())
9171 break;
9172 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
9173 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
9174 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
9175 if (Op0.getOpcode() == ISD::FNEG)
9176 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
9177 DAG.getConstant(SignBit, DL, VT));
9179 assert(Op0.getOpcode() == ISD::FABS);
9180 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
9181 DAG.getConstant(~SignBit, DL, VT));
9182 }
9183 case ISD::ADD:
9184 return performADDCombine(N, DAG, Subtarget);
9185 case ISD::SUB:
9186 return performSUBCombine(N, DAG);
9187 case ISD::AND:
9188 return performANDCombine(N, DAG, Subtarget);
9189 case ISD::OR:
9190 return performORCombine(N, DAG, Subtarget);
9191 case ISD::XOR:
9192 return performXORCombine(N, DAG);
9193 case ISD::FADD:
9194 case ISD::UMAX:
9195 case ISD::UMIN:
9196 case ISD::SMAX:
9197 case ISD::SMIN:
9198 case ISD::FMAXNUM:
9199 case ISD::FMINNUM:
9200 return combineBinOpToReduce(N, DAG);
9201 case ISD::SETCC:
9202 return performSETCCCombine(N, DAG, Subtarget);
9203 case ISD::SIGN_EXTEND_INREG:
9204 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
9205 case ISD::ZERO_EXTEND:
9206 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
9207 // type legalization. This is safe because fp_to_uint produces poison if
9208 // the value can't fit in the result type.
9209 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
9210 SDValue Src = N->getOperand(0);
9211 if (Src.getOpcode() == ISD::FP_TO_UINT &&
9212 isTypeLegal(Src.getOperand(0).getValueType()))
9213 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
9214 Src.getOperand(0));
9215 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
9216 isTypeLegal(Src.getOperand(1).getValueType())) {
9217 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
9218 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
9219 Src.getOperand(0), Src.getOperand(1));
9220 DCI.CombineTo(N, Res);
9221 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
9222 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
9223 return SDValue(N, 0); // Return N so it doesn't get rechecked.
9224 }
9225 }
9226 break;
9227 case RISCVISD::SELECT_CC: {
9229 SDValue LHS = N->getOperand(0);
9230 SDValue RHS = N->getOperand(1);
9231 SDValue CC = N->getOperand(2);
9232 SDValue TrueV = N->getOperand(3);
9233 SDValue FalseV = N->getOperand(4);
9234 SDLoc DL(N);
9236 // If the True and False values are the same, we don't need a select_cc.
9237 if (TrueV == FalseV)
9238 return TrueV;
9240 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
9241 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
9242 {LHS, RHS, CC, TrueV, FalseV});
9244 return SDValue();
9245 }
9246 case RISCVISD::BR_CC: {
9247 SDValue LHS = N->getOperand(1);
9248 SDValue RHS = N->getOperand(2);
9249 SDValue CC = N->getOperand(3);
9250 SDLoc DL(N);
9252 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
9253 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
9254 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
9256 return SDValue();
9257 }
9258 case ISD::BITREVERSE:
9259 return performBITREVERSECombine(N, DAG, Subtarget);
9260 case ISD::FP_TO_SINT:
9261 case ISD::FP_TO_UINT:
9262 return performFP_TO_INTCombine(N, DCI, Subtarget);
9263 case ISD::FP_TO_SINT_SAT:
9264 case ISD::FP_TO_UINT_SAT:
9265 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
9266 case ISD::FCOPYSIGN: {
9267 EVT VT = N->getValueType(0);
9270 // There is a form of VFSGNJ which injects the negated sign of its second
9271 // operand. Try and bubble any FNEG up after the extend/round to produce
9272 // this optimized pattern. Avoid modifying cases where FP_ROUND and
9274 SDValue In2 = N->getOperand(1);
9275 // Avoid cases where the extend/round has multiple uses, as duplicating
9276 // those is typically more expensive than removing a fneg.
9277 if (!In2.hasOneUse())
9278 break;
9279 if (In2.getOpcode() != ISD::FP_EXTEND &&
9280 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
9281 break;
9282 In2 = In2.getOperand(0);
9283 if (In2.getOpcode() != ISD::FNEG)
9284 break;
9285 SDLoc DL(N);
9286 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
9287 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
9288 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
9289 }
9290 case ISD::MGATHER:
9291 case ISD::MSCATTER:
9292 case ISD::VP_GATHER:
9293 case ISD::VP_SCATTER: {
9294 if (!DCI.isBeforeLegalize())
9295 break;
9296 SDValue Index, ScaleOp;
9297 bool IsIndexScaled = false;
9298 bool IsIndexSigned = false;
9299 if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
9300 Index = VPGSN->getIndex();
9301 ScaleOp = VPGSN->getScale();
9302 IsIndexScaled = VPGSN->isIndexScaled();
9303 IsIndexSigned = VPGSN->isIndexSigned();
9304 } else {
9305 const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
9306 Index = MGSN->getIndex();
9307 ScaleOp = MGSN->getScale();
9308 IsIndexScaled = MGSN->isIndexScaled();
9309 IsIndexSigned = MGSN->isIndexSigned();
9310 }
9311 EVT IndexVT = Index.getValueType();
9312 MVT XLenVT = Subtarget.getXLenVT();
9313 // RISCV indexed loads only support the "unsigned unscaled" addressing
9314 // mode, so anything else must be manually legalized.
9315 bool NeedsIdxLegalization =
9316 IsIndexScaled ||
9317 (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
9318 if (!NeedsIdxLegalization)
9319 break;
9321 SDLoc DL(N);
9323 // Any index legalization should first promote to XLenVT, so we don't lose
9324 // bits when scaling. This may create an illegal index type so we let
9325 // LLVM's legalization take care of the splitting.
9326 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
9327 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
9328 IndexVT = IndexVT.changeVectorElementType(XLenVT);
9329 Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
9330 DL, IndexVT, Index);
9331 }
9333 if (IsIndexScaled) {
9334 // Manually scale the indices.
9335 // TODO: Sanitize the scale operand here?
9336 // TODO: For VP nodes, should we use VP_SHL here?
9337 unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
9338 assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
9339 SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
9340 Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
9341 ScaleOp = DAG.getTargetConstant(1, DL, ScaleOp.getValueType());
9342 }
9344 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
9345 if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
9346 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
9347 {VPGN->getChain(), VPGN->getBasePtr(), Index,
9348 ScaleOp, VPGN->getMask(),
9349 VPGN->getVectorLength()},
9350 VPGN->getMemOperand(), NewIndexTy);
9351 if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
9352 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
9353 {VPSN->getChain(), VPSN->getValue(),
9354 VPSN->getBasePtr(), Index, ScaleOp,
9355 VPSN->getMask(), VPSN->getVectorLength()},
9356 VPSN->getMemOperand(), NewIndexTy);
9357 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
9358 return DAG.getMaskedGather(
9359 N->getVTList(), MGN->getMemoryVT(), DL,
9360 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
9361 MGN->getBasePtr(), Index, ScaleOp},
9362 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
9363 const auto *MSN = cast<MaskedScatterSDNode>(N);
9364 return DAG.getMaskedScatter(
9365 N->getVTList(), MSN->getMemoryVT(), DL,
9366 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
9367 Index, ScaleOp},
9368 MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
9369 }
9370 case RISCVISD::SRA_VL:
9371 case RISCVISD::SRL_VL:
9372 case RISCVISD::SHL_VL: {
9373 SDValue ShAmt = N->getOperand(1);
9374 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
9375 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
9376 SDLoc DL(N);
9377 SDValue VL = N->getOperand(3);
9378 EVT VT = N->getValueType(0);
9379 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9380 ShAmt.getOperand(1), VL);
9381 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
9382 N->getOperand(2), N->getOperand(3), N->getOperand(4));
9383 }
9384 break;
9385 }
9386 case ISD::SRA:
9387 if (SDValue V = performSRACombine(N, DAG, Subtarget))
9388 return V;
9389 LLVM_FALLTHROUGH;
9390 case ISD::SRL:
9391 case ISD::SHL: {
9392 SDValue ShAmt = N->getOperand(1);
9393 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
9394 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
9395 SDLoc DL(N);
9396 EVT VT = N->getValueType(0);
9397 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9398 ShAmt.getOperand(1),
9399 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
9400 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
9401 }
9402 break;
9403 }
9404 case RISCVISD::ADD_VL:
9405 if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false))
9406 return V;
9407 return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true);
9408 case RISCVISD::SUB_VL:
9409 return combineADDSUB_VLToVWADDSUB_VL(N, DAG);
9410 case RISCVISD::VWADD_W_VL:
9411 case RISCVISD::VWADDU_W_VL:
9412 case RISCVISD::VWSUB_W_VL:
9413 case RISCVISD::VWSUBU_W_VL:
9414 return combineVWADD_W_VL_VWSUB_W_VL(N, DAG);
9415 case RISCVISD::MUL_VL:
9416 if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
9417 return V;
9418 // Mul is commutative.
9419 return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
9420 case RISCVISD::VFMADD_VL:
9421 case RISCVISD::VFNMADD_VL:
9422 case RISCVISD::VFMSUB_VL:
9423 case RISCVISD::VFNMSUB_VL: {
9424 // Fold FNEG_VL into FMA opcodes.
9425 SDValue A = N->getOperand(0);
9426 SDValue B = N->getOperand(1);
9427 SDValue C = N->getOperand(2);
9428 SDValue Mask = N->getOperand(3);
9429 SDValue VL = N->getOperand(4);
9431 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
9432 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
9433 V.getOperand(2) == VL) {
9434 // Return the negated input.
9435 V = V.getOperand(0);
9436 return true;
9437 }
9439 return false;
9440 };
9442 bool NegA = invertIfNegative(A);
9443 bool NegB = invertIfNegative(B);
9444 bool NegC = invertIfNegative(C);
9446 // If no operands are negated, we're done.
9447 if (!NegA && !NegB && !NegC)
9448 return SDValue();
9450 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
9451 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
9452 VL);
9453 }
9454 case ISD::STORE: {
9455 auto *Store = cast<StoreSDNode>(N);
9456 SDValue Val = Store->getValue();
9457 // Combine store of vmv.x.s to vse with VL of 1.
9458 // FIXME: Support FP.
9459 if (Val.getOpcode() == RISCVISD::VMV_X_S) {
9460 SDValue Src = Val.getOperand(0);
9461 MVT VecVT = Src.getSimpleValueType();
9462 EVT MemVT = Store->getMemoryVT();
9463 // The memory VT and the element type must match.
9464 if (MemVT == VecVT.getVectorElementType()) {
9465 SDLoc DL(N);
9466 MVT MaskVT = getMaskTypeFor(VecVT);
9467 return DAG.getStoreVP(
9468 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
9469 DAG.getConstant(1, DL, MaskVT),
9470 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
9471 Store->getMemOperand(), Store->getAddressingMode(),
9472 Store->isTruncatingStore(), /*IsCompress*/ false);
9473 }
9474 }
9476 break;
9477 }
9478 case ISD::SPLAT_VECTOR: {
9479 EVT VT = N->getValueType(0);
9480 // Only perform this combine on legal MVT types.
9481 if (!isTypeLegal(VT))
9482 break;
9483 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
9484 DAG, Subtarget))
9485 return Gather;
9486 break;
9487 }
9488 case RISCVISD::VMV_V_X_VL: {
9489 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
9490 // scalar input.
9491 unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
9492 unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
9493 if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
9494 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
9495 return SDValue(N, 0);
9497 break;
9498 }
9499 case ISD::INTRINSIC_WO_CHAIN: {
9500 unsigned IntNo = N->getConstantOperandVal(0);
9501 switch (IntNo) {
9502 // By default we do not combine any intrinsic.
9503 default:
9504 return SDValue();
9505 case Intrinsic::riscv_vcpop:
9506 case Intrinsic::riscv_vcpop_mask:
9507 case Intrinsic::riscv_vfirst:
9508 case Intrinsic::riscv_vfirst_mask: {
9509 SDValue VL = N->getOperand(2);
9510 if (IntNo == Intrinsic::riscv_vcpop_mask ||
9511 IntNo == Intrinsic::riscv_vfirst_mask)
9512 VL = N->getOperand(3);
9513 if (!isNullConstant(VL))
9514 return SDValue();
9515 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
9516 SDLoc DL(N);
9517 EVT VT = N->getValueType(0);
9518 if (IntNo == Intrinsic::riscv_vfirst ||
9519 IntNo == Intrinsic::riscv_vfirst_mask)
9520 return DAG.getConstant(-1, DL, VT);
9521 return DAG.getConstant(0, DL, VT);
9522 }
9523 }
9524 }
9525 case ISD::BITCAST: {
9526 assert(Subtarget.useRVVForFixedLengthVectors());
9527 SDValue N0 = N->getOperand(0);
9528 EVT VT = N->getValueType(0);
9529 EVT SrcVT = N0.getValueType();
9530 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
9531 // type, widen both sides to avoid a trip through memory.
    if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
        VT.isScalarInteger()) {
      unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
      SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
      Ops[0] = N0;
      SDLoc DL(N);
      N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
      N0 = DAG.getBitcast(MVT::i8, N0);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
    }
    break;
  }
  }

  return SDValue();
}

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) &&
         "Expected shift op");

  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
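  //
  // For example, (shl (add x, 4097), 2) is only worth rewriting as
  // (add (shl x, 2), 16388) if 16388 is no more expensive to materialise than
  // 4097; both need a LUI+ADDI pair here, so the cost comparison below
  // decides.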
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      const APInt &C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.getFeatureBits(),
                                              /*CompressionCost*/ true);
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
          /*CompressionCost*/ true);

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}

bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization as late as possible.
  if (!TLO.LegalOps)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  // Only handle AND, OR, and XOR for now.
  unsigned Opcode = Op.getOpcode();
  if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
    return false;

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.
  APInt ShrunkMask = Mask & DemandedBits;

  // Try to make a smaller immediate by setting undemanded bits.
  APInt ExpandedMask = Mask | ~DemandedBits;

  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
      return true;
    SDLoc DL(Op);
    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
    SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                                    Op.getOperand(0), NewC);
    return TLO.CombineTo(Op, NewOp);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // AND has a few special cases for zext.
  if (Opcode == ISD::AND) {
    // Preserve (and X, 0xffff) when zext.h is supported.
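    // For example, if Mask is 0x1ff00 and only bits 15:8 are demanded, then
    // 0xffff is a legal replacement here and selects to zext.h.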
    if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
      APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
      if (IsLegalMask(NewMask))
        return UseMask(NewMask);
    }

    // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
    if (VT == MVT::i64) {
      APInt NewMask = APInt(64, 0xffffffff);
      if (IsLegalMask(NewMask))
        return UseMask(NewMask);
    }
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.
  if (!ExpandedMask.isNegative())
    return false;

  // What is the fewest number of bits we need to represent the negative
  // number?
  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
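  // For example, if ShrunkMask is 0x800 and bits 11 and up are undemanded,
  // setting bits from 11 gives 0x...fffff800, which is the simm12 -2048.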
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
    NewMask.setBitsFrom(11);
  else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
    NewMask.setBitsFrom(31);
  else
    return false;

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
}

static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
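
  // Each stage swaps adjacent groups of 2^Stage bits when the matching bit of
  // ShAmt is set. For example, ShAmt == 7 bit-reverses each byte, while
  // ShAmt == 56 reverses the bytes of the doubleword (rev8/bswap).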
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1 << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = GREVMasks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
      if (IsGORC)
        Res |= x;
      x = Res;
    }
  }

  return x;
}

void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                        KnownBits &Known,
                                                        const APInt &DemandedElts,
                                                        const SelectionDAG &DAG,
                                                        unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    unsigned LowBits = Log2_32(PossibleTZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = Log2_32(PossibleLZ) + 1;
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GORC: {
    if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
      unsigned ShAmt = C->getZExtValue() & (Known.getBitWidth() - 1);
      bool IsGORC = Op.getOpcode() == RISCVISD::GORC;
      // To compute zeros, we need to invert the value and invert it back after.
      Known.Zero =
          ~computeGREVOrGORC(~Known.Zero.getZExtValue(), ShAmt, IsGORC);
      Known.One = computeGREVOrGORC(Known.One.getZExtValue(), ShAmt, IsGORC);
    }

    break;
  }
  case RISCVISD::READ_VLENB: {
    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
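    // For example, if the minimum VLEN is 128 (MinVLenB == 16), the low four
    // bits of VLENB are known zero.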
    const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
    const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    Known.Zero.setLowBits(Log2_32(MinVLenB));
    Known.Zero.setBitsFrom(Log2_32(MaxVLenB) + 1);
    if (MaxVLenB == MinVLenB)
      Known.One.setBit(Log2_32(MinVLenB));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      // Assume that VL output is positive and would fit in an int32_t.
      // TODO: VLEN might be capped at 16 bits in a future V spec update.
      if (BitWidth >= 32)
        Known.Zero.setBitsFrom(31);
      break;
    }
    break;
  }
  }
}

unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  case RISCVISD::GREVW:
  case RISCVISD::GORCW:
  case RISCVISD::FSLW:
  case RISCVISD::FSRW:
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW:
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW:
  case RISCVISD::BFPW:
  case RISCVISD::FCVT_W_RV64:
  case RISCVISD::FCVT_WU_RV64:
  case RISCVISD::STRICT_FCVT_W_RV64:
  case RISCVISD::STRICT_FCVT_WU_RV64:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
    // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
    // will stay within the upper 32 bits. If there were more than 32 sign bits
    // before there will be at least 33 sign bits after.
    if (Op.getValueType() == MVT::i64 &&
        isa<ConstantSDNode>(Op.getOperand(1)) &&
        (Op.getConstantOperandVal(1) & 0x10) == 0) {
      unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
      if (Tmp > 32)
        return 33;
    }
    break;
  }
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
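    // For example, extracting from an nxv4i8 vector on RV64 yields
    // 64 - 8 + 1 = 57 known sign bits.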
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
    case Intrinsic::riscv_masked_atomicrmw_add_i64:
    case Intrinsic::riscv_masked_atomicrmw_sub_i64:
    case Intrinsic::riscv_masked_atomicrmw_nand_i64:
    case Intrinsic::riscv_masked_atomicrmw_max_i64:
    case Intrinsic::riscv_masked_atomicrmw_min_i64:
    case Intrinsic::riscv_masked_atomicrmw_umax_i64:
    case Intrinsic::riscv_masked_atomicrmw_umin_i64:
    case Intrinsic::riscv_masked_cmpxchg_i64:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.getXLen() == 64);
      assert(getMinCmpXchgSizeInBits() == 32);
      assert(Subtarget.hasStdExtA());
      return 33;
    }
    break;
  }
  }

  return 1;
}

const Constant *
RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
  assert(Ld && "Unexpected null LoadSDNode");
  if (!ISD::isNormalLoad(Ld))
    return nullptr;

  SDValue Ptr = Ld->getBasePtr();

  // Only constant pools with no offset are supported.
  auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
    if (!CNode || CNode->isMachineConstantPoolEntry() ||
        CNode->getOffset() != 0)
      return nullptr;

    return CNode;
  };

  // Simple case, LLA.
  if (Ptr.getOpcode() == RISCVISD::LLA) {
    auto *CNode = GetSupportedConstantPool(Ptr);
    if (!CNode || CNode->getTargetFlags() != 0)
      return nullptr;

    return CNode->getConstVal();
  }

  // Look for a HI and ADD_LO pair.
  if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
      Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
    return nullptr;

  auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
  auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));

  if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
      !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
    return nullptr;

  if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
    return nullptr;

  return CNodeLo->getConstVal();
}

static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again. The sequence used is:
  //
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}

static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMOLo =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}

static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
                                        unsigned RelOpcode, unsigned EqOpcode,
                                        const RISCVSubtarget &Subtarget) {
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  // Save the current FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);

  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
                 .addReg(Src1Reg)
                 .addReg(Src2Reg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFlags, RegState::Kill);

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}

static MachineBasicBlock *
EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
                          MachineBasicBlock *ThisMBB,
                          const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // Without this, custom-inserter would have generated:
  //
  //   A
  //   | \
  //   |  \
  //   |   B
  //   |  /
  //   | /
  //   C
  //   | \
  //   |  \
  //   |   D
  //   |  /
  //   | /
  //   E
  //
  // A: X = ...; Y = ...
  // B: empty
  // C: Z = PHI [X, A], [Y, B]
  // D: empty
  // E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A
  //   | \
  //   |  \
  //   |   C
  //   |  /|
  //   | / |
  //   |/  |
  //   |   D
  //   |  /
  //   | /
  //   E
  //
  // A: X = ...; Y = ...
  // D: empty
  // E: PHI [X, A], [X, C], [Y, D]

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = First.getDebugLoc();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(First)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstMBB);
  // Fallthrough block for FirstMBB.
  FirstMBB->addSuccessor(SecondMBB);
  ThisMBB->addSuccessor(SinkMBB);
  FirstMBB->addSuccessor(SinkMBB);
  // This is fallthrough.
  SecondMBB->addSuccessor(SinkMBB);

  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
  Register FLHS = First.getOperand(1).getReg();
  Register FRHS = First.getOperand(2).getReg();
  // Insert appropriate branch.
  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
      .addReg(FLHS)
      .addReg(FRHS)
      .addMBB(SinkMBB);

  Register SLHS = Second.getOperand(1).getReg();
  Register SRHS = Second.getOperand(2).getReg();
  Register Op1Reg4 = First.getOperand(4).getReg();
  Register Op1Reg5 = First.getOperand(5).getReg();

  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
  // Insert appropriate branch.
  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
      .addReg(SLHS)
      .addReg(SRHS)
      .addMBB(SinkMBB);

  Register DestReg = Second.getOperand(0).getReg();
  Register Op2Reg4 = Second.getOperand(4).getReg();
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
      .addReg(Op2Reg4)
      .addMBB(ThisMBB)
      .addReg(Op1Reg4)
      .addMBB(FirstMBB)
      .addReg(Op1Reg5)
      .addMBB(SecondMBB);

  // Now remove the Select_FPRX_s.
  First.eraseFromParent();
  Second.eraseFromParent();

  return SinkMBB;
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;
  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
  if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
      Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill()) {
    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
  }

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
      .addReg(LHS)
      .addReg(RHS)
      .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  case RISCV::PseudoQuietFLE_H:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLT_H:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLE_S:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLT_S:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLE_D:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLT_D:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
  }
}

void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
                                                        SDNode *Node) const {
  // Add FRM dependency to any instructions with dynamic rounding mode.
  unsigned Opc = MI.getOpcode();
  auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
  if (Idx < 0)
    return;
  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
    return;
  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM))
    return;
  MI.addOperand(
      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
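//
// For example, under the ilp32d ABI a struct { double d; int i; } can be
// passed as a separate double in an FPR and an int in a GPR while both
// register classes have space; once FPRs are exhausted, it is passed per the
// integer calling convention rules above.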

static const MCPhysReg ArgGPRs[] = {
    RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
    RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR16s[] = {
    RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
    RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
static const MCPhysReg ArgFPR32s[] = {
    RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
    RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
    RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
    RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
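// For example, on RV32 an i64 argument split into two i32 halves may end up
// in two GPRs, in one GPR with the other half on the stack, or entirely on
// the stack, depending on how many argument GPRs remain.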
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}

static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
                               Optional<unsigned> FirstMaskArgument,
                               CCState &State, const RISCVTargetLowering &TLI) {
  const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
  if (RC == &RISCV::VRRegClass) {
    // Assign the first mask argument to V0.
    // This is an interim calling convention and it may be changed in the
    // future.
    if (FirstMaskArgument && ValNo == *FirstMaskArgument)
      return State.AllocateReg(RISCV::V0);
    return State.AllocateReg(ArgVRs);
  }
  if (RC == &RISCV::VRM2RegClass)
    return State.AllocateReg(ArgVRM2s);
  if (RC == &RISCV::VRM4RegClass)
    return State.AllocateReg(ArgVRM4s);
  if (RC == &RISCV::VRM8RegClass)
    return State.AllocateReg(ArgVRM8s);
  llvm_unreachable("Unhandled register class for ValueType");
}

// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     Optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Any return value split into more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
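  // For example, a variadic double on RV32 whose next free register would be
  // a3 skips a3 and is passed in the aligned pair a4/a5.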
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if (ValVT == MVT::f16 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address.
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-conversion is
  // needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}

template <typename ArgTy>
static Optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
      return ArgIdx.index();
  }
  return None;
}

void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}

void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Outs.size();

  Optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}

// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const RISCVTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}

static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}

// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, we save the pointer which points to
    // the scalable vector value in the stack. The ValVT will be the pointer
    // type, instead of the scalable vector type.
    ValVT = LocVT;
  }
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}

static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI =
        MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}

// FastCC has less than 1% performance improvement for some particular
// benchmarks. But theoretically, it may have benefits for some cases.
static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            Optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}

static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    static const MCPhysReg GPRList[] = {
        RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
        RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
  return true;
}
11112 // Transform physical registers into virtual registers.
11113 SDValue RISCVTargetLowering::LowerFormalArguments(
11114 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
11115 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
11116 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
11118 MachineFunction &MF = DAG.getMachineFunction();
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
        !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
      report_fatal_error(
          "GHC calling convention requires the F and D instruction set extensions");
  }
  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }
11147 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11148 MVT XLenVT = Subtarget.getXLenVT();
11149 unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
11151 std::vector<SDValue> OutChains;
11153 // Assign locations to all of the incoming arguments.
11154 SmallVector<CCValAssign, 16> ArgLocs;
11155 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? CC_RISCV_FastCC
                                                   : CC_RISCV);
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
11176 if (VA.getLocInfo() == CCValAssign::Indirect) {
11177 // If the original argument was split and passed by reference (e.g. i128
11178 // on RV32), we need to load all parts of it here (using the same
11179 // address). Vectors may be partly split to registers and partly to the
11180 // stack, in which case the base address is partly offset and subsequent
11181 // stores are relative to that.
11182 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
11183 MachinePointerInfo()));
11184 unsigned ArgIndex = Ins[i].OrigArgIndex;
11185 unsigned ArgPartOffset = Ins[i].PartOffset;
11186 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
11187 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
11188 CCValAssign &PartVA = ArgLocs[i + 1];
11189 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
11190 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
11191 if (PartVA.getValVT().isScalableVector())
11192 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
11193 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
11194 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }

    InVals.push_back(ArgValue);
  }
  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
11205 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
11206 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
11207 MachineFrameInfo &MFI = MF.getFrameInfo();
11208 MachineRegisterInfo &RegInfo = MF.getRegInfo();
11209 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
11211 // Offset of the first variable argument from stack pointer, and size of
11212 // the vararg save area. For now, the varargs save area is either zero or
11213 // large enough to hold a0-a7.
11214 int VaArgOffset, VarArgsSaveSize;
11216 // If all registers are allocated, then all varargs must be passed on the
11217 // stack and we don't need to save any argregs.
11218 if (ArgRegs.size() == Idx) {
11219 VaArgOffset = CCInfo.getNextStackOffset();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }
    // Record the frame index of the first variable argument,
    // which is needed to implement VASTART.
11228 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
11229 RVFI->setVarArgsFrameIndex(FI);
    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }
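    // Illustrative layout (example values, not from the source): an RV32
    // function with two named arguments leaves a2-a7 unallocated, so
    // VarArgsSaveSize is 6 * 4 = 24 bytes and the save area spans offsets
    // -24..-1 relative to the incoming stack pointer; Idx is even here, so no
    // padding slot is required.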
11239 // Copy the integer registers that may have been used for passing varargs
11240 // to the vararg save area.
11241 for (unsigned I = Idx; I < ArgRegs.size();
11242 ++I, VaArgOffset += XLenInBytes) {
11243 const Register Reg = RegInfo.createVirtualRegister(RC);
11244 RegInfo.addLiveIn(ArgRegs[I], Reg);
11245 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
11246 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
11247 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
11248 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
11249 MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
11253 OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }
11258 // All stores are grouped in one node to allow the matching between
11259 // the size of Ins and InVals. This only happens for vararg functions.
11260 if (!OutChains.empty()) {
11261 OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
11268 /// isEligibleForTailCallOptimization - Check whether the call is eligible
11269 /// for tail call optimization.
11270 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
11271 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
11272 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
11273 const SmallVector<CCValAssign, 16> &ArgLocs) const {
11275 auto &Callee = CLI.Callee;
11276 auto CalleeCC = CLI.CallConv;
11277 auto &Outs = CLI.Outs;
11278 auto &Caller = MF.getFunction();
11279 auto CallerCC = Caller.getCallingConv();
11281 // Exception-handling functions need a special set of instructions to
11282 // indicate a return to the hardware. Tail-calling another function would
11283 // probably break this.
11284 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
11285 // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;
11289 // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getNextStackOffset() != 0)
    return false;
11293 // Do not tail call opt if any parameters need to be passed indirectly.
11294 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
11295 // passed indirectly. So the address of the value will be passed in a
11296 // register, or if not available, then the address is put on the stack. In
11297 // order to pass indirectly, space on the stack often needs to be allocated
11298 // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // in order to store the value. In this case the CCInfo.getNextStackOffset()
  // != 0 check is not enough and we need to check if any CCValAssign in
  // ArgLocs is marked CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;
11312 // Externally-defined functions with weak linkage should not be
11313 // tail-called. The behaviour of branch instructions in this situation (as
11314 // used for tail calls) is implementation-defined, so we cannot rely on the
11315 // linker replacing the tail call with a return.
11316 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
11317 const GlobalValue *GV = G->getGlobal();
    if (GV->hasExternalWeakLinkage())
      return false;
  }
11322 // The callee has to preserve all registers the caller needs to preserve.
11323 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11324 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
11325 if (CalleeCC != CallerCC) {
11326 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }
11331 // Byval parameters hand the function a pointer directly into the stack area
11332 // we want to reuse during a tail call. Working around this *is* possible
11333 // but less efficient and uglier in LowerCall.
11334 for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}
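// Illustrative example: `return f(a, b);` in a C-calling-convention caller
// only becomes a `tail f` pseudo when every check above passes, e.g. no
// stack-passed or indirect arguments, no sret, no byval, and a callee whose
// preserved-register mask covers the caller's.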
11341 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
11342 return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
11346 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
11347 // and output parameter nodes.
11348 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
11349 SmallVectorImpl<SDValue> &InVals) const {
11350 SelectionDAG &DAG = CLI.DAG;
11351 SDLoc &DL = CLI.DL;
11352 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
11353 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
11354 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
11355 SDValue Chain = CLI.Chain;
11356 SDValue Callee = CLI.Callee;
11357 bool &IsTailCall = CLI.IsTailCall;
11358 CallingConv::ID CallConv = CLI.CallConv;
11359 bool IsVarArg = CLI.IsVarArg;
11360 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11361 MVT XLenVT = Subtarget.getXLenVT();
11363 MachineFunction &MF = DAG.getMachineFunction();
11365 // Analyze the operands of the call, assigning locations to each operand.
11366 SmallVector<CCValAssign, 16> ArgLocs;
11367 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
                                                    : CC_RISCV);
  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");
11386 // Get a count of how many bytes are to be pushed on the stack.
11387 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
11389 // Create local copies for byval args
11390 SmallVector<SDValue, 8> ByValArgs;
11391 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
11392 ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;
11396 SDValue Arg = OutVals[i];
11397 unsigned Size = Flags.getByValSize();
11398 Align Alignment = Flags.getNonZeroByValAlign();
    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
11402 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
11403 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
11405 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
11406 /*IsVolatile=*/false,
11407 /*AlwaysInline=*/false, IsTailCall,
11408 MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
11415 // Copy argument values to their designated locations.
11416 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
11419 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
11420 CCValAssign &VA = ArgLocs[i];
11421 SDValue ArgValue = OutVals[i];
11422 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11424 // Handle passing f64 on RV32D with a soft float ABI as a special case.
11425 bool IsF64OnRV32DSoftABI =
11426 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
11427 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
11428 SDValue SplitF64 = DAG.getNode(
11429 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
11430 SDValue Lo = SplitF64.getValue(0);
11431 SDValue Hi = SplitF64.getValue(1);
11433 Register RegLo = VA.getLocReg();
11434 RegsToPass.push_back(std::make_pair(RegLo, Lo));
11436 if (RegLo == RISCV::X17) {
11437 // Second half of f64 is passed on the stack.
11438 // Work out the address of the stack slot.
11439 if (!StackPtr.getNode())
11440 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
11442 MemOpChains.push_back(
11443 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }
11453 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
11454 // as any other MemLoc.
11456 // Promote the value if needed.
11457 // For now, only handle fully promoted and indirect arguments.
11458 if (VA.getLocInfo() == CCValAssign::Indirect) {
11459 // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
11462 getPrefTypeAlign(ArgValue.getValueType(), DAG));
11463 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
11464 // If the original argument was split (e.g. i128), we need
11465 // to store the required parts of it here (and pass just one address).
11466 // Vectors may be partly split to registers and partly to the stack, in
11467 // which case the base address is partly offset and subsequent stores are
11468 // relative to that.
11469 unsigned ArgIndex = Outs[i].OrigArgIndex;
11470 unsigned ArgPartOffset = Outs[i].PartOffset;
11471 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
11472 // Calculate the total size to store. We don't have access to what we're
    // actually storing other than performing the loop and collecting the
    // info.
11475 SmallVector<std::pair<SDValue, SDValue>> Parts;
11476 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
11477 SDValue PartValue = OutVals[i + 1];
11478 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
11479 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
11480 EVT PartVT = PartValue.getValueType();
11481 if (PartVT.isScalableVector())
11482 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
11483 StoredSize += PartVT.getStoreSize();
11484 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
      }
11488 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
11489 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
11490 MemOpChains.push_back(
11491 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
11492 MachinePointerInfo::getFixedStack(MF, FI)));
11493 for (const auto &Part : Parts) {
11494 SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }
11507 // Use local copy if it is a byval arg.
11508 if (Flags.isByVal())
11509 ArgValue = ByValArgs[j++];
11511 if (VA.isRegLoc()) {
11512 // Queue up the argument copies and emit them at the end.
11513 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
11516 assert(!IsTailCall && "Tail call not allowed if stack is used "
11517 "for passing parameters");
11519 // Work out the address of the stack slot.
11520 if (!StackPtr.getNode())
11521 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
11524 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }
11532 // Join the stores, which are independent of one another.
11533 if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;
11538 // Build a sequence of copy-to-reg nodes, chained and glued together.
11539 for (auto &Reg : RegsToPass) {
11540 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }
11544 // Validate that none of the argument registers have been marked as
11545 // reserved, if so report an error. Do the same for the return address if this
11546 // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
  if (!IsTailCall &&
      MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
    MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
        MF.getFunction(),
        "Return address register required, but has been reserved."});
11554 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
11555 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
11556 // split it and then direct call can be matched by PseudoCALL.
11557 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
11558 const GlobalValue *GV = S->getGlobal();
11560 unsigned OpFlags = RISCVII::MO_CALL;
11561 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
11562 OpFlags = RISCVII::MO_PLT;
11564 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
11565 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
11566 unsigned OpFlags = RISCVII::MO_CALL;
11568 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
                                                 nullptr))
      OpFlags = RISCVII::MO_PLT;
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }
11575 // The first call operand is the chain and the second is the target address.
11576 SmallVector<SDValue, 8> Ops;
11577 Ops.push_back(Chain);
11578 Ops.push_back(Callee);
11580 // Add argument registers to the end of the list so that they are
11581 // known live into the call.
11582 for (auto &Reg : RegsToPass)
11583 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }
11593 // Glue the call to the argument copies, if any.
11594 if (Glue.getNode())
11595 Ops.push_back(Glue);
11598 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
  }
11605 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
11606 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
11607 Glue = Chain.getValue(1);
11609 // Mark the end of the call, which is glued to the call itself.
11610 Chain = DAG.getCALLSEQ_END(Chain,
11611 DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
11614 Glue = Chain.getValue(1);
11616 // Assign locations to each value returned by this call.
11617 SmallVector<CCValAssign, 16> RVLocs;
11618 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
11619 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
11621 // Copy all of the result registers out of their specified physreg.
11622 for (auto &VA : RVLocs) {
11623 // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
11626 // Glue the RetValue to the end of the call sequence
11627 Chain = RetValue.getValue(1);
11628 Glue = RetValue.getValue(2);
11630 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
11631 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
11632 SDValue RetValue2 =
11633 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
11634 Chain = RetValue2.getValue(1);
11635 Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
    InVals.push_back(RetValue);
  }

  return Chain;
}
11648 bool RISCVTargetLowering::CanLowerReturn(
11649 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
11650 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
11651 SmallVector<CCValAssign, 16> RVLocs;
11652 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
11654 Optional<unsigned> FirstMaskArgument;
11655 if (Subtarget.hasVInstructions())
11656 FirstMaskArgument = preAssignMask(Outs);
11658 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
11659 MVT VT = Outs[i].VT;
11660 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
11661 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
11662 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
11663 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
11674 const SmallVectorImpl<SDValue> &OutVals,
11675 const SDLoc &DL, SelectionDAG &DAG) const {
11676 const MachineFunction &MF = DAG.getMachineFunction();
11677 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
11679 // Stores the assignment of the return value to a location.
11680 SmallVector<CCValAssign, 16> RVLocs;
11682 // Info about the registers and stack slot.
11683 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
11684 *DAG.getContext());
11686 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
11687 nullptr, CC_RISCV);
11689 if (CallConv == CallingConv::GHC && !RVLocs.empty())
11690 report_fatal_error("GHC functions return void only");
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);
11695 // Copy the result values into the output registers.
11696 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
11697 SDValue Val = OutVals[i];
11698 CCValAssign &VA = RVLocs[i];
11699 assert(VA.isRegLoc() && "Can only return in registers!");
11701 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
11702 // Handle returning f64 on RV32D with a soft float ABI.
11703 assert(VA.isRegLoc() && "Expected return via registers");
11704 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
11705 DAG.getVTList(MVT::i32, MVT::i32), Val);
11706 SDValue Lo = SplitF64.getValue(0);
11707 SDValue Hi = SplitF64.getValue(1);
11708 Register RegLo = VA.getLocReg();
11709 assert(RegLo < RISCV::X31 && "Invalid register pair");
11710 Register RegHi = RegLo + 1;
11712 if (STI.isRegisterReservedByUser(RegLo) ||
11713 STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});
11718 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
11719 Glue = Chain.getValue(1);
11720 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
11721 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
11722 Glue = Chain.getValue(1);
11723 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
11726 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
11727 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
11729 if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});
11734 // Guarantee that all emitted copies are stuck together.
11735 Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }
11740 RetOps[0] = Chain; // Update chain.
11742 // Add the glue node if we have it.
11743 if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }
11747 unsigned RetOpc = RISCVISD::RET_FLAG;
11748 // Interrupt service routines use different return instructions.
11749 const Function &Func = DAG.getMachineFunction().getFunction();
11750 if (Func.hasFnAttribute("interrupt")) {
11751 if (!Func.getReturnType()->isVoidTy())
11752 report_fatal_error(
11753 "Functions with the interrupt attribute must have void return type!");
11755 MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();
11759 if (Kind == "user")
11760 RetOpc = RISCVISD::URET_FLAG;
11761 else if (Kind == "supervisor")
11762 RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;
  }
  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
11770 void RISCVTargetLowering::validateCCReservedRegs(
11771 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
11772 MachineFunction &MF) const {
11773 const Function &F = MF.getFunction();
11774 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
11776 if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}
11783 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}
11787 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
11788 #define NODE_NAME_CASE(NODE) \
11789 case RISCVISD::NODE: \
11790 return "RISCVISD::" #NODE;
11791 // clang-format off
11792 switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
11795 NODE_NAME_CASE(RET_FLAG)
11796 NODE_NAME_CASE(URET_FLAG)
11797 NODE_NAME_CASE(SRET_FLAG)
11798 NODE_NAME_CASE(MRET_FLAG)
11799 NODE_NAME_CASE(CALL)
11800 NODE_NAME_CASE(SELECT_CC)
11801 NODE_NAME_CASE(BR_CC)
11802 NODE_NAME_CASE(BuildPairF64)
11803 NODE_NAME_CASE(SplitF64)
11804 NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(ADD_LO)
  NODE_NAME_CASE(HI)
  NODE_NAME_CASE(LLA)
  NODE_NAME_CASE(ADD_TPREL)
  NODE_NAME_CASE(LA)
  NODE_NAME_CASE(LA_TLS_IE)
11811 NODE_NAME_CASE(LA_TLS_GD)
11812 NODE_NAME_CASE(MULHSU)
11813 NODE_NAME_CASE(SLLW)
11814 NODE_NAME_CASE(SRAW)
11815 NODE_NAME_CASE(SRLW)
11816 NODE_NAME_CASE(DIVW)
11817 NODE_NAME_CASE(DIVUW)
11818 NODE_NAME_CASE(REMUW)
11819 NODE_NAME_CASE(ROLW)
11820 NODE_NAME_CASE(RORW)
11821 NODE_NAME_CASE(CLZW)
11822 NODE_NAME_CASE(CTZW)
11823 NODE_NAME_CASE(FSLW)
11824 NODE_NAME_CASE(FSRW)
11825 NODE_NAME_CASE(FSL)
11826 NODE_NAME_CASE(FSR)
11827 NODE_NAME_CASE(FMV_H_X)
11828 NODE_NAME_CASE(FMV_X_ANYEXTH)
11829 NODE_NAME_CASE(FMV_X_SIGNEXTH)
11830 NODE_NAME_CASE(FMV_W_X_RV64)
11831 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
11832 NODE_NAME_CASE(FCVT_X)
11833 NODE_NAME_CASE(FCVT_XU)
11834 NODE_NAME_CASE(FCVT_W_RV64)
11835 NODE_NAME_CASE(FCVT_WU_RV64)
11836 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
11837 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
11838 NODE_NAME_CASE(READ_CYCLE_WIDE)
11839 NODE_NAME_CASE(GREV)
11840 NODE_NAME_CASE(GREVW)
11841 NODE_NAME_CASE(GORC)
11842 NODE_NAME_CASE(GORCW)
11843 NODE_NAME_CASE(SHFL)
11844 NODE_NAME_CASE(SHFLW)
11845 NODE_NAME_CASE(UNSHFL)
11846 NODE_NAME_CASE(UNSHFLW)
11847 NODE_NAME_CASE(BFP)
11848 NODE_NAME_CASE(BFPW)
11849 NODE_NAME_CASE(BCOMPRESS)
11850 NODE_NAME_CASE(BCOMPRESSW)
11851 NODE_NAME_CASE(BDECOMPRESS)
11852 NODE_NAME_CASE(BDECOMPRESSW)
11853 NODE_NAME_CASE(VMV_V_X_VL)
11854 NODE_NAME_CASE(VFMV_V_F_VL)
11855 NODE_NAME_CASE(VMV_X_S)
11856 NODE_NAME_CASE(VMV_S_X_VL)
11857 NODE_NAME_CASE(VFMV_S_F_VL)
11858 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
11859 NODE_NAME_CASE(READ_VLENB)
11860 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
11861 NODE_NAME_CASE(VSLIDEUP_VL)
11862 NODE_NAME_CASE(VSLIDE1UP_VL)
11863 NODE_NAME_CASE(VSLIDEDOWN_VL)
11864 NODE_NAME_CASE(VSLIDE1DOWN_VL)
11865 NODE_NAME_CASE(VID_VL)
11866 NODE_NAME_CASE(VFNCVT_ROD_VL)
11867 NODE_NAME_CASE(VECREDUCE_ADD_VL)
11868 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
11869 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
11870 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
11871 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
11872 NODE_NAME_CASE(VECREDUCE_AND_VL)
11873 NODE_NAME_CASE(VECREDUCE_OR_VL)
11874 NODE_NAME_CASE(VECREDUCE_XOR_VL)
11875 NODE_NAME_CASE(VECREDUCE_FADD_VL)
11876 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
11877 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
11878 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
11879 NODE_NAME_CASE(ADD_VL)
11880 NODE_NAME_CASE(AND_VL)
11881 NODE_NAME_CASE(MUL_VL)
11882 NODE_NAME_CASE(OR_VL)
11883 NODE_NAME_CASE(SDIV_VL)
11884 NODE_NAME_CASE(SHL_VL)
11885 NODE_NAME_CASE(SREM_VL)
11886 NODE_NAME_CASE(SRA_VL)
11887 NODE_NAME_CASE(SRL_VL)
11888 NODE_NAME_CASE(SUB_VL)
11889 NODE_NAME_CASE(UDIV_VL)
11890 NODE_NAME_CASE(UREM_VL)
11891 NODE_NAME_CASE(XOR_VL)
11892 NODE_NAME_CASE(SADDSAT_VL)
11893 NODE_NAME_CASE(UADDSAT_VL)
11894 NODE_NAME_CASE(SSUBSAT_VL)
11895 NODE_NAME_CASE(USUBSAT_VL)
11896 NODE_NAME_CASE(FADD_VL)
11897 NODE_NAME_CASE(FSUB_VL)
11898 NODE_NAME_CASE(FMUL_VL)
11899 NODE_NAME_CASE(FDIV_VL)
11900 NODE_NAME_CASE(FNEG_VL)
11901 NODE_NAME_CASE(FABS_VL)
11902 NODE_NAME_CASE(FSQRT_VL)
11903 NODE_NAME_CASE(VFMADD_VL)
11904 NODE_NAME_CASE(VFNMADD_VL)
11905 NODE_NAME_CASE(VFMSUB_VL)
11906 NODE_NAME_CASE(VFNMSUB_VL)
11907 NODE_NAME_CASE(FCOPYSIGN_VL)
11908 NODE_NAME_CASE(SMIN_VL)
11909 NODE_NAME_CASE(SMAX_VL)
11910 NODE_NAME_CASE(UMIN_VL)
11911 NODE_NAME_CASE(UMAX_VL)
11912 NODE_NAME_CASE(FMINNUM_VL)
11913 NODE_NAME_CASE(FMAXNUM_VL)
11914 NODE_NAME_CASE(MULHS_VL)
11915 NODE_NAME_CASE(MULHU_VL)
11916 NODE_NAME_CASE(FP_TO_SINT_VL)
11917 NODE_NAME_CASE(FP_TO_UINT_VL)
11918 NODE_NAME_CASE(SINT_TO_FP_VL)
11919 NODE_NAME_CASE(UINT_TO_FP_VL)
11920 NODE_NAME_CASE(FP_EXTEND_VL)
11921 NODE_NAME_CASE(FP_ROUND_VL)
11922 NODE_NAME_CASE(VWMUL_VL)
11923 NODE_NAME_CASE(VWMULU_VL)
11924 NODE_NAME_CASE(VWMULSU_VL)
11925 NODE_NAME_CASE(VWADD_VL)
11926 NODE_NAME_CASE(VWADDU_VL)
11927 NODE_NAME_CASE(VWSUB_VL)
11928 NODE_NAME_CASE(VWSUBU_VL)
11929 NODE_NAME_CASE(VWADD_W_VL)
11930 NODE_NAME_CASE(VWADDU_W_VL)
11931 NODE_NAME_CASE(VWSUB_W_VL)
11932 NODE_NAME_CASE(VWSUBU_W_VL)
11933 NODE_NAME_CASE(SETCC_VL)
11934 NODE_NAME_CASE(VSELECT_VL)
11935 NODE_NAME_CASE(VP_MERGE_VL)
11936 NODE_NAME_CASE(VMAND_VL)
11937 NODE_NAME_CASE(VMOR_VL)
11938 NODE_NAME_CASE(VMXOR_VL)
11939 NODE_NAME_CASE(VMCLR_VL)
11940 NODE_NAME_CASE(VMSET_VL)
11941 NODE_NAME_CASE(VRGATHER_VX_VL)
11942 NODE_NAME_CASE(VRGATHER_VV_VL)
11943 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
11944 NODE_NAME_CASE(VSEXT_VL)
11945 NODE_NAME_CASE(VZEXT_VL)
11946 NODE_NAME_CASE(VCPOP_VL)
11947 NODE_NAME_CASE(READ_CSR)
11948 NODE_NAME_CASE(WRITE_CSR)
11949 NODE_NAME_CASE(SWAP_CSR)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}
11956 /// getConstraintType - Given a constraint letter, return the type of
11957 /// constraint it is for this target.
11958 RISCVTargetLowering::ConstraintType
11959 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    case 'S': // A symbolic address
      return C_Other;
    }
  } else {
    if (Constraint == "vr" || Constraint == "vm")
      return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(Constraint);
}
11982 std::pair<unsigned, const TargetRegisterClass *>
11983 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
11986 // First, see if this is a constraint that directly corresponds to a
11987 // RISCV register class.
11988 if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
11997 return std::make_pair(0U, &RISCV::FPR16RegClass);
11998 if (Subtarget.hasStdExtF() && VT == MVT::f32)
11999 return std::make_pair(0U, &RISCV::FPR32RegClass);
12000 if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  } else if (Constraint == "vr") {
12007 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
12008 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
12009 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
        return std::make_pair(0U, RC);
    }
12012 } else if (Constraint == "vm") {
12013 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  }
12017 // Clang will correctly decode the usage of register name aliases into their
12018 // official names. However, other frontends like `rustc` do not. This allows
12019 // users of these frontends to use the ABI names for registers in LLVM-style
12020 // register constraints.
12021 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
12022 .Case("{zero}", RISCV::X0)
12023 .Case("{ra}", RISCV::X1)
12024 .Case("{sp}", RISCV::X2)
12025 .Case("{gp}", RISCV::X3)
12026 .Case("{tp}", RISCV::X4)
12027 .Case("{t0}", RISCV::X5)
12028 .Case("{t1}", RISCV::X6)
12029 .Case("{t2}", RISCV::X7)
12030 .Cases("{s0}", "{fp}", RISCV::X8)
12031 .Case("{s1}", RISCV::X9)
12032 .Case("{a0}", RISCV::X10)
12033 .Case("{a1}", RISCV::X11)
12034 .Case("{a2}", RISCV::X12)
12035 .Case("{a3}", RISCV::X13)
12036 .Case("{a4}", RISCV::X14)
12037 .Case("{a5}", RISCV::X15)
12038 .Case("{a6}", RISCV::X16)
12039 .Case("{a7}", RISCV::X17)
12040 .Case("{s2}", RISCV::X18)
12041 .Case("{s3}", RISCV::X19)
12042 .Case("{s4}", RISCV::X20)
12043 .Case("{s5}", RISCV::X21)
12044 .Case("{s6}", RISCV::X22)
12045 .Case("{s7}", RISCV::X23)
12046 .Case("{s8}", RISCV::X24)
12047 .Case("{s9}", RISCV::X25)
12048 .Case("{s10}", RISCV::X26)
12049 .Case("{s11}", RISCV::X27)
12050 .Case("{t3}", RISCV::X28)
12051 .Case("{t4}", RISCV::X29)
12052 .Case("{t5}", RISCV::X30)
12053 .Case("{t6}", RISCV::X31)
12054 .Default(RISCV::NoRegister);
12055 if (XRegFromAlias != RISCV::NoRegister)
12056 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
12058 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
12059 // TableGen record rather than the AsmName to choose registers for InlineAsm
12060 // constraints, plus we want to match those names to the widest floating point
12061 // register type available, manually select floating point registers here.
12063 // The second case is the ABI name of the register, so that frontends can also
12064 // use the ABI names in register constraint lists.
12065 if (Subtarget.hasStdExtF()) {
12066 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
12067 .Cases("{f0}", "{ft0}", RISCV::F0_F)
12068 .Cases("{f1}", "{ft1}", RISCV::F1_F)
12069 .Cases("{f2}", "{ft2}", RISCV::F2_F)
12070 .Cases("{f3}", "{ft3}", RISCV::F3_F)
12071 .Cases("{f4}", "{ft4}", RISCV::F4_F)
12072 .Cases("{f5}", "{ft5}", RISCV::F5_F)
12073 .Cases("{f6}", "{ft6}", RISCV::F6_F)
12074 .Cases("{f7}", "{ft7}", RISCV::F7_F)
12075 .Cases("{f8}", "{fs0}", RISCV::F8_F)
12076 .Cases("{f9}", "{fs1}", RISCV::F9_F)
12077 .Cases("{f10}", "{fa0}", RISCV::F10_F)
12078 .Cases("{f11}", "{fa1}", RISCV::F11_F)
12079 .Cases("{f12}", "{fa2}", RISCV::F12_F)
12080 .Cases("{f13}", "{fa3}", RISCV::F13_F)
12081 .Cases("{f14}", "{fa4}", RISCV::F14_F)
12082 .Cases("{f15}", "{fa5}", RISCV::F15_F)
12083 .Cases("{f16}", "{fa6}", RISCV::F16_F)
12084 .Cases("{f17}", "{fa7}", RISCV::F17_F)
12085 .Cases("{f18}", "{fs2}", RISCV::F18_F)
12086 .Cases("{f19}", "{fs3}", RISCV::F19_F)
12087 .Cases("{f20}", "{fs4}", RISCV::F20_F)
12088 .Cases("{f21}", "{fs5}", RISCV::F21_F)
12089 .Cases("{f22}", "{fs6}", RISCV::F22_F)
12090 .Cases("{f23}", "{fs7}", RISCV::F23_F)
12091 .Cases("{f24}", "{fs8}", RISCV::F24_F)
12092 .Cases("{f25}", "{fs9}", RISCV::F25_F)
12093 .Cases("{f26}", "{fs10}", RISCV::F26_F)
12094 .Cases("{f27}", "{fs11}", RISCV::F27_F)
12095 .Cases("{f28}", "{ft8}", RISCV::F28_F)
12096 .Cases("{f29}", "{ft9}", RISCV::F29_F)
12097 .Cases("{f30}", "{ft10}", RISCV::F30_F)
12098 .Cases("{f31}", "{ft11}", RISCV::F31_F)
12099 .Default(RISCV::NoRegister);
12100 if (FReg != RISCV::NoRegister) {
12101 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
12102 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
12103 unsigned RegNo = FReg - RISCV::F0_F;
12104 unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
12107 if (VT == MVT::f32 || VT == MVT::Other)
12108 return std::make_pair(FReg, &RISCV::FPR32RegClass);
12109 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
12110 unsigned RegNo = FReg - RISCV::F0_F;
12111 unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
    }
  }
12117 if (Subtarget.hasVInstructions()) {
12118 Register VReg = StringSwitch<Register>(Constraint.lower())
12119 .Case("{v0}", RISCV::V0)
12120 .Case("{v1}", RISCV::V1)
12121 .Case("{v2}", RISCV::V2)
12122 .Case("{v3}", RISCV::V3)
12123 .Case("{v4}", RISCV::V4)
12124 .Case("{v5}", RISCV::V5)
12125 .Case("{v6}", RISCV::V6)
12126 .Case("{v7}", RISCV::V7)
12127 .Case("{v8}", RISCV::V8)
12128 .Case("{v9}", RISCV::V9)
12129 .Case("{v10}", RISCV::V10)
12130 .Case("{v11}", RISCV::V11)
12131 .Case("{v12}", RISCV::V12)
12132 .Case("{v13}", RISCV::V13)
12133 .Case("{v14}", RISCV::V14)
12134 .Case("{v15}", RISCV::V15)
12135 .Case("{v16}", RISCV::V16)
12136 .Case("{v17}", RISCV::V17)
12137 .Case("{v18}", RISCV::V18)
12138 .Case("{v19}", RISCV::V19)
12139 .Case("{v20}", RISCV::V20)
12140 .Case("{v21}", RISCV::V21)
12141 .Case("{v22}", RISCV::V22)
12142 .Case("{v23}", RISCV::V23)
12143 .Case("{v24}", RISCV::V24)
12144 .Case("{v25}", RISCV::V25)
12145 .Case("{v26}", RISCV::V26)
12146 .Case("{v27}", RISCV::V27)
12147 .Case("{v28}", RISCV::V28)
12148 .Case("{v29}", RISCV::V29)
12149 .Case("{v30}", RISCV::V30)
12150 .Case("{v31}", RISCV::V31)
12151 .Default(RISCV::NoRegister);
12152 if (VReg != RISCV::NoRegister) {
12153 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
12154 return std::make_pair(VReg, &RISCV::VMRegClass);
12155 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
12156 return std::make_pair(VReg, &RISCV::VRRegClass);
12157 for (const auto *RC :
12158 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
12159 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
12160 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }
12167 std::pair<Register, const TargetRegisterClass *> Res =
12168 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12170 // If we picked one of the Zfinx register classes, remap it to the GPR class.
12171 // FIXME: When Zfinx is supported in CodeGen this will need to take the
12172 // Subtarget into account.
12173 if (Res.second == &RISCV::GPRF16RegClass ||
12174 Res.second == &RISCV::GPRF32RegClass ||
12175 Res.second == &RISCV::GPRF64RegClass)
    return std::make_pair(Res.first, &RISCV::GPRRegClass);

  return Res;
}
unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
12183 // Currently only support length 1 constraints.
12184 if (ConstraintCode.size() == 1) {
12185 switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
12196 void RISCVTargetLowering::LowerAsmOperandForConstraint(
12197 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
12198 SelectionDAG &DAG) const {
12199 // Currently only support length 1 constraints.
12200 if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'S':
      if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0)));
      } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
                                                BA->getValueType(0)));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
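// Illustrative use (hypothetical operand values): in
//   asm volatile("addi %0, %1, %2" : "=r"(d) : "r"(s), "I"(42));
// the 'I' case above folds 42 into a target constant because it fits in 12
// signed bits, while a value such as 5000 would be left unfolded.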
12243 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
12246 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
12247 return Builder.CreateFence(Ord);
12248 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}
12253 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
12256 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}
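// Taken together, the leading/trailing fences implement the conventional
// RISC-V mapping: a seq_cst load becomes "fence rw,rw; l{w|d}; fence r,rw"
// and a release (or stronger) store becomes "fence rw,w; s{w|d}".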
12261 TargetLowering::AtomicExpansionKind
12262 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
12263 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
12264 // point operations can't be used in an lr/sc sequence without breaking the
12265 // forward-progress guarantee.
12266 if (AI->isFloatingPointOperation())
12267 return AtomicExpansionKind::CmpXChg;
12269 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
12270 if (Size == 8 || Size == 16)
12271 return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
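// Illustrative expansion (simplified): an `atomicrmw add i8` is rewritten by
// AtomicExpandPass into an LR/SC loop over the naturally aligned containing
// word, where AlignedAddr, Mask and ShiftAmt (see the intrinsics below)
// select and position the byte within that word.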
12275 static Intrinsic::ID
12276 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
12281 case AtomicRMWInst::Xchg:
12282 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
12283 case AtomicRMWInst::Add:
12284 return Intrinsic::riscv_masked_atomicrmw_add_i32;
12285 case AtomicRMWInst::Sub:
12286 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
12287 case AtomicRMWInst::Nand:
12288 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
12289 case AtomicRMWInst::Max:
12290 return Intrinsic::riscv_masked_atomicrmw_max_i32;
12291 case AtomicRMWInst::Min:
12292 return Intrinsic::riscv_masked_atomicrmw_min_i32;
12293 case AtomicRMWInst::UMax:
12294 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
12295 case AtomicRMWInst::UMin:
12296 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
12304 case AtomicRMWInst::Xchg:
12305 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
12306 case AtomicRMWInst::Add:
12307 return Intrinsic::riscv_masked_atomicrmw_add_i64;
12308 case AtomicRMWInst::Sub:
12309 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
12310 case AtomicRMWInst::Nand:
12311 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
12312 case AtomicRMWInst::Max:
12313 return Intrinsic::riscv_masked_atomicrmw_max_i64;
12314 case AtomicRMWInst::Min:
12315 return Intrinsic::riscv_masked_atomicrmw_min_i64;
12316 case AtomicRMWInst::UMax:
12317 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
12318 case AtomicRMWInst::UMin:
12319 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}
12326 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
12327 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
12328 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
12329 unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
12332 Type *Tys[] = {AlignedAddr->getType()};
12333 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
12339 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;
12345 // Must pass the shift amount needed to sign extend the loaded value prior
12346 // to performing a signed comparison for min/max. ShiftAmt is the number of
12347 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
12350 if (AI->getOperation() == AtomicRMWInst::Min ||
12351 AI->getOperation() == AtomicRMWInst::Max) {
12352 const DataLayout &DL = AI->getModule()->getDataLayout();
12353 unsigned ValWidth =
12354 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
12357 Result = Builder.CreateCall(LrwOpScwLoop,
12358 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
12369 TargetLowering::AtomicExpansionKind
12370 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
12371 AtomicCmpXchgInst *CI) const {
12372 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
12373 if (Size == 8 || Size == 16)
12374 return AtomicExpansionKind::MaskedIntrinsic;
12375 return AtomicExpansionKind::None;
12378 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
12379 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
12380 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
12381 unsigned XLen = Subtarget.getXLen();
12382 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
12383 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
12386 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
12387 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
12390 Type *Tys[] = {AlignedAddr->getType()};
12391 Function *MaskedCmpXchg =
12392 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
12393 Value *Result = Builder.CreateCall(
12394 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
12400 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
                                                        EVT DataVT) const {
  return true;
}
12405 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
                                               EVT VT) const {
  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
    return false;

  switch (FPVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    return false;
  }
}
12422 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce the size of each
  // jump table entry to 4 bytes.
12425 if (Subtarget.is64Bit() && !isPositionIndependent() &&
12426 getTargetMachine().getCodeModel() == CodeModel::Small) {
12427 return MachineJumpTableInfo::EK_Custom32;
  }
  return TargetLowering::getJumpTableEncoding();
}
12432 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
12433 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
12434 unsigned uid, MCContext &Ctx) const {
12435 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
12436 getTargetMachine().getCodeModel() == CodeModel::Small);
  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}
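// The custom entry above is a bare 32-bit absolute symbol reference; this is
// presumably sound because the small code model keeps code addresses within a
// 32-bit (sign-extended) range on RV64.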
12440 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
12441 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
12442 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
12443 // a power of two as well.
12444 // FIXME: This doesn't work for zve32, but that's already broken
12445 // elsewhere for the same reason.
12446 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
  static_assert(RISCV::RVVBitsPerBlock == 64,
                "RVVBitsPerBlock changed, audit needed");
  return true;
}
12452 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}
12473 Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}
12478 Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}
12483 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a LibCall argument or
  // return value is f32 under the LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}
12493 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}
12500 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
12503 const bool HasExtMOrZmmul =
12504 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
12505 if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
12510 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
12511 // Break the MUL to a SLLI and an ADD/SUB.
12512 const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
12516 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
      if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
          ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
           (Imm - 8).isPowerOf2()))
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (HasExtMOrZmmul && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
12525 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
12526 // a pair of LUI/ADDI.
12527 if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
12528 APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}
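// Illustrative decompositions accepted above (example constants only):
// Imm == 3 becomes (x << 1) + x; with Zba, Imm == 4098 becomes
// sh1add(x, x << 12) because 4098 - 2 is a power of two; and Imm == 6144
// (== 3 << 11) becomes ((x << 1) + x) << 11 via the trailing-zeros path.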
12539 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
12540 SDValue ConstNode) const {
  // Let the DAGCombiner decide for vectors.
  EVT VT = AddNode.getValueType();
  if (VT.isVector())
    return true;
12546 // Let the DAGCombiner decide for larger types.
  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
    return true;
12550 // It is worse if c1 is simm12 while c1*c2 is not.
12551 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
12552 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
12553 const APInt &C1 = C1Node->getAPIntValue();
12554 const APInt &C2 = C2Node->getAPIntValue();
  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}
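// Illustrative: rewriting (x + 1000) * 3 as x * 3 + 3000 is rejected above
// because 1000 fits an ADDI simm12 immediate while 3000 does not, so the fold
// would trade a cheap add for an extra constant materialization.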
12562 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
12563 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
12564 bool *Fast) const {
  if (!VT.isVector()) {
    if (Fast)
      *Fast = false;
    return Subtarget.enableUnalignedScalarMem();
  }
12571 // All vector implementations must support element alignment
12572 EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = false;
    return true;
  }

  return false;
}
12582 bool RISCVTargetLowering::splitValueIntoRegisterParts(
12583 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
12584 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
12585 bool IsABIRegCopy = CC.has_value();
12586 EVT ValueVT = Val.getValueType();
12587 if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
12588 // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
12589 // and cast to f32.
12590 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
12591 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
12592 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
12593 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }
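  // Illustrative: the f16 value 1.0 has bit pattern 0x3C00 and travels as the
  // i32 0xFFFF3C00 reinterpreted as f32, i.e. a NaN-boxed value of the kind
  // the FP register file expects for types narrower than the register.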
12599 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
12600 LLVMContext &Context = *DAG.getContext();
12601 EVT ValueEltVT = ValueVT.getVectorElementType();
12602 EVT PartEltVT = PartVT.getVectorElementType();
12603 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
12604 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
12605 if (PartVTBitSize % ValueVTBitSize == 0) {
12606 assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type
      // of PartVT first. For example, to copy a <vscale x 1 x i8> value into
      // <vscale x 4 x i16>, we first widen it to <vscale x 8 x i8> with
      // INSERT_SUBVECTOR, then bitcast the result to <vscale x 4 x i16>.
12613 if (ValueEltVT != PartEltVT) {
12614 if (PartVTBitSize > ValueVTBitSize) {
12615 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
12616 assert(Count != 0 && "The number of element should not be zero.");
12617 EVT SameEltTypeVT =
12618 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
12619 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
12620 DAG.getUNDEF(SameEltTypeVT), Val,
                          DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
12636 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
12637 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
12638 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
12639 bool IsABIRegCopy = CC.has_value();
12640 if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
12641 SDValue Val = Parts[0];
12643 // Cast the f32 to i32, truncate to i16, and cast back to f16.
12644 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
12645 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }
12650 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
12651 LLVMContext &Context = *DAG.getContext();
12652 SDValue Val = Parts[0];
12653 EVT ValueEltVT = ValueVT.getVectorElementType();
12654 EVT PartEltVT = PartVT.getVectorElementType();
12655 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
12656 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
12657 if (PartVTBitSize % ValueVTBitSize == 0) {
12658 assert(PartVTBitSize >= ValueVTBitSize);
12659 EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert to the same element type
      // as PartVT first. For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8>, then extract the <vscale x 1 x i8> with
      // EXTRACT_SUBVECTOR.
12666 if (ValueEltVT != PartEltVT) {
12667 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
12668 assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
12671 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
12685 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12686 if (isIntDivCheap(N->getValueType(0), Attr))
12687 return SDValue(N, 0); // Lower SDIV as SDIV
12689 assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
12690 "Unexpected divisor!");
12692 // Conditional move is needed, so do the transformation iff Zbt is enabled.
  if (!Subtarget.hasStdExtZbt())
    return SDValue();
12696 // When |Divisor| >= 2 ^ 12, it isn't profitable to do such transformation.
12697 // Besides, more critical path instructions will be generated when dividing
12698 // by 2. So we keep using the original DAGs for these cases.
12699 unsigned Lg2 = Divisor.countTrailingZeros();
  if (Lg2 == 1 || Lg2 >= 12)
    return SDValue();
12703 // fold (sdiv X, pow2)
12704 EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
    return SDValue();

  SDLoc DL(N);
12709 SDValue N0 = N->getOperand(0);
12710 SDValue Zero = DAG.getConstant(0, DL, VT);
12711 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
12713 // Add (N0 < 0) ? Pow2 - 1 : 0;
12714 SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
12715 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
12716 SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
12718 Created.push_back(Cmp.getNode());
12719 Created.push_back(Add.getNode());
12720 Created.push_back(Sel.getNode());
  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
12726 // If we're dividing by a positive value, we're done. Otherwise, we must
12727 // negate the result.
  if (Divisor.isNonNegative())
    return SRA;
12731 Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
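// Worked example (illustrative): for `sdiv x, 8`, Lg2 == 3 and the emitted
// nodes compute Add = x + 7, Sel = (x < 0) ? Add : x, result = Sel >> 3
// (arithmetic); for a divisor of -8, the same shift is then negated as
// 0 - result.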
12735 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
12736 // When aggressively optimizing for code size, we prefer to use a div
12737 // instruction, as it is usually smaller than the alternative sequence.
12738 // TODO: Add vector division?
12739 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
12743 #define GET_REGISTER_MATCHER
12744 #include "RISCVGenAsmMatcher.inc"
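// Resolves register names for llvm.read_register/llvm.write_register. The
// named register must be reserved (e.g. via -ffixed-<reg>); otherwise the
// lookup is rejected, since an allocatable register could be clobbered at any
// point.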
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
12748 const MachineFunction &MF) const {
12749 Register Reg = MatchRegisterAltName(RegName);
12750 if (Reg == RISCV::NoRegister)
12751 Reg = MatchRegisterName(RegName);
12752 if (Reg == RISCV::NoRegister)
12753 report_fatal_error(
12754 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
12755 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
12756 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
12757 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                           StringRef(RegName) + "\"."));
  return Reg;
}
namespace llvm {
namespace RISCVVIntrinsicsTable {
12765 #define GET_RISCVVIntrinsicsTable_IMPL
12766 #include "RISCVGenSearchableTables.inc"
12768 } // namespace RISCVVIntrinsicsTable
12770 } // namespace llvm