Info.size = MemoryLocation::UnknownSize;
Info.flags |= MachineMemOperand::MOStore;
return true;
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT =
+ getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
+ Info.align =
+ Align(DL.getTypeSizeInBits(
+ I.getType()->getStructElementType(0)->getScalarType()) /
+ 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
}
}
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load: {
+ SDLoc DL(Op);
+ static const Intrinsic::ID VlsegInts[7] = {
+ Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
+ Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
+ Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
+ Intrinsic::riscv_vlseg8};
+ unsigned NF = Op->getNumValues() - 1;
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
+ ContainerVTs.push_back(MVT::Other);
+ SDVTList VTs = DAG.getVTList(ContainerVTs);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
+ {Load->getChain(), IntID, Op.getOperand(2), VL},
+ Load->getMemoryVT(), Load->getMemOperand());
+ SmallVector<SDValue, 9> Results;
+ for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
+ Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
+ DAG, Subtarget));
+ Results.push_back(Result.getValue(NF));
+ return DAG.getMergeValues(Results, DL);
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+zve64x -riscv-v-vector-bits-min=128 < %s \
+; RUN: | FileCheck %s
+
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+; CHECK-LABEL: load_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <16 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8> } %2, 1
+ ret <8 x i8> %4
+}
+
+define <8 x i8> @load_factor3(<24 x i8>* %ptr) {
+; CHECK-LABEL: load_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <24 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ ret <8 x i8> %5
+}
+
+define <8 x i8> @load_factor4(<32 x i8>* %ptr) {
+; CHECK-LABEL: load_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg4e8.v v5, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <32 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ ret <8 x i8> %6
+}
+
+define <8 x i8> @load_factor5(<40 x i8>* %ptr) {
+; CHECK-LABEL: load_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg5e8.v v4, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <40 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ ret <8 x i8> %7
+}
+
+define <8 x i8> @load_factor6(<48 x i8>* %ptr) {
+; CHECK-LABEL: load_factor6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg6e8.v v3, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <48 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
+ ret <8 x i8> %8
+}
+
+define <8 x i8> @load_factor7(<56 x i8>* %ptr) {
+; CHECK-LABEL: load_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg7e8.v v2, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <56 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
+ %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6
+ ret <8 x i8> %9
+}
+
+define <8 x i8> @load_factor8(<64 x i8>* %ptr) {
+; CHECK-LABEL: load_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg8e8.v v1, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <64 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
+ %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6
+ %10 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 7
+ ret <8 x i8> %10
+}
+declare { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8*, i64)