// Out-of-line destructor; the body is empty, so it runs no cleanup code of
// its own.
ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {}
+// Emit, if possible, a specialized version of the given Libcall. Typically this
+// means selecting the appropriately aligned version, but we also convert memset
+// of 0 into memclr.
+//
+// Only RTLIB::MEMCPY, RTLIB::MEMMOVE and RTLIB::MEMSET are handled, and only
+// when the default name registered for LC starts with "__aeabi". In every
+// other case an empty SDValue is returned and no call is emitted, which lets
+// the caller fall back to whatever it does without a target-specific lowering.
+//
+// For memset, Src is the fill value; for memcpy/memmove it is the source
+// pointer. On success the returned value is the second element of the pair
+// produced by LowerCallTo (the chain of the emitted call).
+SDValue ARMSelectionDAGInfo::
+EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
+                       SDValue Chain,
+                       SDValue Dst, SDValue Src,
+                       SDValue Size, unsigned Align,
+                       RTLIB::Libcall LC) const {
+  const ARMSubtarget &Subtarget =
+    DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
+  const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
+
+  // Only use a specialized AEABI function if the default version of this
+  // Libcall is an AEABI function. A libcall with no registered name cannot be
+  // an AEABI one, and a null pointer must never reach strncmp.
+  const char *DefaultName = TLI->getLibcallName(LC);
+  if (!DefaultName || std::strncmp(DefaultName, "__aeabi", 7) != 0)
+    return SDValue();
+
+  // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
+  // able to translate memset to memclr and use the value to index the function
+  // name array.
+  enum {
+    AEABI_MEMCPY = 0,
+    AEABI_MEMMOVE,
+    AEABI_MEMSET,
+    AEABI_MEMCLR
+  } AEABILibcall;
+  switch (LC) {
+  case RTLIB::MEMCPY:
+    AEABILibcall = AEABI_MEMCPY;
+    break;
+  case RTLIB::MEMMOVE:
+    AEABILibcall = AEABI_MEMMOVE;
+    break;
+  case RTLIB::MEMSET:
+    AEABILibcall = AEABI_MEMSET;
+    // A memset whose fill value is the constant 0 becomes memclr, which takes
+    // no value argument.
+    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
+      if (ConstantSrc->getZExtValue() == 0)
+        AEABILibcall = AEABI_MEMCLR;
+    break;
+  default:
+    return SDValue();
+  }
+
+  // Choose the most-aligned libcall variant that we can. An alignment of 0 is
+  // treated as "no known alignment": without the explicit check it would pass
+  // the (Align & 7) == 0 test and wrongly select the 8-byte-aligned variant.
+  enum {
+    ALIGN1 = 0,
+    ALIGN4,
+    ALIGN8
+  } AlignVariant;
+  if (Align != 0 && (Align & 7) == 0)
+    AlignVariant = ALIGN8;
+  else if (Align != 0 && (Align & 3) == 0)
+    AlignVariant = ALIGN4;
+  else
+    AlignVariant = ALIGN1;
+
+  // Build the argument list. The destination pointer always comes first; the
+  // remaining arguments depend on which function is being called.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
+  Entry.Node = Dst;
+  Args.push_back(Entry);
+  if (AEABILibcall == AEABI_MEMCLR) {
+    // memclr: (ptr, size)
+    Entry.Node = Size;
+    Args.push_back(Entry);
+  } else if (AEABILibcall == AEABI_MEMSET) {
+    // Adjust parameters for memset, EABI uses format (ptr, size, value),
+    // GNU library uses (ptr, value, size)
+    // See RTABI section 4.3.4
+    Entry.Node = Size;
+    Args.push_back(Entry);
+
+    // Extend or truncate the argument to be an i32 value for the call. A value
+    // that is already i32 is passed through unchanged.
+    if (Src.getValueType().bitsGT(MVT::i32))
+      Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+    else if (Src.getValueType().bitsLT(MVT::i32))
+      Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+
+    Entry.Node = Src;
+    Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+    Entry.isSExt = false;
+    Args.push_back(Entry);
+  } else {
+    // memcpy / memmove: (dst, src, size)
+    Entry.Node = Src;
+    Args.push_back(Entry);
+
+    Entry.Node = Size;
+    Args.push_back(Entry);
+  }
+
+  // Indexed as [AEABILibcall][AlignVariant]; the row and column order must
+  // match the two enums above. The table is constant, so it is built once
+  // rather than on every call.
+  static const char *const FunctionNames[4][3] = {
+    { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
+    { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
+    { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
+    { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
+  };
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(Chain)
+    .setCallee(TLI->getLibcallCallingConv(LC),
+               Type::getVoidTy(*DAG.getContext()),
+               DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+                                     TLI->getPointerTy()), std::move(Args), 0)
+    .setDiscardResult();
+  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+
+  return CallResult.second;
+}
+
SDValue
ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
- return SDValue();
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMCPY);
uint64_t SizeVal = ConstantSize->getZExtValue();
if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
- return SDValue();
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMCPY);
unsigned BytesLeft = SizeVal & 3;
unsigned NumMemOps = SizeVal >> 2;
makeArrayRef(TFOps, i));
}
-// Adjust parameters for memset, EABI uses format (ptr, size, value),
-// GNU library uses (ptr, value, size)
-// See RTABI section 4.3.4
+
+// Target hook for memmove: hands the operation to EmitSpecializedLibcall with
+// RTLIB::MEMMOVE and returns whatever that helper returns. isVolatile,
+// DstPtrInfo and SrcPtrInfo are accepted but not forwarded.
+SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
+    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool isVolatile,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  const RTLIB::Libcall MoveCall = RTLIB::MEMMOVE;
+  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+                                MoveCall);
+}
+
+
SDValue ARMSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- const ARMSubtarget &Subtarget =
- DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
- // Use default for non-AAPCS (or MachO) subtargets
- if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
- Subtarget.isTargetWindows())
- return SDValue();
-
- const ARMTargetLowering &TLI = *Subtarget.getTargetLowering();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- // First argument: data pointer
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- // Second argument: buffer size
- Entry.Node = Size;
- Entry.Ty = IntPtrTy;
- Entry.isSExt = false;
- Args.push_back(Entry);
-
- // Extend or truncate the argument to be an i32 value for the call.
- if (Src.getValueType().bitsGT(MVT::i32))
- Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
- else
- Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
-
- // Third argument: value to fill
- Entry.Node = Src;
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.isSExt = true;
- Args.push_back(Entry);
-
- // Emit __eabi_memset call
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
- TLI.getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
-
- std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
- return CallResult.second;
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
+ RTLIB::MEMSET); // All memset lowering is delegated to the shared helper; isVolatile and DstPtrInfo are not forwarded.
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
; EABI memset swaps arguments
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false)
+
+ ; EABI uses memclr if value set to 0
; CHECK-IOS: mov r1, #0
; CHECK-IOS: memset
; CHECK-DARWIN: movs r1, #0
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: __aeabi_memclr
call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
+
+ ; EABI uses aligned function variants if possible
+
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove4
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memset4
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memclr4
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove8
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
+
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy8
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memset8
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memclr8
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false)
+
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK-IOS: mov r0, sp
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARINW: add r0, sp, #4
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: add r0, sp, #4
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
; CHECK-EABI: add r0, sp, #4
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [9 x i8], align 1
%2 = bitcast [9 x i8]* %arr2 to i8*
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r0, sp, #3}}
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [7 x i8], align 1
%2 = bitcast [7 x i8]* %arr2 to i8*
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [9 x i8], align 1
%2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
- ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: mov r1, #1
; CHECK-IOS: memset
- ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: movs r1, #1
; CHECK-DARWIN: memset
- ; CHECK-EABI: mov r2, #0
+ ; CHECK-EABI: mov r2, #1
; CHECK-EABI: __aeabi_memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16
- call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
unreachable
}