/// the object whose address is being passed. If so then MinSize is set to the
/// minimum size the object must be to be aligned and PrefAlign is set to the
/// preferred alignment.
- virtual bool
- shouldUpdatePointerArgAlignment(const CallInst *CI, unsigned &MinSize,
- Align &PrefAlign,
- const TargetTransformInfo &TTI) const;
+ virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
+ Align & /*PrefAlign*/) const {
+ return false;
+ }
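For a target that wants this behavior, an override follows the contract documented above: return true and fill in the two out-parameters. A minimal sketch, assuming a hypothetical MyTargetLowering class (the class name and the constants chosen here are illustrative, not part of this patch; only the signature matches the declaration restored above):

    // Sketch: opt in to pointer-argument alignment for memory intrinsics.
    // MyTargetLowering and the constants below are illustrative assumptions.
    bool MyTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
                                                  Align &PrefAlign) const {
      if (!isa<MemIntrinsic>(CI)) // only memcpy/memmove/memset benefit
        return false;
      MinSize = 8;          // don't bother padding objects smaller than this
      PrefAlign = Align(4); // alignment worth enforcing on the object
      return true;
    }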
//===--------------------------------------------------------------------===//
/// \name Helpers for TargetTransformInfo implementations
}
// Align the pointer arguments to this call if the target thinks it's a good
- // idea (generally only useful for memcpy/memmove/memset).
+ // idea
unsigned MinSize;
Align PrefAlign;
- if (TLI->shouldUpdatePointerArgAlignment(CI, MinSize, PrefAlign, *TTI)) {
+ if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
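The comment above continues beyond this hunk; for context, a simplified sketch of the consumer loop it opens (paraphrased from the surrounding CodeGenPrepare logic, not verbatim from this diff) strips casts and in-bounds GEPs off each pointer argument and raises the alignment of the underlying alloca when the object is large enough:

    // Simplified sketch: for each pointer argument, walk to the underlying
    // object, accumulating any constant offset, then bump its alignment when
    // PrefAlign is larger and the object meets the MinSize threshold.
    for (auto &Arg : CI->args()) {
      if (!Arg->getType()->isPointerTy())
        continue;
      APInt Offset(DL->getIndexTypeSizeInBits(Arg->getType()), 0);
      Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
      uint64_t Off = Offset.getLimitedValue();
      // A GEP offset that is not itself PrefAlign-aligned would defeat the
      // new alignment, so skip those.
      if (!isAligned(PrefAlign, Off))
        continue;
      if (auto *AI = dyn_cast<AllocaInst>(Val))
        if (AI->getAlign() < PrefAlign &&
            DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Off)
          AI->setAlignment(PrefAlign);
    }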
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
-bool TargetLoweringBase::shouldUpdatePointerArgAlignment(
- const CallInst *CI, unsigned &MinSize, Align &PrefAlign,
- const TargetTransformInfo &TTI) const {
- // For now, we only adjust alignment for memcpy/memmove/memset calls.
- auto *MemCI = dyn_cast<MemIntrinsic>(CI);
- if (!MemCI)
- return false;
- auto AddrSpace = MemCI->getDestAddressSpace();
- // We assume that scalar register sized values can be loaded/stored
- // efficiently. If this is not the case for a given target it should override
- // this function.
- auto PrefSizeBits =
- TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
- PrefAlign = Align(PrefSizeBits / 8);
- // When building with -Oz, we only increase the alignment if the object is
- // at least 8 bytes in size to avoid increased stack/global padding.
- // Otherwise, we require at least PrefAlign bytes to be copied.
- MinSize = PrefAlign.value();
- if (CI->getFunction()->hasMinSize())
- MinSize = std::max(MinSize, 8u);
-
- // XXX: we could determine the MachineMemOperand flags instead of assuming
- // load+store (but it probably makes no difference for supported targets).
- unsigned FastUnalignedAccess = 0;
- if (allowsMisalignedMemoryAccesses(
- LLT::scalar(PrefSizeBits), AddrSpace, Align(1),
- MachineMemOperand::MOStore | MachineMemOperand::MOLoad,
- &FastUnalignedAccess) &&
- FastUnalignedAccess) {
- // If unaligned loads&stores are fast, there is no need to adjust
- // alignment.
- return false;
- }
- return true; // unaligned accesses are not possible or slow.
-}
-
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
// If the command-line option was specified, ignore this request.
if (!JumpIsExpensiveOverride.getNumOccurrences())
// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
-bool ARMTargetLowering::shouldUpdatePointerArgAlignment(
- const CallInst *CI, unsigned &MinSize, Align &PrefAlign,
- const TargetTransformInfo &TTI) const {
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ Align &PrefAlign) const {
if (!isa<MemIntrinsic>(CI))
return false;
MinSize = 8;
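The restored ARM override continues past this hunk; roughly, paraphrased from upstream ARMISelLowering.cpp and not shown in this diff, it picks the preferred alignment per subtarget:

    // Paraphrased continuation of ARMTargetLowering::shouldAlignPointerArgs:
    // on ARMv6 and later cores, excluding M-class, an 8-byte aligned LDM/STM
    // is typically a cycle faster than a 4-byte aligned one, so prefer 8.
    PrefAlign =
        (Subtarget->hasV6Ops() && !Subtarget->isMClass()) ? Align(8) : Align(4);
    return true;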
const TargetRegisterClass *
getRegClassFor(MVT VT, bool isDivergent = false) const override;
- bool shouldUpdatePointerArgAlignment(
- const CallInst *CI, unsigned &MinSize, Align &PrefAlign,
- const TargetTransformInfo &TTI) const override;
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ Align &PrefAlign) const override;
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
}
define void @t6() nounwind {
-; RV32-LABEL: t6:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a0, %hi(spool.splbuf)
-; RV32-NEXT: li a1, 88
-; RV32-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
-; RV32-NEXT: lui a1, 361862
-; RV32-NEXT: addi a1, a1, -1960
-; RV32-NEXT: sw a1, %lo(spool.splbuf+8)(a0)
-; RV32-NEXT: lui a1, 362199
-; RV32-NEXT: addi a1, a1, 559
-; RV32-NEXT: sw a1, %lo(spool.splbuf+4)(a0)
-; RV32-NEXT: lui a1, 460503
-; RV32-NEXT: addi a1, a1, 1071
-; RV32-NEXT: sw a1, %lo(spool.splbuf)(a0)
-; RV32-NEXT: ret
+; RV32ALIGNED-LABEL: t6:
+; RV32ALIGNED: # %bb.0: # %entry
+; RV32ALIGNED-NEXT: addi sp, sp, -16
+; RV32ALIGNED-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ALIGNED-NEXT: lui a0, %hi(spool.splbuf)
+; RV32ALIGNED-NEXT: addi a0, a0, %lo(spool.splbuf)
+; RV32ALIGNED-NEXT: lui a1, %hi(.L.str6)
+; RV32ALIGNED-NEXT: addi a1, a1, %lo(.L.str6)
+; RV32ALIGNED-NEXT: li a2, 14
+; RV32ALIGNED-NEXT: call memcpy@plt
+; RV32ALIGNED-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ALIGNED-NEXT: addi sp, sp, 16
+; RV32ALIGNED-NEXT: ret
;
; RV64ALIGNED-LABEL: t6:
; RV64ALIGNED: # %bb.0: # %entry
+; RV64ALIGNED-NEXT: addi sp, sp, -16
+; RV64ALIGNED-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ALIGNED-NEXT: lui a0, %hi(spool.splbuf)
-; RV64ALIGNED-NEXT: li a1, 88
-; RV64ALIGNED-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
-; RV64ALIGNED-NEXT: lui a1, %hi(.LCPI6_0)
-; RV64ALIGNED-NEXT: ld a1, %lo(.LCPI6_0)(a1)
-; RV64ALIGNED-NEXT: lui a2, 361862
-; RV64ALIGNED-NEXT: addiw a2, a2, -1960
-; RV64ALIGNED-NEXT: sw a2, %lo(spool.splbuf+8)(a0)
-; RV64ALIGNED-NEXT: sd a1, %lo(spool.splbuf)(a0)
+; RV64ALIGNED-NEXT: addi a0, a0, %lo(spool.splbuf)
+; RV64ALIGNED-NEXT: lui a1, %hi(.L.str6)
+; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str6)
+; RV64ALIGNED-NEXT: li a2, 14
+; RV64ALIGNED-NEXT: call memcpy@plt
+; RV64ALIGNED-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ALIGNED-NEXT: addi sp, sp, 16
; RV64ALIGNED-NEXT: ret
;
+; RV32UNALIGNED-LABEL: t6:
+; RV32UNALIGNED: # %bb.0: # %entry
+; RV32UNALIGNED-NEXT: lui a0, %hi(spool.splbuf)
+; RV32UNALIGNED-NEXT: li a1, 88
+; RV32UNALIGNED-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
+; RV32UNALIGNED-NEXT: lui a1, 361862
+; RV32UNALIGNED-NEXT: addi a1, a1, -1960
+; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf+8)(a0)
+; RV32UNALIGNED-NEXT: lui a1, 362199
+; RV32UNALIGNED-NEXT: addi a1, a1, 559
+; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf+4)(a0)
+; RV32UNALIGNED-NEXT: lui a1, 460503
+; RV32UNALIGNED-NEXT: addi a1, a1, 1071
+; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf)(a0)
+; RV32UNALIGNED-NEXT: ret
+;
; RV64UNALIGNED-LABEL: t6:
; RV64UNALIGNED: # %bb.0: # %entry
; RV64UNALIGNED-NEXT: lui a0, %hi(.L.str6)
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 8
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 8
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 8
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 8
+; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.0, align 4)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 8), (load (s8) from %ir.2, align 8)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 4), (load (s8) from %ir.2, align 4)
; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.3, align 4)
; ALL: $rax = COPY [[LOAD]](s64)
; ALL: RET 0, implicit $rax
; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
; ALL: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1)
- ; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 8), (load (s8) from %ir.3, align 8)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 8), (load (s8) from %ir.5, align 8)
- ; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 8)
+ ; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
+++ /dev/null
-; RUN: opt -mtriple=riscv32 -data-layout="e-m:e-p:32:32" -S -codegenprepare < %s \
-; RUN: | FileCheck %s '-D#NEW_ALIGNMENT=4'
-; RUN: opt -mtriple=riscv64 -data-layout="e-m:e-p:64:64" -S -codegenprepare < %s \
-; RUN: | FileCheck %s '-D#NEW_ALIGNMENT=8'
-
-@str = private unnamed_addr constant [45 x i8] c"THIS IS A LONG STRING THAT SHOULD BE ALIGNED\00", align 1
-
-
-declare void @use(ptr %arg)
-
-
-; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [45 x i8] c"THIS IS A LONG STRING THAT SHOULD BE ALIGNED\00", align [[#NEW_ALIGNMENT]]
-
-define void @foo() {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DST:%.*]] = alloca [45 x i8], align [[#NEW_ALIGNMENT]]
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr align [[#NEW_ALIGNMENT]] [[DST]], ptr align [[#NEW_ALIGNMENT]] dereferenceable(31) @str, i32 31, i1 false)
-; CHECK-NEXT: ret void
-
-entry:
- %dst = alloca [45 x i8], align 1
- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %dst, ptr align 1 dereferenceable(31) @str, i32 31, i1 false)
- ret void
-}
-
-; negative test - check that we don't align objects that are too small
-define void @no_align(ptr %src) {
-; CHECK-LABEL: @no_align(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DST:%.*]] = alloca [3 x i8], align 1
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[SRC:%.*]], i32 31, i1 false)
-; CHECK-NEXT: ret void
-;
-entry:
- %dst = alloca [3 x i8], align 1
- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %dst, ptr %src, i32 31, i1 false)
- ret void
-}
-
-; negative test - check that minsize requires at least 8 byte object size
-define void @no_align_minsize(ptr %src) minsize {
-; CHECK-LABEL: @no_align_minsize(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DST:%.*]] = alloca [7 x i8], align 1
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[SRC:%.*]], i32 31, i1 false)
-; CHECK-NEXT: ret void
-;
-entry:
- %dst = alloca [7 x i8], align 1
- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %dst, ptr %src, i32 31, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)