// ------------------------ Align Argument Attribute ------------------------
-static unsigned int getKnownAlignForUse(Attributor &A,
- AbstractAttribute &QueryingAA,
- Value &AssociatedValue, const Use *U,
- const Instruction *I, bool &TrackUse) {
+/// \p Ptr is accessed, so return the larger of its own pointer alignment and,
+/// when the pointee type is sized, the ABI alignment of that type under \p DL.
+static MaybeAlign getKnownAlignmentFromAccessedPtr(const Value *Ptr,
+ const DataLayout &DL) {
+ MaybeAlign KnownAlignment = Ptr->getPointerAlignment(DL); // Baseline: what the pointer value itself reports.
+ Type *ElementTy = Ptr->getType()->getPointerElementType();
+ if (ElementTy->isSized()) // The ABI alignment is only queried for sized pointee types.
+ KnownAlignment = max(KnownAlignment, DL.getABITypeAlign(ElementTy));
+ return KnownAlignment;
+}
+
+static unsigned getKnownAlignForUse(Attributor &A,
+ AbstractAttribute &QueryingAA,
+ Value &AssociatedValue, const Use *U,
+ const Instruction *I, bool &TrackUse) { // Yields an alignment derived from use \p U in \p I; 0 where none is found.
// We need to follow common pointer manipulation uses to the accesses they
// feed into.
if (isa<CastInst>(I)) {
}
}
- unsigned Alignment = 0;
+ MaybeAlign MA; // Stays empty unless the call-site, store, or load case below sets it.
if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
if (ICS.isBundleOperand(U) || ICS.isCallee(U))
return 0;
// dependences here.
auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP,
/* TrackDependence */ false);
- Alignment = AlignAA.getKnownAlign();
+ MA = MaybeAlign(AlignAA.getKnownAlign()); // Known alignment reported by the AAAlign queried above.
}
+ const DataLayout &DL = A.getDataLayout(); // Used by the accessed-pointer fallback and the base/offset query below.
const Value *UseV = U->get();
if (auto *SI = dyn_cast<StoreInst>(I)) {
- if (SI->getPointerOperand() == UseV)
- Alignment = SI->getAlignment();
- } else if (auto *LI = dyn_cast<LoadInst>(I))
- Alignment = LI->getAlignment();
+ if (SI->getPointerOperand() == UseV) { // Only when the used value is the address stored to.
+ if (unsigned SIAlign = SI->getAlignment()) // A non-zero alignment on the store is taken as-is.
+ MA = MaybeAlign(SIAlign);
+ else // Alignment of 0 on the store: derive it from the accessed pointer instead.
+ MA = getKnownAlignmentFromAccessedPtr(UseV, DL);
+ }
+ } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getPointerOperand() == UseV) { // Same handling as the store case, for the loaded-from address.
+ if (unsigned LIAlign = LI->getAlignment()) // A non-zero alignment on the load is taken as-is.
+ MA = MaybeAlign(LIAlign);
+ else // Alignment of 0 on the load: derive it from the accessed pointer instead.
+ MA = getKnownAlignmentFromAccessedPtr(UseV, DL);
+ }
+ }
- if (Alignment <= 1)
+ if (!MA.hasValue() || MA <= 1) // Nothing was derived, or only the trivial alignment of 1.
return 0;
- auto &DL = A.getDataLayout();
+ unsigned Alignment = MA->value(); // Unwrap to the plain integer this function returns.
int64_t Offset;
if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) {
return Alignment;
}
+
struct AAAlignImpl : AAAlign {
AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {}
; CHECK-NEXT: unreachable
; CHECK: F:
; CHECK-NEXT: [[A_2:%.*]] = getelementptr i32, i32* null, i32 2
-; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[A_2]]
+; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[A_2]], align 4
; CHECK-NEXT: ret i32 [[R]]
;
entry:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
-; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
-; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
-; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
-; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
-; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>
; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
-; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
-; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
+; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
; CHECK-NEXT: ret void
;
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8
-; CHECK-NEXT: store i32 0, i32* [[X_PRIV]]
-; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[X_PRIV]]
+; CHECK-NEXT: store i32 0, i32* [[X_PRIV]], align 4
+; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[X_PRIV]], align 4
; CHECK-NEXT: [[A:%.*]] = add i32 [[L]], [[TMP2]]
; CHECK-NEXT: ret i32 [[A]]
;
; Also make sure we don't drop the call zeroext attribute.
define i32 @test(i32* %X) {
; CHECK-LABEL: define {{[^@]+}}@test
-; CHECK-SAME: (i32* nocapture nofree readonly [[X:%.*]])
+; CHECK-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
define i32 @test(i32* %X) {
; CHECK-LABEL: define {{[^@]+}}@test
-; CHECK-SAME: (i32* nocapture nofree readonly [[X:%.*]])
+; CHECK-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
; CHECK-LABEL: define {{[^@]+}}@test()
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Y:%.*]] = load i32*, i32** @G2, align 8
-; CHECK-NEXT: [[Z:%.*]] = load i32, i32* [[Y]]
+; CHECK-NEXT: [[Z:%.*]] = load i32, i32* [[Y]], align 4
; CHECK-NEXT: ret i32 [[Z]]
;
entry:
; CHECK-SAME: (%T* nocapture nofree readonly [[P:%.*]])
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
-; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]]
-; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4
; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: ret i32 [[V]]
;
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: (%T* nocapture nofree nonnull writeonly dereferenceable(4) [[P:%.*]], i32 [[V:%.*]])
; CHECK-NEXT: [[I32PTR:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 0
-; CHECK-NEXT: store i32 [[V]], i32* [[I32PTR]]
+; CHECK-NEXT: store i32 [[V]], i32* [[I32PTR]], align 4
; CHECK-NEXT: ret i32 0
;
%i32ptr = getelementptr %T, %T* %p, i64 0, i32 0
; CHECK-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]])
; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3
; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2
-; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]]
-; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]]
+; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4
; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: [[CA:%.*]] = musttail call i32 @bar(%T* undef, i32 [[V]])
; CHECK-NEXT: ret i32 [[CA]]
; CHECK-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0
; CHECK-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1
; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8
-; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[BP]]
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[BP]], align 4
; CHECK-NEXT: [[AB:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT: store i32 [[AB]], i32* [[R]], align 4
; CHECK-NEXT: ret void
; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
; CHECK-LABEL: define {{[^@]+}}@t0_callback_callee
-; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
-; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 99, i32* [[TMP0]])
; CHECK-NEXT: ret void
; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee
-; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
-; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
; CHECK-NEXT: ret void
; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee
-; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
-; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
; CHECK-NEXT: ret void
; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
; CHECK-LABEL: define {{[^@]+}}@t3_callback_callee
-; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
-; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
; CHECK-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
; CHECK-NEXT: ret void
ret i32* %ptr
}
-; ATTRIBUTOR: define void @f7_1(i32* nonnull dereferenceable(4) %ptr, i1 %c)
+; ATTRIBUTOR: define void @f7_1(i32* nonnull align 4 dereferenceable(4) %ptr, i1 %c)
define void @f7_1(i32* %ptr, i1 %c) {
-; ATTRIBUTOR: %A = tail call i32 @unkown_f(i32* nonnull dereferenceable(4) %ptr)
+; ATTRIBUTOR: %A = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) %ptr)
%A = tail call i32 @unkown_f(i32* %ptr)
%ptr.0 = load i32, i32* %ptr
; deref 4 hold
; FIXME: this should be %B = tail call i32 @unkown_f(i32* nonnull dereferenceable(4) %ptr)
-; ATTRIBUTOR: %B = tail call i32 @unkown_f(i32* nonnull dereferenceable(4) %ptr)
+; ATTRIBUTOR: %B = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) %ptr)
%B = tail call i32 @unkown_f(i32* dereferenceable(1) %ptr)
br i1%c, label %if.true, label %if.false
if.true:
-; ATTRIBUTOR: %C = tail call i32 @unkown_f(i32* nonnull dereferenceable(8) %ptr)
+; ATTRIBUTOR: %C = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) %ptr)
%C = tail call i32 @unkown_f(i32* %ptr)
-; ATTRIBUTOR: %D = tail call i32 @unkown_f(i32* nonnull dereferenceable(8) %ptr)
+; ATTRIBUTOR: %D = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) %ptr)
%D = tail call i32 @unkown_f(i32* dereferenceable(8) %ptr)
-; ATTRIBUTOR: %E = tail call i32 @unkown_f(i32* nonnull dereferenceable(8) %ptr)
+; ATTRIBUTOR: %E = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) %ptr)
%E = tail call i32 @unkown_f(i32* %ptr)
ret void
%ptr = tail call i32* @unkown_ptr()
-; ATTRIBUTOR: %A = tail call i32 @unkown_f(i32* nonnull dereferenceable(4) %ptr)
+; ATTRIBUTOR: %A = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) %ptr)
%A = tail call i32 @unkown_f(i32* %ptr)
%arg_a.0 = load i32, i32* %ptr
; deref 4 hold
-; ATTRIBUTOR: %B = tail call i32 @unkown_f(i32* nonnull dereferenceable(4) %ptr)
+; ATTRIBUTOR: %B = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) %ptr)
%B = tail call i32 @unkown_f(i32* dereferenceable(1) %ptr)
br i1%c, label %if.true, label %if.false
if.true:
-; ATTRIBUTOR: %C = tail call i32 @unkown_f(i32* nonnull dereferenceable(8) %ptr)
+; ATTRIBUTOR: %C = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) %ptr)
%C = tail call i32 @unkown_f(i32* %ptr)
-; ATTRIBUTOR: %D = tail call i32 @unkown_f(i32* nonnull dereferenceable(8) %ptr)
+; ATTRIBUTOR: %D = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) %ptr)
%D = tail call i32 @unkown_f(i32* dereferenceable(8) %ptr)
%E = tail call i32 @unkown_f(i32* %ptr)
-; ATTRIBUTOR: %E = tail call i32 @unkown_f(i32* nonnull dereferenceable(8) %ptr)
+; ATTRIBUTOR: %E = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) %ptr)
ret void
define i32* @test_for_minus_index(i32* %p) {
; FIXME: This should have a return dereferenceable(8) but we need to make sure it will work in loops as well.
-; ATTRIBUTOR: define nonnull i32* @test_for_minus_index(i32* nofree nonnull writeonly "no-capture-maybe-returned" %p)
+; ATTRIBUTOR: define nonnull align 4 i32* @test_for_minus_index(i32* nofree nonnull writeonly align 4 "no-capture-maybe-returned" %p)
%q = getelementptr inbounds i32, i32* %p, i32 -2
store i32 1, i32* %q
ret i32* %q
}
define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) {
-; ATTRIBUTOR: define void @deref_or_null_and_nonnull(i32* nocapture nofree nonnull writeonly dereferenceable(100) %0)
+; ATTRIBUTOR: define void @deref_or_null_and_nonnull(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(100) %0)
store i32 1, i32* %0
ret void
}
; Multiple arguments may be dereferenceable.
define void @ordering(i8* %ptr1, i32* %ptr2) {
-; ATTRIBUTOR-LABEL: @ordering(i8* nocapture nofree nonnull readnone dereferenceable(3) %ptr1, i32* nocapture nofree nonnull readnone dereferenceable(8) %ptr2)
+; ATTRIBUTOR-LABEL: @ordering(i8* nocapture nofree nonnull readnone dereferenceable(3) %ptr1, i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) %ptr2)
%a20 = getelementptr i32, i32* %ptr2, i64 0
%a12 = getelementptr i8, i8* %ptr1, i64 2
%t12 = load i8, i8* %a12
; The last load may not execute, so derefenceable bytes only covers the 1st two loads.
define void @partial_in_entry(i16* %ptr, i1 %cond) {
-; ATTRIBUTOR-LABEL: @partial_in_entry(i16* nocapture nofree nonnull readnone dereferenceable(4) %ptr, i1 %cond)
+; ATTRIBUTOR-LABEL: @partial_in_entry(i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) %ptr, i1 %cond)
entry:
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
; The 2nd and 3rd loads may never execute.
define void @volatile_is_not_dereferenceable(i16* %ptr) {
-; ATTRIBUTOR-LABEL: @volatile_is_not_dereferenceable(i16* nofree %ptr)
+; ATTRIBUTOR-LABEL: @volatile_is_not_dereferenceable(i16* nofree align 2 %ptr)
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
declare void @may_not_return()
define void @not_guaranteed_to_transfer_execution(i16* %ptr) {
-; ATTRIBUTOR-LABEL: @not_guaranteed_to_transfer_execution(i16* nocapture nofree nonnull readnone dereferenceable(2) %ptr)
+; ATTRIBUTOR-LABEL: @not_guaranteed_to_transfer_execution(i16* nocapture nofree nonnull readnone align 2 dereferenceable(2) %ptr)
%arrayidx0 = getelementptr i16, i16* %ptr, i64 0
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
; Could round weird bitwidths down?
define void @not_byte_multiple(i9* %ptr) {
-; ATTRIBUTOR-LABEL: @not_byte_multiple(i9* nocapture nofree nonnull readnone dereferenceable(2) %ptr)
+; ATTRIBUTOR-LABEL: @not_byte_multiple(i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) %ptr)
%arrayidx0 = getelementptr i9, i9* %ptr, i64 0
%t0 = load i9, i9* %arrayidx0
ret void
; Missing direct access from the pointer.
define void @no_pointer_deref(i16* %ptr) {
-; ATTRIBUTOR-LABEL: @no_pointer_deref(i16* nocapture nofree readnone %ptr)
+; ATTRIBUTOR-LABEL: @no_pointer_deref(i16* nocapture nofree readnone align 2 %ptr)
%arrayidx1 = getelementptr i16, i16* %ptr, i64 1
%arrayidx2 = getelementptr i16, i16* %ptr, i64 2
%t1 = load i16, i16* %arrayidx1
; Out-of-order is ok, but missing access concludes dereferenceable range.
define void @non_consecutive(i32* %ptr) {
-; ATTRIBUTOR-LABEL: @non_consecutive(i32* nocapture nofree nonnull readnone dereferenceable(8) %ptr)
+; ATTRIBUTOR-LABEL: @non_consecutive(i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) %ptr)
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
; Improve on existing dereferenceable attribute.
define void @more_bytes(i32* dereferenceable(8) %ptr) {
-; ATTRIBUTOR-LABEL: @more_bytes(i32* nocapture nofree nonnull readnone dereferenceable(16) %ptr)
+; ATTRIBUTOR-LABEL: @more_bytes(i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) %ptr)
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
; Improve on existing dereferenceable_or_null attribute.
define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) {
-; ATTRIBUTOR-LABEL: @more_bytes_and_not_null(i32* nocapture nofree nonnull readnone dereferenceable(16) %ptr)
+; ATTRIBUTOR-LABEL: @more_bytes_and_not_null(i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) %ptr)
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
; But don't pessimize existing dereferenceable attribute.
define void @better_bytes(i32* dereferenceable(100) %ptr) {
-; ATTRIBUTOR-LABEL: @better_bytes(i32* nocapture nofree nonnull readnone dereferenceable(100) %ptr)
+; ATTRIBUTOR-LABEL: @better_bytes(i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) %ptr)
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3
%arrayidx1 = getelementptr i32, i32* %ptr, i64 1
%arrayidx0 = getelementptr i32, i32* %ptr, i64 0
}
define void @bitcast(i32* %arg) {
-; ATTRIBUTOR-LABEL: @bitcast(i32* nocapture nofree nonnull readnone dereferenceable(8) %arg)
+; ATTRIBUTOR-LABEL: @bitcast(i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
}
define void @bitcast_different_sizes(double* %arg1, i8* %arg2) {
-; ATTRIBUTOR-LABEL: @bitcast_different_sizes(double* nocapture nofree nonnull readnone dereferenceable(12) %arg1, i8* nocapture nofree nonnull readnone dereferenceable(16) %arg2)
+; ATTRIBUTOR-LABEL: @bitcast_different_sizes(double* nocapture nofree nonnull readnone align 4 dereferenceable(12) %arg1, i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) %arg2)
%ptr1 = bitcast double* %arg1 to float*
%a10 = getelementptr float, float* %ptr1, i64 0
%a11 = getelementptr float, float* %ptr1, i64 1
}
define void @negative_offset(i32* %arg) {
-; ATTRIBUTOR-LABEL: @negative_offset(i32* nocapture nofree nonnull readnone dereferenceable(4) %arg)
+; ATTRIBUTOR-LABEL: @negative_offset(i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 -1
}
define void @stores(i32* %arg) {
-; ATTRIBUTOR-LABEL: @stores(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg)
+; ATTRIBUTOR-LABEL: @stores(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
}
define void @load_store(i32* %arg) {
-; ATTRIBUTOR-LABEL: @load_store(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg)
+; ATTRIBUTOR-LABEL: @load_store(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) %arg)
%ptr = bitcast i32* %arg to float*
%arrayidx0 = getelementptr float, float* %ptr, i64 0
%arrayidx1 = getelementptr float, float* %ptr, i64 1
}
define void @different_size1(i32* %arg) {
-; ATTRIBUTOR-LABEL: @different_size1(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg)
+; ATTRIBUTOR-LABEL: @different_size1(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(8) %arg)
%arg-cast = bitcast i32* %arg to double*
store double 0.000000e+00, double* %arg-cast
store i32 0, i32* %arg
}
define void @different_size2(i32* %arg) {
-; ATTRIBUTOR-LABEL: @different_size2(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg)
+; ATTRIBUTOR-LABEL: @different_size2(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(8) %arg)
store i32 0, i32* %arg
%arg-cast = bitcast i32* %arg to double*
store double 0.000000e+00, double* %arg-cast
store i32 10, i32* %2
%3 = load i32, i32* %2
tail call void @foo(i32* %2)
- ; CHECK: @free(i8* nonnull dereferenceable(4) %1)
+ ; CHECK: @free(i8* nonnull align 4 dereferenceable(4) %1)
tail call void @free(i8* %1)
ret void
}
store i32 10, i32* %2
%3 = load i32, i32* %2
tail call void @foo_nounw(i32* %2)
- ; CHECK: @free(i8* nonnull dereferenceable(4) %1)
+ ; CHECK: @free(i8* nonnull align 4 dereferenceable(4) %1)
tail call void @free(i8* %1)
ret void
}
store i32 10, i32* %2
%3 = load i32, i32* %2
tail call void @free(i8* %1)
- ; CHECK: tail call void @free(i8* noalias nonnull dereferenceable(4) %1)
+ ; CHECK: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) %1)
ret i32 %3
}
store i32 10, i32* %2
%3 = load i32, i32* %2
tail call void @free(i8* %1)
- ; CHECK: tail call void @free(i8* noalias nonnull dereferenceable(4) %1)
+ ; CHECK: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) %1)
ret i32 %3
}
store i32 10, i32* %2
%3 = load i32, i32* %2
tail call void @free(i8* %1)
- ; CHECK: tail call void @free(i8* noalias nonnull dereferenceable(4) %1)
+ ; CHECK: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) %1)
ret i32 %3
}
define internal void @foo(i32* %a) {
; ALL-LABEL: define {{[^@]+}}@foo
-; ALL-SAME: (i32* nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]])
+; ALL-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A:%.*]])
; ALL-NEXT: entry:
-; ALL-NEXT: store i32 0, i32* [[A]]
+; ALL-NEXT: store i32 0, i32* [[A]], align 4
; ALL-NEXT: ret void
;
entry:
define i32 @test0(i32* %p) {
; CHECK-LABEL: define {{[^@]+}}@test0
-; CHECK-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P]], !range !0
+; CHECK-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P]], align 4, !range !0
; CHECK-NEXT: ret i32 [[A]]
;
%a = load i32, i32* %p, !range !0
define i32 @test0-range-check(i32* %p) {
;
; OLD_PM-LABEL: define {{[^@]+}}@test0-range-check
-; OLD_PM-SAME: (i32* nocapture nofree readonly [[P:%.*]])
-; OLD_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly [[P]]) #{{[0-9]+}}, !range !0
+; OLD_PM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]])
+; OLD_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #{{[0-9]+}}, !range !0
; OLD_PM-NEXT: ret i32 [[A]]
;
; NEW_PM-LABEL: define {{[^@]+}}@test0-range-check
-; NEW_PM-SAME: (i32* nocapture nofree readonly [[P:%.*]])
-; NEW_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly [[P]]) #{{[0-9]+}}, !range !0
+; NEW_PM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]])
+; NEW_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #{{[0-9]+}}, !range !0
; NEW_PM-NEXT: ret i32 [[A]]
;
; CGSCC_OLD_PM-LABEL: define {{[^@]+}}@test0-range-check
-; CGSCC_OLD_PM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CGSCC_OLD_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly dereferenceable(4) [[P]])
+; CGSCC_OLD_PM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CGSCC_OLD_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P]])
; CGSCC_OLD_PM-NEXT: ret i32 [[A]]
;
; CGSCC_NEW_PM-LABEL: define {{[^@]+}}@test0-range-check
-; CGSCC_NEW_PM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CGSCC_NEW_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly dereferenceable(4) [[P]])
+; CGSCC_NEW_PM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CGSCC_NEW_PM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P]])
; CGSCC_NEW_PM-NEXT: ret i32 [[A]]
;
%a = tail call i32 @test0(i32* %p)
; TEST0 icmp test
define void @test0-icmp-check(i32* %p){
; OLD_PM-LABEL: define {{[^@]+}}@test0-icmp-check
-; OLD_PM-SAME: (i32* nocapture nofree readonly [[P:%.*]])
-; OLD_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly [[P]]) #{{[0-9]+}}, !range !0
+; OLD_PM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]])
+; OLD_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #{{[0-9]+}}, !range !0
; OLD_PM-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
; OLD_PM-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
; OLD_PM-NEXT: [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
; OLD_PM-NEXT: ret void
;
; NEW_PM-LABEL: define {{[^@]+}}@test0-icmp-check
-; NEW_PM-SAME: (i32* nocapture nofree readonly [[P:%.*]])
-; NEW_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly [[P]]) #{{[0-9]+}}, !range !0
+; NEW_PM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]])
+; NEW_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #{{[0-9]+}}, !range !0
; NEW_PM-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
; NEW_PM-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
; NEW_PM-NEXT: [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
; NEW_PM-NEXT: ret void
;
; CGSCC_OLD_PM-LABEL: define {{[^@]+}}@test0-icmp-check
-; CGSCC_OLD_PM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CGSCC_OLD_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly dereferenceable(4) [[P]])
+; CGSCC_OLD_PM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CGSCC_OLD_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P]])
; CGSCC_OLD_PM-NEXT: [[CMP_EQ_1:%.*]] = icmp eq i32 [[RET]], 10
; CGSCC_OLD_PM-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
; CGSCC_OLD_PM-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
; CGSCC_OLD_PM-NEXT: ret void
;
; CGSCC_NEW_PM-LABEL: define {{[^@]+}}@test0-icmp-check
-; CGSCC_NEW_PM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CGSCC_NEW_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly dereferenceable(4) [[P]])
+; CGSCC_NEW_PM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CGSCC_NEW_PM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P]])
; CGSCC_NEW_PM-NEXT: [[CMP_EQ_1:%.*]] = icmp eq i32 [[RET]], 10
; CGSCC_NEW_PM-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
; CGSCC_NEW_PM-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
}
define i32 @test1(i32* %p) {
; CHECK-LABEL: define {{[^@]+}}@test1
-; CHECK-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CHECK-NEXT: [[LOAD_10_100:%.*]] = load i32, i32* [[P]], !range !1
+; CHECK-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CHECK-NEXT: [[LOAD_10_100:%.*]] = load i32, i32* [[P]], align 4, !range !1
; CHECK-NEXT: [[ADD_10_THEN_20_110:%.*]] = add i32 [[LOAD_10_100]], 10
; CHECK-NEXT: [[MUL_10_THEN_200_1091:%.*]] = mul i32 [[ADD_10_THEN_20_110]], 10
; CHECK-NEXT: ret i32 [[MUL_10_THEN_200_1091]]
define i1 @test1-check(i32* %p) {
; OLD_PM-LABEL: define {{[^@]+}}@test1-check
-; OLD_PM-SAME: (i32* nocapture nofree readonly [[P:%.*]])
-; OLD_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly [[P]]) #{{[0-9]+}}, !range !2
+; OLD_PM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]])
+; OLD_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #{{[0-9]+}}, !range !2
; OLD_PM-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500
; OLD_PM-NEXT: ret i1 [[CMP]]
;
; NEW_PM-LABEL: define {{[^@]+}}@test1-check
-; NEW_PM-SAME: (i32* nocapture nofree readonly [[P:%.*]])
-; NEW_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly [[P]]) #{{[0-9]+}}, !range !2
+; NEW_PM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]])
+; NEW_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #{{[0-9]+}}, !range !2
; NEW_PM-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500
; NEW_PM-NEXT: ret i1 [[CMP]]
;
; CGSCC_OLD_PM-LABEL: define {{[^@]+}}@test1-check
-; CGSCC_OLD_PM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CGSCC_OLD_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree nonnull readonly dereferenceable(4) [[P]])
+; CGSCC_OLD_PM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CGSCC_OLD_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P]])
; CGSCC_OLD_PM-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500
; CGSCC_OLD_PM-NEXT: ret i1 [[CMP]]
;
; CGSCC_NEW_PM-LABEL: define {{[^@]+}}@test1-check
-; CGSCC_NEW_PM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[P:%.*]])
-; CGSCC_NEW_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree nonnull readonly dereferenceable(4) [[P]])
+; CGSCC_NEW_PM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CGSCC_NEW_PM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P]])
; CGSCC_NEW_PM-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500
; CGSCC_NEW_PM-NEXT: ret i1 [[CMP]]
;
ret void
}
-; ATTRIBUTOR: define void @test5(i8** nocapture nofree nonnull writeonly dereferenceable(8) %p, i8* nofree writeonly %q)
+; ATTRIBUTOR: define void @test5(i8** nocapture nofree nonnull writeonly align 8 dereferenceable(8) %p, i8* nofree writeonly %q)
; Missed optz'n: we could make %q readnone, but don't break test6!
define void @test5(i8** %p, i8* %q) {
store i8* %q, i8** %p
}
declare void @test6_1()
-; ATTRIBUTOR: define void @test6_2(i8** nocapture nonnull writeonly dereferenceable(8) %p, i8* %q)
+; ATTRIBUTOR: define void @test6_2(i8** nocapture nonnull writeonly align 8 dereferenceable(8) %p, i8* %q)
; This is not a missed optz'n.
define void @test6_2(i8** %p, i8* %q) {
store i8* %q, i8** %p
define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
; CHECK-LABEL: define {{[^@]+}}@test_sret
-; CHECK-SAME: (%struct.X* noalias nofree sret writeonly align 536870912 [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly dereferenceable(8) [[B:%.*]])
-; CHECK-NEXT: store %struct.X* [[A]], %struct.X** [[B]]
+; CHECK-SAME: (%struct.X* noalias nofree sret writeonly align 536870912 [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]])
+; CHECK-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8
; CHECK-NEXT: ret void
;
store %struct.X* %a, %struct.X** %b
ret void
}
+; FIXME: Alignment and dereferenceability are not propagated to the argument %b of @complicated_args_sret.
define void @complicated_args_sret(%struct.X** %b) {
; CHECK-LABEL: define {{[^@]+}}@complicated_args_sret
; CHECK-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]])
-; CHECK-NEXT: call void @test_sret(%struct.X* noalias nofree writeonly align 536870912 null, %struct.X** nocapture nofree writeonly [[B]])
+; CHECK-NEXT: call void @test_sret(%struct.X* noalias nofree writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]])
; CHECK-NEXT: ret void
;
call void @test_sret(%struct.X* null, %struct.X** %b)