There's a potential change in dereferenceability attribute semantics in the nearish future. See llvm-dev thread "RFC: Decomposing deref(N) into deref(N) + nofree" and D99100 for context.
This change simply adds appropriate attributes to tests to keep transform logic exercised under both old and new/proposed semantics. Note that for many of these cases, O3 would infer exactly these attributes on the test IR.
This change handles the idiomatic pattern of a dereferenceable object being passed to a call which cannot free that memory. There are a couple of other tests that need one-off attention; they'll be handled in a separate change.
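For reference, a minimal sketch of the pattern (hypothetical names, not one of the tests below): the dereferenceable argument is only passed to a callee that cannot free it, and the caller itself is marked nofree nosync, so under the proposed deref(N) + nofree semantics the dereferenceability fact stays usable across the call.

  declare void @cannot_free(i32) nofree nosync

  define i32 @example(i32* dereferenceable(4) align 4 %p) nofree nosync {
  entry:
    %v1 = load i32, i32* %p, align 4
    call void @cannot_free(i32 %v1)
    ; under deref(N) + nofree, %p is still known dereferenceable here
    %v2 = load i32, i32* %p, align 4
    %sum = add i32 %v1, %v2
    ret i32 %sum
  }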
declare void @use(i32)
-define void @f_0(i8* align 4 dereferenceable(1024) %ptr) {
+define void @f_0(i8* align 4 dereferenceable(1024) %ptr) nofree nosync {
; CHECK-LABEL: @f_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 32
br label %loop
}
-define void @f_1(i8* align 4 dereferenceable_or_null(1024) %ptr) {
+define void @f_1(i8* align 4 dereferenceable_or_null(1024) %ptr) nofree nosync {
; CHECK-LABEL: @f_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 32
ret void
}
-define void @checkLaunder(i8* align 4 dereferenceable(1024) %p) {
+define void @checkLaunder(i8* align 4 dereferenceable(1024) %p) nofree nosync {
; CHECK-LABEL: @checkLaunder(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P:%.*]])
; CHECK-NEXT: ## %bb.3: ## %exit
; CHECK-NEXT: retq
i64* dereferenceable(8) %x2,
- i128* %y, i64 %count) nounwind {
+ i128* %y, i64 %count) nounwind nofree nosync {
entry:
br label %for.body
; CHECK-NEXT: ## %bb.3: ## %exit
; CHECK-NEXT: retq
i32* dereferenceable(8) %x2,
- i32* %y, i32 %count) nounwind {
+ i32* %y, i32 %count) nounwind nofree nosync {
entry:
br label %for.body
i8* dereferenceable(1) %a1,
i8* dereferenceable(1) %a2,
i8* dereferenceable(1) %a3,
- i64 %count) nounwind uwtable optsize ssp readonly {
+ i64 %count) nounwind uwtable optsize ssp readonly nofree nosync {
entry:
br label %for.body.i
; Partial Vector Loads - PR16739
;
-define <4 x float> @load_float4_float3(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3:
; SSE: # %bb.0:
; SSE-NEXT: movups (%rdi), %xmm0
ret <4 x float> %r2
}
-define <4 x float> @load_float4_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_0122:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
ret <4 x float> %r3
}
-define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float8_float3:
; SSE: # %bb.0:
; SSE-NEXT: movups (%rdi), %xmm0
ret <8 x float> %r2
}
-define <8 x float> @load_float8_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <8 x float> @load_float8_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float8_float3_0122:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
ret <8 x float> %r3
}
-define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_as_float2_float:
; SSE: # %bb.0:
; SSE-NEXT: movups (%rdi), %xmm0
ret <4 x float> %10
}
-define <4 x float> @load_float4_float3_as_float2_float_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_as_float2_float_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_as_float2_float_0122:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
ret <4 x float> %16
}
-define <4 x float> @load_float4_float3_trunc_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_trunc_0122(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_trunc_0122:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
ret <4 x float> %17
}
-define <4 x float> @load_float4_float3_trunc_0123(<4 x float>* nocapture readonly dereferenceable(16)) {
+define <4 x float> @load_float4_float3_trunc_0123(<4 x float>* nocapture readonly dereferenceable(16)) nofree nosync {
; SSE2-LABEL: load_float4_float3_trunc_0123:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps (%rdi), %xmm0
}
; PR21780
-define <4 x double> @load_double4_0u2u(double* nocapture readonly dereferenceable(32)) {
+define <4 x double> @load_double4_0u2u(double* nocapture readonly dereferenceable(32)) nofree nosync {
; SSE2-LABEL: load_double4_0u2u:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; Test case identified in rL366501
@h = dso_local local_unnamed_addr global i8 0, align 1
define dso_local i32 @load_partial_illegal_type() {
; SSE2-LABEL: load_partial_illegal_type:
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl h(%rip), %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
%"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
- %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+ %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
entry:
%first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
%0 = load i32, i32* %first.i, align 4
br label %header
}
-define i32 @test6b(i1 %cnd, i32* dereferenceable(8) align 4 %p) {
+define i32 @test6b(i1 %cnd, i32* dereferenceable(8) align 4 %p) nofree nosync {
; CHECK-LABEL: @test6b(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[V1_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
; dereferenceable can be loaded from speculatively without a risk of trapping.
; Since it is OK to speculate, PRE is allowed.
-define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) nofree nosync {
; CHECK-LABEL: @test15(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
; dereferenceable can be loaded from speculatively without a risk of trapping.
; Since it is OK to speculate, PRE is allowed.
-define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) nofree nosync {
; CHECK-LABEL: @test16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
; load may be speculated, address is not null using context search.
; There is a critical edge.
-define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
+define i32 @loadpre_critical_edge(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) nofree nosync {
; CHECK-LABEL: @loadpre_critical_edge(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
}
; load may be speculated, address is not null using context search.
-define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) {
+define i32 @loadpre_basic(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N) nofree nosync {
; CHECK-LABEL: @loadpre_basic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[ARG:%.*]], null
}
; load cannot be speculated, check "address is not null" does not dominate the loop.
-define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) {
+define i32 @loadpre_maybe_null(i32* align 8 dereferenceable_or_null(48) %arg, i32 %N, i1 %c) nofree nosync {
; CHECK-LABEL: @loadpre_maybe_null(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[NULL_CHECK:%.*]], label [[PREHEADER:%.*]]
ret void
}
-define void @deref_load(i32 %V1, i32* dereferenceable(4) align 4 %P) {
+define void @deref_load(i32 %V1, i32* dereferenceable(4) align 4 %P) nofree nosync {
; CHECK-LABEL: @deref_load(
; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[V2]], [[V1:%.*]]
ret <2 x double> %res
}
-define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
ret <2 x double> %res
}
-define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_speculative_less_aligned(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
; Can't speculate since only half of required size is known deref
-define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_spec_neg_size(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
}
; Can only speculate one lane (but it's the only one active)
-define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_spec_lan0(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> poison, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
ret <2 x double> %res
}
-define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
ret <2 x double> %res
}
-define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_speculative_less_aligned(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
; Can't speculate since only half of required size is known deref
-define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_spec_neg_size(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
}
; Can only speculate one lane (but it's the only one active)
-define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
+define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) nofree nosync {
; CHECK-LABEL: @load_spec_lan0(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> poison, <2 x i32> zeroinitializer
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
-define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) align 4 %x, i32* dereferenceable(4) align 4 %y, i32* %z) {
+define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) align 4 %x, i32* dereferenceable(4) align 4 %y, i32* %z) nofree nosync {
; CHECK-LABEL: @test78_deref(
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X:%.*]], align 4
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, i32* [[Y:%.*]], align 4
; The same as @test78_deref but we can't speculate the load because
; one of the arguments is not sufficiently dereferenceable.
-define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) {
+define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) nofree nosync {
; CHECK-LABEL: @test78_deref_neg(
; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[X:%.*]], i32* [[Y:%.*]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
declare void @use(i32)
-define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
declare i32 @strcmp(i8* nocapture, i8* nocapture)
-define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp2(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp3(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp4(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) {
+define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp5(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [5 x i8], [5 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp6([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp6(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp7([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp7(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) {
+define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp8(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp9(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4)
}
-define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(2) [[STRING]], i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
declare i32 @strncmp(i8* nocapture, i8* nocapture, i64)
-define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp2(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp3(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp4(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp5(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
}
-define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp6(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp7(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp8(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(3) [[STRING]], i8* noundef nonnull dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 3)
ret i32 %conv
}
-define i32 @strncmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp9([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp9(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp10([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp10([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp10(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp11(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp12(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4)
ret i32 %conv
}
-define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp13(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(2) [[STRING]], i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 2)
ret i32 %conv
}
-define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp14(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4)
}
; Negative tests
-define i32 @strcmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
ret i32 %conv
}
-define i32 @strcmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad2(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull [[STRING]])
ret i32 %conv
}
-define i32 @strcmp_memcmp_bad3([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strcmp_memcmp_bad3([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad3(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
}
-define i32 @strcmp_memcmp_bad4(i8* nocapture readonly %buf) {
+define i32 @strcmp_memcmp_bad4(i8* nocapture readonly %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad4(
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @strcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) [[BUF:%.*]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
}
-define i32 @strcmp_memcmp_bad5([3 x i8]* dereferenceable (3) %buf) {
+define i32 @strcmp_memcmp_bad5([3 x i8]* dereferenceable (3) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad5(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [3 x i8], [3 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
ret i32 %conv
}
-define i32 @strcmp_memcmp_bad6([4 x i8]* dereferenceable (4) %buf, i8* nocapture readonly %k) {
+define i32 @strcmp_memcmp_bad6([4 x i8]* dereferenceable (4) %buf, i8* nocapture readonly %k) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad6(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(1) [[K:%.*]])
ret i32 %conv
}
-define i32 @strcmp_memcmp_bad7(i8* nocapture readonly %k) {
+define i32 @strcmp_memcmp_bad7(i8* nocapture readonly %k) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad7(
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @strcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) [[K:%.*]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
ret i32 %conv
}
-define i32 @strcmp_memcmp_bad8([4 x i8]* dereferenceable (4) %buf) {
+define i32 @strcmp_memcmp_bad8([4 x i8]* dereferenceable (4) %buf) nofree nosync {
; CHECK-LABEL: @strcmp_memcmp_bad8(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
ret i32 0
}
-define i32 @strncmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp_bad(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull [[STRING]], i64 5)
}
-define i32 @strncmp_memcmp_bad1([12 x i8]* dereferenceable (12) %buf) {
+define i32 @strncmp_memcmp_bad1([12 x i8]* dereferenceable (12) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp_bad1(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull [[STRING]], i64 5)
ret i32 %conv
}
-define i32 @strncmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf, i64 %n) {
+define i32 @strncmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf, i64 %n) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp_bad2(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 [[N:%.*]])
ret i32 %conv
}
-define i32 @strncmp_memcmp_bad3(i8* nocapture readonly %k) {
+define i32 @strncmp_memcmp_bad3(i8* nocapture readonly %k) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp_bad3(
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @strncmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) [[K:%.*]], i64 2)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
ret i32 %conv
}
-define i32 @strncmp_memcmp_bad4([4 x i8]* dereferenceable (4) %buf) {
+define i32 @strncmp_memcmp_bad4([4 x i8]* dereferenceable (4) %buf) nofree nosync {
; CHECK-LABEL: @strncmp_memcmp_bad4(
; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* noundef nonnull [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
}
; Make sure that we can safely PRE a speculable load across a guard.
-define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) {
+define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) nofree nosync {
; CHECK-LABEL: @safe_pre_across_guard(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOADED_PR:%.*]] = load i8, i8* [[P:%.*]], align 1
}
; Make sure that we can safely PRE a speculable load across a call.
-define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) {
+define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) nofree nosync {
; CHECK-LABEL: @safe_pre_across_call(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOADED_PR:%.*]] = load i8, i8* [[P:%.*]], align 1
ret void
}
-attributes #0 = { nounwind uwtable }
+attributes #0 = { nounwind uwtable nofree nosync }
!0 = !{i64 4}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
; all bytes of %dst that are touched by the memset are dereferenceable
-define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) nofree nosync {
; CHECK-LABEL: @must_remove_memcpy(
; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1
; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0
; memset touches more bytes than those guaranteed to be dereferenceable
; We can't remove the memcpy, but we can turn it into an independent memset.
-define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) nofree nosync {
; CHECK-LABEL: @must_not_remove_memcpy(
; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1
; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0
; X86-NEXT: ret i1 [[TMP2]]
;
%S* nocapture readonly dereferenceable(16) %a,
- %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+ %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
%ptr = alloca i32
; CHECK-NEXT: ret i1 [[TMP13]]
;
%S* nocapture readonly dereferenceable(16) %a,
- %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+ %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
%first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3
%0 = load i32, i32* %first.i, align 4
; bb1 references a gep introduced in bb0. The gep must remain available after
; the merge.
-define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) {
+define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) nofree nosync {
; CHECK-LABEL: @bug(
; CHECK-NEXT: bb0:
; CHECK-NEXT: store i32 1, i32* @g, align 4
%struct.inner = type { i32, i32, i32 }
; Function Attrs: nounwind uwtable
-define i1 @test(%struct.outer* align 8 dereferenceable(16) %o1, %struct.outer* align 8 dereferenceable(116) %o2) local_unnamed_addr #0 {
+define i1 @test(%struct.outer* align 8 dereferenceable(16) %o1, %struct.outer* align 8 dereferenceable(116) %o2) local_unnamed_addr nofree nosync {
; CHECK-LABEL: @test(
; CHECK-NEXT: "entry+if.then":
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_OUTER:%.*]], %struct.outer* [[O1:%.*]], i64 0, i32 0
; X86-NEXT: ret i1 [[TMP7]]
;
%S* nocapture readonly dereferenceable(16) %a,
- %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+ %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
%first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
%0 = load i32, i32* %first.i, align 4
; X86-NOBUILTIN-NEXT: ret i1 [[TMP4]]
;
%S* nocapture readonly dereferenceable(8) %a,
- %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+ %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
entry:
%first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
%0 = load i32, i32* %first.i, align 4
; X86-NOBUILTIN-NEXT: ret i1 [[TMP4]]
;
%S* nocapture readonly dereferenceable(8) %a,
- %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+ %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr nofree nosync {
entry:
%first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1
%0 = load i32, i32* %first.i, align 4
;
; Make sure this call is moved to the beginning of the entry block.
%S* nocapture readonly dereferenceable(16) %a,
- %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+ %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
%first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0
%0 = load i32, i32* %first.i, align 4
ret i8* %x10
}
-define i32* @test5(i32 %a, i32 %b, i32 %c, i32* dereferenceable(10) %ptr1, i32* dereferenceable(10) %ptr2, i32** dereferenceable(10) align 8 %ptr3) {
+define i32* @test5(i32 %a, i32 %b, i32 %c, i32* dereferenceable(10) %ptr1, i32* dereferenceable(10) %ptr2, i32** dereferenceable(10) align 8 %ptr3) nofree nosync {
; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T1:%.*]] = icmp eq i32 [[B:%.*]], 0
; This load can be moved above the call because the function won't write to it
; and the a_arg is dereferenceable.
-define fastcc i32 @raise_load_5(i32* dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+define fastcc i32 @raise_load_5(i32* dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly nofree nosync {
; CHECK-LABEL: @raise_load_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
ret double %r
}
-define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <4 x float> %r
}
-define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) {
+define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; Element type does not change cost.
-define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
+define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
; Pointer type does not change cost.
-define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) {
+define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; This is canonical form for vector element access.
-define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; Should work with addrspace as well.
-define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; If there are enough dereferenceable bytes, we can offset the vector load.
-define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) nofree nosync {
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
; Verify that alignment of the new load is not over-specified.
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) nofree nosync {
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 8
; must be a multiple of element size.
; TODO: Could bitcast around this limitation.
-define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) {
+define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
ret <4 x i32> %r
}
-define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; must be a multiple of element size and the offset must be low enough to fit in the vector
; (bitcasting would not help this case).
-define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
; If there are enough dereferenceable bytes, we can offset the vector load.
-define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
; Negative test - can't safely load the offset vector, but could load+shuffle.
-define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
; Negative test - do not alter volatile.
-define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
; CHECK-NEXT: [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0
; Pointer is not as aligned as load, but that's ok.
; The new load uses the larger alignment value.
-define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_align(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; Negative test - not enough bytes.
-define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) {
+define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_deref(
; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> poison, float [[S]], i32 0
ret <4 x float> %r
}
-define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
+define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
ret <8 x i32> %r
}
-define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) {
+define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %r
}
-define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) {
+define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v16f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <16 x float> %r
}
-define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) {
+define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v2f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
}
declare float* @getscaleptr()
-define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr, <2 x float>* nocapture nonnull readonly %opptr) {
+define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr, <2 x float>* nocapture nonnull readonly %opptr) nofree nosync {
; CHECK-LABEL: @PR47558_multiple_use_load(
; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) float* @getscaleptr()
; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, <2 x float>* [[OPPTR:%.*]], align 4
ret void
}
-define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <4 x float> %r
}
-define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <4 x float> %r
}
-define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) {
+define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) nofree nosync {
; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4
; CHECK-NEXT: store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) {
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) nofree nosync {
; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[GEP]], i32 0, i32 0
ret double %r
}
-define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <4 x float> %r
}
-define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) {
+define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; Element type does not change cost.
-define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
+define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
; Pointer type does not change cost.
-define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) {
+define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; This is canonical form for vector element access.
-define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32(<4 x float>* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; Should work with addrspace as well.
-define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) {
+define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(44)* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float> addrspace(44)* [[P:%.*]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; If there are enough dereferenceable bytes, we can offset the vector load.
-define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) nofree nosync {
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 2
; Verify that alignment of the new load is not over-specified.
-define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) {
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) nofree nosync {
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
; SSE2-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 8
ret <4 x i32> %r
}
-define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; must be a multiple of element size and the offset must be low enough to fit in the vector
; (bitcasting would not help this case).
-define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
+define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) nofree nosync {
; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
; If there are enough dereferenceable bytes, we can offset the vector load.
-define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP]] to <8 x i16>*
; Negative test - disable under asan because widened load can cause spurious
; use-after-poison issues when __asan_poison_memory_region is used.
-define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address {
+define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
; hwasan and memtag should be similarly suppressed.
-define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress {
+define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
ret <8 x i16> %r
}
-define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag {
+define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
; Negative test - disable under tsan because widened load may overlap bytes
; being concurrently modified. tsan does not know that some bytes are undef.
-define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread {
+define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
; Negative test - can't safely load the offset vector, but could load+shuffle.
-define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {
+define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16_deref(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
; Negative test - do not alter volatile.
-define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_volatile(
; CHECK-NEXT: [[S:%.*]] = load volatile float, float* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
; Pointer is not as aligned as load, but that's ok.
; The new load uses the larger alignment value.
-define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) {
+define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_align(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; Negative test - not enough bytes.
-define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) {
+define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(15) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v4f32_deref(
; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
ret <4 x float> %r
}
-define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
+define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
ret <8 x i32> %r
}
-define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) {
+define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %r
}
-define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) {
+define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v16f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <16 x float> %r
}
-define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) {
+define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_f32_insert_v2f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret void
}
-define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <4 x float> %r
}
-define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) {
+define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) nofree nosync {
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
ret <4 x float> %r
}
-define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) {
+define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) nofree nosync {
; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4
; CHECK-NEXT: store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) {
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) nofree nosync {
; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[GEP]], i32 0, i32 0