* The ``inalloca`` attribute now has a mandatory type field, similar
to ``byval`` and ``sret``.
-
Changes to building LLVM
------------------------
Changes to TableGen
-------------------
+Changes to Backend Code Generation
+----------------------------------
+
+* When lowering calls, only ABI attributes on the call itself are checked, not
+  those on the callee. Frontends need to make sure to properly set ABI
+  attributes on calls (and always should have); see the example below.
+
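+  For illustration only (a minimal sketch; the ``@use`` and ``@frontend_call``
+  names are hypothetical, not part of this change), a frontend lowering a
+  ``byval`` argument must now emit the attribute on the call site itself
+  rather than rely on the callee declaration::
+
+    declare void @use(i8* byval(i8))
+
+    define void @frontend_call(i8* %p) {
+      ; the byval copy is only performed when the attribute is on the call
+      call void @use(i8* byval(i8) %p)
+      ret void
+    }
+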
Changes to the ARM Backend
--------------------------
return true;
}
-/// Set CallLoweringInfo attribute flags based on a call instruction
-/// and called function attributes.
+/// Set CallLoweringInfo attribute flags based on the call instruction's
+/// argument attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
- IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
- IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
- IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
- IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
- IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
- IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
- IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
- IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
- IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
- IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
- IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
- Alignment = Call->getParamStackAlign(ArgIdx);
+ auto Attrs = Call->getAttributes();
+
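+  // Note that only the call site's attribute list is consulted below; unlike
+  // CallBase::paramHasAttr(), there is no fallback to the called function's
+  // parameter attributes.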
+ IsSExt = Attrs.hasParamAttribute(ArgIdx, Attribute::SExt);
+ IsZExt = Attrs.hasParamAttribute(ArgIdx, Attribute::ZExt);
+ IsInReg = Attrs.hasParamAttribute(ArgIdx, Attribute::InReg);
+ IsSRet = Attrs.hasParamAttribute(ArgIdx, Attribute::StructRet);
+ IsNest = Attrs.hasParamAttribute(ArgIdx, Attribute::Nest);
+ IsReturned = Attrs.hasParamAttribute(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftError);
+ Alignment = Attrs.getParamStackAlignment(ArgIdx);
+
+ IsByVal = Attrs.hasParamAttribute(ArgIdx, Attribute::ByVal);
ByValType = nullptr;
if (IsByVal) {
ByValType = Call->getParamByValType(ArgIdx);
if (!Alignment)
Alignment = Call->getParamAlign(ArgIdx);
}
+ IsInAlloca = Attrs.hasParamAttribute(ArgIdx, Attribute::InAlloca);
+ IsPreallocated = Attrs.hasParamAttribute(ArgIdx, Attribute::Preallocated);
PreallocatedType = nullptr;
if (IsPreallocated)
PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
; CHECK-NOT: mov x0, {{x[0-9]+}}
; CHECK: b {{_?B_ctor_base}}
%0 = bitcast %struct.C* %this to %struct.A*
- %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+ %call = tail call %struct.A* @A_ctor_base(%struct.A* returned %0)
%1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
- %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+ %call2 = tail call %struct.B* @B_ctor_base(%struct.B* returned %1, i32 %x)
ret %struct.C* %this
}
entry:
; CHECK-LABEL: C_ctor_complete:
; CHECK: b {{_?C_ctor_base}}
- %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
+ %call = tail call %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x)
ret %struct.C* %this
}
; CHECK-NOT: mov x0, {{x[0-9]+}}
; CHECK: b {{_?B_ctor_complete}}
%b = getelementptr inbounds %struct.D, %struct.D* %this, i32 0, i32 0
- %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
- %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+ %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
+ %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
ret %struct.D* %this
}
; CHECK-LABEL: E_ctor_base:
; CHECK-NOT: b {{_?B_ctor_complete}}
%b = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 0
- %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+ %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
%b2 = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 1
- %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
+ %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b2, i32 %x)
ret %struct.E* %this
}
define void @test11(i64 %a) {
%tmp = lshr i64 %a, 23
%res = trunc i64 %tmp to i16
- call void @use(i16 %res, i64 %tmp)
+ call void @use(i16 signext %res, i64 %tmp)
ret void
}
; CHECK-LABEL: _test_tailcall_explicit_sret:
; CHECK-NEXT: b _test_explicit_sret
define void @test_tailcall_explicit_sret(i1024* sret(i1024) %arg) #0 {
- tail call void @test_explicit_sret(i1024* %arg)
+ tail call void @test_explicit_sret(i1024* sret(i1024) %arg)
ret void
}
; CHECK: bl _test_explicit_sret
; CHECK: ret
define void @test_call_explicit_sret(i1024* sret(i1024) %arg) #0 {
- call void @test_explicit_sret(i1024* %arg)
+ call void @test_explicit_sret(i1024* sret(i1024) %arg)
ret void
}
; CHECK: ret
define void @test_tailcall_explicit_sret_alloca_unused() #0 {
%l = alloca i1024, align 8
- tail call void @test_explicit_sret(i1024* %l)
+ tail call void @test_explicit_sret(i1024* sret(i1024) %l)
ret void
}
%l = alloca i1024, align 8
%r = load i1024, i1024* %ptr, align 8
store i1024 %r, i1024* %l, align 8
- tail call void @test_explicit_sret(i1024* %l)
+ tail call void @test_explicit_sret(i1024* sret(i1024) %l)
ret void
}
; CHECK: ret
define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
%ptr2 = getelementptr i1024, i1024* %ptr, i32 1
- tail call void @test_explicit_sret(i1024* %ptr2)
+ tail call void @test_explicit_sret(i1024* sret(i1024) %ptr2)
ret void
}
; CHECK: ret
define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
%l = alloca i1024, align 8
- tail call void @test_explicit_sret(i1024* %l)
+ tail call void @test_explicit_sret(i1024* sret(i1024) %l)
%r = load i1024, i1024* %l, align 8
ret i1024 %r
}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
%var = load volatile i1, i1 addrspace(1)* undef
- call void @external_void_func_i1_signext(i1 %var)
+ call void @external_void_func_i1_signext(i1 signext %var)
ret void
}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
%var = load volatile i1, i1 addrspace(1)* undef
- call void @external_void_func_i1_zeroext(i1 %var)
+ call void @external_void_func_i1_zeroext(i1 zeroext %var)
ret void
}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
%var = load volatile i8, i8 addrspace(1)* undef
- call void @external_void_func_i8_signext(i8 %var)
+ call void @external_void_func_i8_signext(i8 signext %var)
ret void
}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
%var = load volatile i8, i8 addrspace(1)* undef
- call void @external_void_func_i8_zeroext(i8 %var)
+ call void @external_void_func_i8_zeroext(i8 zeroext %var)
ret void
}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
%var = load volatile i16, i16 addrspace(1)* undef
- call void @external_void_func_i16_signext(i16 %var)
+ call void @external_void_func_i16_signext(i16 signext %var)
ret void
}
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
%var = load volatile i16, i16 addrspace(1)* undef
- call void @external_void_func_i16_zeroext(i16 %var)
+ call void @external_void_func_i16_zeroext(i16 zeroext %var)
ret void
}
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32 addrspace(5)* %alloca)
+ i32 addrspace(5)* byval(i32) %alloca)
ret void
}
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32 addrspace(5)* %alloca)
+ i32 addrspace(5)* byval(i32) %alloca)
ret void
}
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32 addrspace(5)* %alloca)
+ i32 addrspace(5)* byval(i32) %alloca)
ret void
}
i32 210, i32 220, i32 230, i32 240,
i32 250, i32 260, i32 270, i32 280,
i32 290, i32 300, i32 310, i32 320,
- i32 addrspace(5)* %alloca)
+ i32 addrspace(5)* byval(i32) %alloca)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%var = load volatile i1, i1 addrspace(1)* undef
- call amdgpu_gfx void @external_void_func_i1_signext(i1 %var)
+  call amdgpu_gfx void @external_void_func_i1_signext(i1 signext %var)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%var = load volatile i1, i1 addrspace(1)* undef
- call amdgpu_gfx void @external_void_func_i1_zeroext(i1 %var)
+ call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%var = load volatile i8, i8 addrspace(1)* undef
- call amdgpu_gfx void @external_void_func_i8_signext(i8 %var)
+ call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%var = load volatile i8, i8 addrspace(1)* undef
- call amdgpu_gfx void @external_void_func_i8_zeroext(i8 %var)
+ call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%var = load volatile i16, i16 addrspace(1)* undef
- call amdgpu_gfx void @external_void_func_i16_signext(i16 %var)
+ call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%var = load volatile i16, i16 addrspace(1)* undef
- call amdgpu_gfx void @external_void_func_i16_zeroext(i16 %var)
+ call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
ret void
}
%gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
store i8 3, i8 addrspace(5)* %gep0
store i32 8, i32 addrspace(5)* %gep1
- call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %val)
+ call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val)
ret void
}
%in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
store i8 3, i8 addrspace(5)* %in.gep0
store i32 8, i32 addrspace(5)* %in.gep1
- call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val)
+ call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val)
%out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
%out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
%out.val0 = load i8, i8 addrspace(5)* %out.gep0
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_i1_inreg(i1 true)
+ call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_i8_inreg(i8 123)
+ call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_i16_inreg(i16 123)
+ call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_i32_inreg(i32 42)
+ call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_i64_inreg(i64 123)
+ call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <2 x i64>, <2 x i64> addrspace(4)* null
- call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> %val)
+ call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> <i64 8589934593, i64 17179869187>)
+ call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
ret void
}
%load = load <2 x i64>, <2 x i64> addrspace(4)* null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
- call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> %val)
+ call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
ret void
}
; GFX10-NEXT: s_setpc_b64 s[4:5]
%load = load <2 x i64>, <2 x i64> addrspace(4)* null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> %val)
+ call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_f16_inreg(half 4.0)
+ call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_f32_inreg(float 4.0)
+ call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> <float 1.0, float 2.0>)
+ call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> <float 1.0, float 2.0, float 4.0>)
+ call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
+ call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_f64_inreg(double 4.0)
+ call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> <double 2.0, double 4.0>)
+ call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> <double 2.0, double 4.0, double 8.0>)
+ call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <2 x i16>, <2 x i16> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> %val)
+ call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <3 x i16>, <3 x i16> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> %val)
+ call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <3 x half>, <3 x half> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> %val)
+ call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> <i16 1, i16 2, i16 3>)
+ call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> <half 1.0, half 2.0, half 4.0>)
+ call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <4 x i16>, <4 x i16> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> %val)
+ call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
+ call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <2 x half>, <2 x half> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> %val)
+ call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <2 x i32>, <2 x i32> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> %val)
+ call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> <i32 1, i32 2>)
+ call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> <i32 3, i32 4, i32 5>)
+ call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
+ call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6)
ret void
}
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
%val = load <4 x i32>, <4 x i32> addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> %val)
+ call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+ call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
+ call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>)
ret void
}
; GFX10-NEXT: s_setpc_b64 s[4:5]
%ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr
- call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> %val)
+ call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
ret void
}
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
- call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
+ call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
ret void
}
; GFX10-NEXT: s_setpc_b64 s[4:5]
%ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr
- call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> %val)
+ call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
ret void
}
; GFX10-NEXT: s_setpc_b64 s[4:5]
%ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr
- call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> %val)
+ call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
ret void
}
%ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
%val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0
%val1 = load i32, i32 addrspace(4)* undef
- call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> %val0, i32 %val1)
+ call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1)
ret void
}
; GCN-NEXT: s_addc_u32 s7, s7, callee@rel32@hi+12
; GCN-NEXT: s_setpc_b64 s[6:7]
%add = fadd float %arg0, 1.0
- %call = tail call amdgpu_gfx float @callee(float %add, float 2.0)
+ %call = tail call amdgpu_gfx float @callee(float %add, float inreg 2.0)
ret float %call
}
; CHECK-NOT: r0
; CHECK: bl returns_r0
; CHECK-NOT: r0
- %b = call i32 @returns_r0(i32 %a)
+ %b = call i32 @returns_r0(i32 returned %a)
ret i32 %a
}
; CHECKT2D: uxth r0, r0
; CHECKT2D: bl _identity32
; CHECKT2D: mov r0, [[SAVEX]]
- %call = tail call i16 @identity16(i16 %x)
+ %call = tail call i16 @identity16(i16 returned %x)
%b = zext i16 %call to i32
- %call2 = tail call i32 @identity32(i32 %b)
+ %call2 = tail call i32 @identity32(i32 returned %b)
ret i16 %x
}
; This shouldn't be required
; CHECKT2D: mov r0, [[SAVEX]]
- %call = tail call i16 @retzext16(i16 %x)
+ %call = tail call i16 @retzext16(i16 returned %x)
%b = zext i16 %call to i32
- %call2 = tail call i32 @identity32(i32 %b)
+ %call2 = tail call i32 @identity32(i32 returned %b)
ret i16 %x
}
; CHECKT2D: sxth r0, {{r[0-9]+}}
; CHECKT2D: bl _identity32
; CHECKT2D: mov r0, [[SAVEX]]
- %call = tail call i16 @retzext16(i16 %x)
+ %call = tail call i16 @retzext16(i16 returned %x)
%b = sext i16 %call to i32
- %call2 = tail call i32 @identity32(i32 %b)
+ %call2 = tail call i32 @identity32(i32 returned %b)
ret i16 %x
}
; CHECKT2D: uxth r0, r0
; CHECKT2D: bl _identity32
; CHECKT2D: b.w _paramzext16
- %call = tail call i16 @paramzext16(i16 %x)
+ %call = tail call i16 @paramzext16(i16 zeroext returned %x)
%b = zext i16 %call to i32
- %call2 = tail call i32 @identity32(i32 %b)
- %call3 = tail call i16 @paramzext16(i16 %call)
+ %call2 = tail call i32 @identity32(i32 returned %b)
+ %call3 = tail call i16 @paramzext16(i16 zeroext returned %call)
ret i16 %call3
}
; CHECKT2D: bl _paramzext16
; CHECKT2D: bl _identity32
; CHECKT2D: b.w _paramzext16
- %call = tail call i16 @paramzext16(i16 %x)
+ %call = tail call i16 @paramzext16(i16 zeroext returned %x)
; Should make no difference if %x is used below rather than %call, but it does
%b = zext i16 %x to i32
%call2 = tail call i32 @identity32(i32 %b)
- %call3 = tail call i16 @paramzext16(i16 %call)
+ %call3 = tail call i16 @paramzext16(i16 zeroext returned %call)
ret i16 %call3
}
; FIXME: Tail call should be OK here
; CHECKT2D: bl _identity32
- %call = tail call i16 @bothzext16(i16 %x)
+ %call = tail call i16 @bothzext16(i16 zeroext returned %x)
%b = zext i16 %x to i32
- %call2 = tail call i32 @identity32(i32 %b)
+ %call2 = tail call i32 @identity32(i32 returned %b)
ret i16 %call
}
; CHECKT2D: sxth r0, [[SAVEX]]
; CHECKT2D: bl _identity32
; CHECKT2D: mov r0, [[SAVEX]]
- %call = tail call i16 @bothzext16(i16 %x)
+ %call = tail call i16 @bothzext16(i16 zeroext returned %x)
%b = sext i16 %x to i32
- %call2 = tail call i32 @identity32(i32 %b)
+ %call2 = tail call i32 @identity32(i32 returned %b)
ret i16 %x
}
; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
; CHECKT2D: b.w _B_ctor_base
%0 = bitcast %struct.C* %this to %struct.A*
- %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+ %call = tail call %struct.A* @A_ctor_base(%struct.A* returned %0)
%1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
- %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+ %call2 = tail call %struct.B* @B_ctor_base(%struct.B* returned %1, i32 %x)
ret %struct.C* %this
}
; CHECKELF: b C_ctor_base
; CHECKT2D-LABEL: C_ctor_complete:
; CHECKT2D: b.w _C_ctor_base
- %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
+ %call = tail call %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x)
ret %struct.C* %this
}
; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
; CHECKT2D: b.w _B_ctor_complete
%b = getelementptr inbounds %struct.D, %struct.D* %this, i32 0, i32 0
- %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
- %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+ %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
+ %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
ret %struct.D* %this
}
; CHECKT2D-LABEL: E_ctor_base:
; CHECKT2D-NOT: b.w _B_ctor_complete
%b = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 0
- %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+ %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
%b2 = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 1
- %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
+ %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b2, i32 %x)
ret %struct.E* %this
}
; CHECK-NOT: add %sp
; CHECK: restore
define void @call_intarg(i32 %i0, i8* %i1) {
- call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
+ call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 signext %i0, i8* %i1)
ret void
}
; SOFT: or %i1, %i0, %o0
; CHECK: call inreg_fi
define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
- %x = call i32 @inreg_fi(i32 %i1, float %f5)
+ %x = call i32 @inreg_fi(i32 inreg %i1, float inreg %f5)
ret void
}
; SOFT: or %i1, %i0, %o0
; CHECK: call inreg_ff
define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
- %x = call float @inreg_ff(float %f3, float %f5)
+ %x = call float @inreg_ff(float inreg %f3, float inreg %f5)
ret void
}
; SOFT: or %i1, %i0, %o0
; CHECK: call inreg_if
define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
- %x = call i32 @inreg_if(float %f3, i32 %i2)
+ %x = call i32 @inreg_if(float inreg %f3, i32 inreg %i2)
ret void
}
; CHECK: or [[R1]], [[R2]], %o0
; CHECK: call inreg_ii
define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
- %x = call i32 @inreg_ii(i32 %i1, i32 %i2)
+ %x = call i32 @inreg_ii(i32 inreg %i1, i32 inreg %i2)
ret void
}
; CHECK-STACK: mvghi 160(%r15), -5
; CHECK-STACK: brasl %r14, bar@PLT
- call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0,
+ call void @bar (i8 signext -1, i16 signext -2, i32 signext -3, i64 -4, float 0.0, double 0.0,
fp128 0xL00000000000000000000000000000000, i64 -5,
- float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9,
+ float -0.0, double -0.0, i8 signext -6, i16 signext -7, i32 signext -8, i64 -9,
float 0.0, double 0.0,
fp128 0xL00000000000000000000000000000000)
ret void
; CHECK-STACK: mvghi 160(%r15), -5
; CHECK-STACK: brasl %r14, bar@PLT
- call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0,
+ call void @bar (i8 zeroext -1, i16 zeroext -2, i32 zeroext -3, i64 -4, float 0.0, double 0.0,
fp128 0xL00000000000000000000000000000000, i64 -5,
- float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9,
+ float -0.0, double -0.0, i8 zeroext -6, i16 zeroext -7, i32 zeroext -8, i64 -9,
float 0.0, double 0.0,
fp128 0xL00000000000000000000000000000000)
ret void
define x86_fastcallcc void @caller(i32, i64) {
%X = alloca i32 ; <i32*> [#uses=1]
- call x86_fastcallcc void @func( i32* %X, i64 0 )
+ call x86_fastcallcc void @func( i32* %X, i64 inreg 0 )
ret void
}
declare x86_fastcallcc i64 @callee(i64 inreg)
define i64 @caller() {
- %X = call x86_fastcallcc i64 @callee( i64 4294967299 ) ; <i64> [#uses=1]
+ %X = call x86_fastcallcc i64 @callee( i64 inreg 4294967299 ) ; <i64> [#uses=1]
; CHECK: mov{{.*}}edx, 1
ret i64 %X
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+; This tests that we only look at the call site for ABI attributes, so f and f2 should codegen differently
+
+define void @b(i8* byval(i8) %p) {
+; CHECK-LABEL: b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ ret void
+}
+
+define void @f(i8 %p) {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: movb %al, (%rsp)
+; CHECK-NEXT: callq b@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %a = alloca i8
+ ;store i8 %p, i8* %a
+ call void @b(i8* byval(i8) %a)
+ ret void
+}
+
+define void @f2(i8 %p) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: callq b@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %a = alloca i8
+ ;store i8 %p, i8* %a
+ call void @b(i8* %a)
+ ret void
+}
+
; NORMAL-NEXT: addl $12, %esp
define void @test4() optsize {
entry:
- call void @inreg(i32 1, i32 2, i32 3, i32 4)
+ call void @inreg(i32 1, i32 inreg 2, i32 3, i32 4)
ret void
}
define void @test12() optsize {
entry:
%s = alloca %struct.s, align 4
- call void @struct(%struct.s* %s, i32 2, i32 3, i32 4)
+ call void @struct(%struct.s* byval(%struct.s) %s, i32 2, i32 3, i32 4)
call void @good(i32 5, i32 6, i32 7, i32 8)
- call void @struct(%struct.s* %s, i32 10, i32 11, i32 12)
+ call void @struct(%struct.s* byval(%struct.s) %s, i32 10, i32 11, i32 12)
ret void
}
entry:
%s = alloca %struct.s, align 4
call void @good(i32 1, i32 2, i32 3, i32 4)
- call void @struct(%struct.s* %s, i32 6, i32 7, i32 8)
+ call void @struct(%struct.s* byval(%struct.s) %s, i32 6, i32 7, i32 8)
call void @good(i32 9, i32 10, i32 11, i32 12)
ret void
}
%0 = bitcast %struct.A* %a to i64*
%1 = load i64, i64* %0, align 4
store i64 %1, i64* %agg.tmp, align 4
- %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval(%struct.A) %tmpcast)
+ %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned %ref.tmp, %struct.A* byval(%struct.A) %tmpcast)
%2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
call void @B_func(%struct.B* sret(%struct.B) %tmp, %struct.B* %ref.tmp, i32 1)
ret void
; CHECK-DAG: movl {{.*}}, %edx
; CHECK: calll _spill
%i = call i32 @param2_ret(i32 1, i32 2)
- call void @spill(i32 %a, i32 %b, i32 %c)
+ call void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c)
ret void
}
; CHECK: pushl [[REGISTER2]]
; CHECK: calll _init
- call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+ call void @foo_ret_p(%Foo* sret(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_ret_p
- call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+ call void @foo_ret_p(%Foo* sret(%Foo) %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_ret_p
; CHECK: subl $8, %esp
; CHECK: movl $9, %eax
; CHECK: calll _foo_inreg_p
- call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+ call void @foo_inreg_p(i32 inreg 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
declare x86_fastcallcc void @fastcall2(i32 inreg %a, i32 inreg %b)
define void @cdecl_fastcall_tail(i32 %a, i32 %b) {
- tail call x86_fastcallcc void @fastcall2(i32 %a, i32 %b)
+ tail call x86_fastcallcc void @fastcall2(i32 inreg %a, i32 inreg %b)
ret void
}
; fastcall2 won't pop anything.