-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; FUNC-LABEL: {{^}}test_kernel:
+; GCN-LABEL: {{^}}test_default_si:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 0
+define void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
-; DEFAULT: FloatMode: 192
-; DEFAULT: IeeeMode: 0
+; GCN-LABEL: {{^}}test_default_vi:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 0
+define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
-; FP64-DENORMAL: FloatMode: 192
-; FP64-DENORMAL: IeeeMode: 0
+; GCN-LABEL: {{^}}test_f64_denormals:
+; GCN: FloatMode: 192
+; GCN: IeeeMode: 0
+define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
-; FP32-DENORMAL: FloatMode: 48
-; FP32-DENORMAL: IeeeMode: 0
+; GCN-LABEL: {{^}}test_f32_denormals:
+; GCNL: FloatMode: 48
+; GCN: IeeeMode: 0
+define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
-; BOTH-DENORMAL: FloatMode: 240
-; BOTH-DENORMAL: IeeeMode: 0
+; GCN-LABEL: {{^}}test_f32_f64_denormals:
+; GCN: FloatMode: 240
+; GCN: IeeeMode: 0
+define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
-; NO-DENORMAL: FloatMode: 0
-; NO-DENORMAL: IeeeMode: 0
-define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+; GCN-LABEL: {{^}}test_no_denormals
+; GCN: FloatMode: 0
+; GCN: IeeeMode: 0
+define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 {
store float 0.0, float addrspace(1)* %out0
store double 0.0, double addrspace(1)* %out1
ret void
}
+
+attributes #0 = { nounwind "target-cpu"="tahiti" }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-features"="+fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
-; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=NODENORM %s
-; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=DENORM %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
ret void
}
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal0_f32:
-; NODENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
-; DENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
; GCN: buffer_store_dword [[REG]]
-define void @test_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
store float %canonicalized, float addrspace(1)* %out
ret void
}
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal1_f32:
-; NODENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
-; DENORM: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+ store float %canonicalized, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
; GCN: buffer_store_dword [[REG]]
-define void @test_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
%canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
store float %canonicalized, float addrspace(1)* %out
ret void
ret void
}
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal0_f64:
-; DENORM-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
-; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
-; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @test_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
}
-; GCN-LABEL: {{^}}test_fold_canonicalize_denormal1_f64:
-; DENORM-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
-; DENORM-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
+ %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+ store double %canonicalized, double addrspace(1)* %out
+ ret void
+}
-; NODENORM: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; NODENORM: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
-define void @test_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #1 {
+define void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
%canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
store double %canonicalized, double addrspace(1)* %out
ret void
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
ret void
}
-; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
-; FIXME-CI: v_floor_f64_e32
-; FIXME-CI: v_floor_f64_e32
-; FIXME-CI: v_floor_f64_e32
-; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
-; store <3 x double> %y, <3 x double> addrspace(1)* %out
-; ret void
-; }
+; FUNC-LABEL: {{^}}ffloor_v3f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI-NOT: v_floor_f64_e32
+define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+ %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
+ store <3 x double> %y, <3 x double> addrspace(1)* %out
+ ret void
+}
; FUNC-LABEL: {{^}}ffloor_v4f64:
; CI: v_floor_f64_e32
-; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=COMMON %s
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; COMMON-LABEL: {{^}}test_kernel:
-; COMMON-DENORMAL: compute_pgm_rsrc1_float_mode = compute_pgm_rsrc1_float_mode = 192
-; COMMON-DENORMAL: compute_pgm_rsrc1_dx10_clamp = 1
-define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+; GCN-LABEL: {{^}}test_default_ci:
+; GCN: compute_pgm_rsrc1_float_mode = 192
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
store float 0.0, float addrspace(1)* %out0
store double 0.0, double addrspace(1)* %out1
ret void
}
+
+; GCN-LABEL: {{^}}test_default_vi:
+; GCN: compute_pgm_rsrc1_float_mode = 192
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_f64_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 192
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_f32_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 48
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_f32_f64_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 240
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_no_denormals:
+; GCN: compute_pgm_rsrc1_float_mode = 0
+; GCN: compute_pgm_rsrc1_dx10_clamp = 1
+; GCN: compute_pgm_rsrc1_ieee_mode = 0
+define void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 {
+ store float 0.0, float addrspace(1)* %out0
+ store double 0.0, double addrspace(1)* %out1
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="kaveri" }
+attributes #1 = { nounwind "target-cpu"="fiji" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,+fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp32-denormals,-fp64-denormals" }
+attributes #4 = { nounwind "target-features"="+fp32-denormals,+fp64-denormals" }
+attributes #5 = { nounwind "target-features"="-fp32-denormals,-fp64-denormals" }
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0
declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0
+; FUNC-LABEL: {{^}}rcp_undef_f32:
+; SI-NOT: v_rcp_f32
+define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
+ %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
-; FUNC-LABEL: {{^}}rcp_f32:
-; SI: v_rcp_f32_e32
-define void @rcp_f32(float addrspace(1)* %out, float %src) #1 {
- %rcp = call float @llvm.amdgcn.rcp.f32(float %src) #0
+; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
+; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
+ %rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: {{^}}rcp_pat_f32:
+; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
+; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
+define void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
+ %rcp = fdiv float 1.0, %src
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
-; SI-SAFE: v_rcp_f32_e32
-; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
-define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
+; SI: v_div_scale_f32
+define void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f32:
-; SI-UNSAFE: v_rsq_f32_e32
-; SI-SAFE: v_sqrt_f32_e32
-; SI-SAFE: v_rcp_f32_e32
-define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
- %sqrt = call float @llvm.sqrt.f32(float %src) #0
- %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) #0
+; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
+; SI: v_sqrt_f32_e32
+; SI: v_rcp_f32_e32
+define void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+ %sqrt = call float @llvm.sqrt.f32(float %src)
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
+; SI: v_rsq_f32_e32
+define void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
+ %sqrt = call float @llvm.sqrt.f32(float %src)
+ %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rcp_f64:
-; SI: v_rcp_f64_e32
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
define void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
- %rcp = call double @llvm.amdgcn.rcp.f64(double %src) #0
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_rcp_f64:
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
store double %rcp, double addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}rcp_pat_f64:
-; SI: v_rcp_f64_e32
+; SI: v_div_scale_f64
define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
%rcp = fdiv double 1.0, %src
store double %rcp, double addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
-; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE-NOT: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64
-; SI-SAFE: v_rcp_f64
-define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
- %sqrt = call double @llvm.sqrt.f64(double %src) #0
- %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) #0
+; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
+; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
+ %rcp = fdiv double 1.0, %src
store double %rcp, double addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: {{^}}rcp_undef_f32:
-; SI-NOT: v_rcp_f32
-define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
- %rcp = call float @llvm.amdgcn.rcp.f32(float undef) #0
- store float %rcp, float addrspace(1)* %out, align 4
+; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
+; SI-NOT: v_rsq_f64_e32
+; SI: v_sqrt_f64
+; SI: v_rcp_f64
+define void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+ %sqrt = call double @llvm.sqrt.f64(double %src)
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
+; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
+; SI-NOT: [[RESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
+ %sqrt = call double @llvm.sqrt.f64(double %src)
+ %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
+ store double %rcp, double addrspace(1)* %out, align 8
ret void
}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
+attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
+attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: sin_f32
; EG: MULADD_IEEE *
; EG: ADD *
; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG-NOT: SIN
+
; SI: v_mul_f32
; SI: v_fract_f32
; SI: v_sin_f32
; SI-NOT: v_sin_f32
-
define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
%sin = call float @llvm.sin.f32(float %x)
store float %sin, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}sin_3x_f32:
-; SI-UNSAFE-NOT: v_add_f32
-; SI-UNSAFE: 0x3ef47644
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_mul_f32
-; SI-SAFE: v_mul_f32
+; FUNC-LABEL: {{^}}safe_sin_3x_f32:
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @safe_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+ %y = fmul float 3.0, %x
+ %sin = call float @llvm.sin.f32(float %y)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_sin_3x_f32:
+; SI-NOT: v_add_f32
+; SI: 0x3ef47644
+; SI: v_mul_f32
; SI: v_fract_f32
; SI: v_sin_f32
; SI-NOT: v_sin_f32
-define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+define void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x) #2 {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}sin_2x_f32:
-; SI-UNSAFE-NOT: v_add_f32
-; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_add_f32
-; SI-SAFE: v_mul_f32
+; FUNC-LABEL: {{^}}safe_sin_2x_f32:
+; SI: v_add_f32
+; SI: v_mul_f32
; SI: v_fract_f32
; SI: v_sin_f32
; SI-NOT: v_sin_f32
-define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+define void @safe_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}test_2sin_f32:
-; SI-UNSAFE: 0x3ea2f983
-; SI-UNSAFE: v_mul_f32
-; SI-SAFE: v_add_f32
-; SI-SAFE: v_mul_f32
+; FUNC-LABEL: {{^}}unsafe_sin_2x_f32:
+; SI-NOT: v_add_f32
+; SI: 0x3ea2f983
+; SI: v_mul_f32
; SI: v_fract_f32
; SI: v_sin_f32
; SI-NOT: v_sin_f32
-define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+define void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x) #2 {
+ %y = fmul float 2.0, %x
+ %sin = call float @llvm.sin.f32(float %y)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_safe_2sin_f32:
+; SI: v_add_f32
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @test_safe_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+ %y = fmul float 2.0, %x
+ %sin = call float @llvm.sin.f32(float %y)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_unsafe_2sin_f32:
+; SI: 0x3ea2f983
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @test_unsafe_2sin_f32(float addrspace(1)* %out, float %x) #2 {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, float addrspace(1)* %out
; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG-NOT: SIN
+
; SI: v_sin_f32
; SI: v_sin_f32
; SI: v_sin_f32
; SI: v_sin_f32
; SI-NOT: v_sin_f32
-
define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
store <4 x float> %sin, <4 x float> addrspace(1)* %out
ret void
}
-declare float @llvm.sin.f32(float) readnone
-declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
+declare float @llvm.sin.f32(float) #0
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind "unsafe-fp-math"="false" }
+attributes #2 = { nounwind "unsafe-fp-math"="true" }