; VI-SDAG-LABEL: s_exp_f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; VI-SDAG-NEXT: s_mov_b32 s3, 0xc2fc0000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s3, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: s_and_b32 s3, s2, 0xfffff000
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, s2, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s3, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x39a3b295
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, s3, v3
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-LABEL: s_exp_f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: s_and_b32 s3, s2, 0xfffff000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s3
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, s3, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s3, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
;
; GFX900-SDAG-LABEL: s_exp_f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dword s4, s[0:1], 0x2c
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v2
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3]
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
+; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp_f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v1, s3, -v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v0, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_f32:
; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s4, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v2
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: s_mov_b32 s2, -1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dword s2, s[0:1], 0xb
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, s2
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v1, s3, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; SI-GISEL-NEXT: v_fma_f32 v0, s2, v0, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 59, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.x,
-; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: SETGT * T1.W, literal.x, PV.W,
-; R600-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
-; R600-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
-; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
-; R600-NEXT: ADD T0.W, T0.W, PV.W,
-; R600-NEXT: CNDE * T1.W, T1.W, 1.0, literal.x,
-; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; R600-NEXT: EXP_IEEE * T0.X, PV.W,
-; R600-NEXT: MUL_IEEE T0.X, PS, T1.W,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T1.W, KC0[2].Z, -PV.W,
+; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: RNDNE T3.W, PS,
+; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
+; R600-NEXT: TRUNC * T4.W, PV.W,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: FLT_TO_INT T0.Z, PS,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
+; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T1.Z, PS, PV.W,
+; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
+; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
+; R600-NEXT: -330(nan), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T0.X, PS, literal.x,
+; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
+; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
+; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
+; R600-NEXT: -254(nan), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
+; R600-NEXT: -127(nan), 254(3.559298e-43)
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
+; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
+; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
+; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
+; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
+; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T1.Z, T1.Y, T3.X, PS,
+; R600-NEXT: CNDE_INT T0.W, T1.W, PV.W, T1.X,
+; R600-NEXT: LSHL * T1.W, PV.Z, literal.x,
+; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T1.W, PS, literal.x,
+; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.W, PV.Z,
+; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.W, PS, PV.W,
+; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z,
+; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
+; R600-NEXT: SETGT * T1.W, KC0[2].Z, literal.x,
+; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
;
; CM-LABEL: s_exp_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 15, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 64, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, literal.x,
-; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; CM-NEXT: SETGT * T1.W, literal.x, PV.W,
-; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
-; CM-NEXT: CNDE * T2.W, PV.W, 0.0, literal.x,
-; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
-; CM-NEXT: CNDE T0.Z, T1.W, 1.0, literal.x,
-; CM-NEXT: ADD * T0.W, T0.W, PV.W, BS:VEC_120/SCL_212
-; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
+; CM-NEXT: RNDNE * T2.W, PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: TRUNC T2.Z, PV.W,
+; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
+; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
+; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
+; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
+; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
+; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
+; CM-NEXT: -330(nan), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
+; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
+; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
+; CM-NEXT: 254(3.559298e-43), -127(nan)
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
+; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
+; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
+; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
+; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T0.Y, T2.X, T0.Y, PV.W,
+; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.X,
+; CM-NEXT: LSHL * T1.W, PV.Y, literal.x,
+; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Z, PV.Y,
+; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z,
+; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
+; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Z, PV.W, PV.Z, 0.0,
+; CM-NEXT: SETGT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call float @llvm.exp.f32(float %in)
define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; VI-SDAG-LABEL: s_exp_v2f32:
; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; VI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
-; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
-; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: s_and_b32 s4, s3, 0xfffff000
; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v4
+; VI-SDAG-NEXT: s_and_b32 s4, s2, 0xfffff000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; VI-SDAG-NEXT: v_mov_b32_e32 v6, s4
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
+; VI-SDAG-NEXT: v_sub_f32_e32 v6, s2, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v6
+; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: v_rndne_f32_e32 v5, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v5
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v2
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0
; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_v2f32:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, s6
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s7
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
-; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v3, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: s_and_b32 s4, s2, 0xfffff000
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, s4, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, s4, v1
+; VI-GISEL-NEXT: s_and_b32 s4, s3, 0xfffff000
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s4
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, s3, v5
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x39a3b295, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
+; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_rndne_f32_e32 v5, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v5
+; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_exp_v2f32:
; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s7, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
-; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
-; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v8, v0
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
-; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v7
+; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc
+; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v6
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp_v2f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s5, 0x42b17218
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, s6
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, s7
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
-; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v1, s4, -v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v0, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s3, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v3, s3, v3, -v5
+; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v0, v3
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v5
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v5, v3
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
+; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_v2f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT: s_mov_b32 s6, -1
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5
+; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v5
+; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6
+; SI-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v7, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v5, v6
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v5
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4
; SI-SDAG-NEXT: s_mov_b32 s4, s0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0
-; SI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
; SI-SDAG-NEXT: s_mov_b32 s5, s1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_v2f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
+; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x32a5705f
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
+; SI-GISEL-NEXT: s_mov_b32 s5, 0x42b17218
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, s6
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, s7
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v3
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[0:1]
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_exp_f32_e32 v3, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v1, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v2, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v3, v1
-; SI-GISEL-NEXT: s_mov_b32 s6, -1
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, s2
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_fma_f32 v1, v1, s4, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; SI-GISEL-NEXT: v_fma_f32 v1, s2, v0, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, s3, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v3, s3, v3, -v5
+; SI-GISEL-NEXT: v_fma_f32 v0, s3, v0, v3
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v5
+; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v5, v3
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_v2f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 21, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: ALU 96, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 12, @101, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
-; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE * T0.W, KC0[3].X, literal.x,
-; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: SETGT T1.W, literal.x, PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, KC0[2].W, literal.y,
-; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
-; R600-NEXT: SETGT T3.W, literal.x, PS,
-; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y,
-; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01)
-; R600-NEXT: ADD T0.W, T0.W, PS,
-; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.x,
-; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
-; R600-NEXT: ADD T0.Z, T2.W, PS,
-; R600-NEXT: CNDE T1.W, T1.W, 1.0, literal.x, BS:VEC_120/SCL_212
-; R600-NEXT: EXP_IEEE * T0.X, PV.W,
-; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
-; R600-NEXT: CNDE T0.W, T3.W, 1.0, literal.x,
+; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W,
+; R600-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.y,
+; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.z,
+; R600-NEXT: -4096(nan), 967029397(3.122284e-04)
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: RNDNE T1.Z, PS,
+; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
+; R600-NEXT: ADD * T2.W, KC0[2].W, -PV.Z,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
+; R600-NEXT: MUL_IEEE T2.Z, T0.Z, literal.y,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
+; R600-NEXT: ADD * T1.W, T3.W, -PV.Z,
+; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; R600-NEXT: ADD T3.Z, PS, PV.W,
+; R600-NEXT: RNDNE T0.W, PV.Z,
+; R600-NEXT: MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: TRUNC T0.Y, T1.Z,
+; R600-NEXT: MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212
+; R600-NEXT: ADD T1.W, T2.Z, -PV.W, BS:VEC_201
; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
-; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
-; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
-; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T0.Z, PV.W, PV.Z,
+; R600-NEXT: FLT_TO_INT T1.W, PV.Y,
+; R600-NEXT: MUL_IEEE * T2.W, PS, literal.x,
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T1.Z, PS, literal.x,
+; R600-NEXT: SETGT_UINT T3.W, PV.W, literal.y,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.Z,
+; R600-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
+; R600-NEXT: CNDE_INT T1.X, PV.W, T2.W, PV.Z,
+; R600-NEXT: MUL_IEEE T1.Y, PS, literal.x,
+; R600-NEXT: MAX_INT T0.Z, T1.W, literal.y,
+; R600-NEXT: MIN_INT T2.W, T1.W, literal.z,
+; R600-NEXT: TRUNC * T0.W, T0.W,
+; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
+; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; R600-NEXT: FLT_TO_INT T2.X, PS,
+; R600-NEXT: ADD_INT T2.Y, PV.W, literal.x,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T0.W, T1.W, literal.z,
+; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.w,
+; R600-NEXT: -254(nan), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: ADD_INT T3.X, T1.W, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
+; R600-NEXT: SETGT_INT T0.Z, T1.W, literal.x,
+; R600-NEXT: MUL_IEEE T0.W, T0.X, literal.y,
+; R600-NEXT: MUL_IEEE * T4.W, T0.Y, literal.y,
+; R600-NEXT: -127(nan), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T4.Y, PV.W, literal.x,
+; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, T1.W,
+; R600-NEXT: CNDE_INT T3.W, T3.W, PV.X, T2.Y,
+; R600-NEXT: MAX_INT * T5.W, T2.X, literal.y,
+; R600-NEXT: 209715200(1.972152e-31), -330(nan)
+; R600-NEXT: SETGT_INT T3.X, T1.W, literal.x,
+; R600-NEXT: ADD_INT T2.Y, PS, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T2.X, literal.z,
+; R600-NEXT: SETGT_UINT * T1.W, T2.X, literal.w,
+; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: MIN_INT * T5.W, T2.X, literal.x,
+; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T5.X, PV.W, literal.x,
+; R600-NEXT: ADD_INT T3.Y, T2.X, literal.y,
+; R600-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
+; R600-NEXT: CNDE_INT T5.W, T1.W, T2.Y, T2.Z,
+; R600-NEXT: SETGT_INT * T6.W, T2.X, literal.y,
+; R600-NEXT: -254(nan), -127(nan)
+; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T2.X,
+; R600-NEXT: CNDE_INT T2.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT: SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT * T0.W, T2.W, T4.Y, T0.W,
+; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T0.X, T0.Z, PS, T0.X,
+; R600-NEXT: LSHL T3.Y, PV.W, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, PV.Z, PV.X, PV.Y,
+; R600-NEXT: CNDE_INT T0.W, T1.W, T4.X, T4.W,
+; R600-NEXT: MUL_IEEE * T1.W, T1.Y, literal.y,
+; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T2.X, T3.Z, T1.Y, PS,
+; R600-NEXT: CNDE_INT T0.Y, T6.W, PV.W, T0.Y,
+; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T3.X, PV.X, T1.X,
+; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
+; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
+; R600-NEXT: SETGT T1.Z, literal.x, KC0[3].X,
+; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y,
+; R600-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
+; R600-NEXT: ALU clause starting at 101:
+; R600-NEXT: CNDE_INT * T1.W, T2.Z, T0.Y, T2.X,
+; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W,
+; R600-NEXT: SETGT T0.Z, literal.x, KC0[2].W,
+; R600-NEXT: CNDE T0.W, T1.Z, T1.Y, 0.0,
+; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y,
+; R600-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
+; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
+; R600-NEXT: CNDE T0.W, PV.Z, PV.Y, 0.0,
+; R600-NEXT: SETGT * T1.W, KC0[2].W, literal.y,
+; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
+; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y,
+; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
;
; CM-LABEL: s_exp_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 100, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 18, @105, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
-; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, literal.x,
-; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, KC0[2].W, literal.x,
-; CM-NEXT: SETGT * T1.W, literal.y, PV.W,
-; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02)
-; CM-NEXT: CNDE T1.Z, PV.W, 0.0, literal.x,
-; CM-NEXT: SETGT * T2.W, literal.y, PV.Z,
-; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
-; CM-NEXT: CNDE T0.Y, PV.W, 0.0, literal.x,
-; CM-NEXT: CNDE T2.Z, T1.W, 1.0, literal.y,
-; CM-NEXT: ADD * T0.W, T0.W, PV.Z, BS:VEC_120/SCL_212
-; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T1.Y, PV.X, T2.Z,
-; CM-NEXT: CNDE T1.Z, T2.W, 1.0, literal.x,
-; CM-NEXT: ADD * T0.W, T0.Z, T0.Y,
-; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
+; CM-NEXT: RNDNE * T2.W, PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: TRUNC T0.Y, PV.W,
+; CM-NEXT: AND_INT T2.Z, KC0[3].X, literal.x,
+; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z,
+; CM-NEXT: -4096(nan), 1069064192(1.442383e+00)
+; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
+; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
+; CM-NEXT: FLT_TO_INT T1.Z, PV.Y,
+; CM-NEXT: ADD * T0.W, KC0[3].X, -PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: ADD T1.X, T0.Z, -T2.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT: RNDNE * T1.W, PV.Y,
+; CM-NEXT: 967029397(3.122284e-04), -330(nan)
+; CM-NEXT: TRUNC T2.X, PV.W,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y,
+; CM-NEXT: ADD * T0.W, PV.X, T0.X,
+; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00)
; CM-NEXT: EXP_IEEE T0.X, T0.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE * T1.X, PV.X, T1.Z,
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: ADD_INT T1.X, T1.Z, literal.x,
+; CM-NEXT: MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221
+; CM-NEXT: ADD T0.Z, T1.Y, -T1.W,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
+; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04)
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: SETGT_UINT T3.X, T1.Z, literal.x,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
+; CM-NEXT: SETGT_UINT T2.Z, T1.Z, literal.z,
+; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
+; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Y, T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: CNDE_INT T4.X, T2.Z, T0.W, T1.Y,
+; CM-NEXT: CNDE_INT T1.Y, T3.X, T2.Y, T1.X,
+; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT T1.X, T1.Z, literal.x,
+; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
+; CM-NEXT: MUL_IEEE T3.Z, PV.W, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
+; CM-NEXT: -127(nan), 209715200(1.972152e-31)
+; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T2.X, PV.W, T0.W, PV.Z,
+; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T3.Z, PV.X, T1.Y, T1.Z,
+; CM-NEXT: MAX_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
+; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, T0.Z, literal.y,
+; CM-NEXT: SETGT_UINT T4.Z, T0.Z, literal.z,
+; CM-NEXT: MUL_IEEE * T0.W, T0.Y, literal.w,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: -229(nan), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T6.X, PV.W, literal.x,
+; CM-NEXT: MIN_INT T4.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T5.Z, PV.Z, PV.X, PV.Y,
+; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
+; CM-NEXT: MIN_INT T1.Y, T1.Z, literal.x,
+; CM-NEXT: ADD_INT T5.Z, PV.Y, literal.y,
+; CM-NEXT: ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 381(5.338947e-43), -254(nan)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T7.X, T1.W, PV.W, PV.Z,
+; CM-NEXT: SETGT_INT T4.Y, T0.Z, literal.x,
+; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
+; CM-NEXT: ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 127(1.779649e-43), -254(nan)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T8.X, T2.Z, PV.W, PV.Z,
+; CM-NEXT: SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T5.X, PV.X,
+; CM-NEXT: CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201
+; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T5.X, T2.W, PV.W, T0.Y,
+; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T3.Z, PV.X,
+; CM-NEXT: CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201
+; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T0.X, T1.X, PV.W, T0.X,
+; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
+; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y,
+; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
+; CM-NEXT: ALU clause starting at 105:
+; CM-NEXT: CNDE_INT * T0.W, T4.Y, T5.X, T2.X,
+; CM-NEXT: MUL_IEEE T1.X, PV.W, T0.Z,
+; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X,
+; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
+; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W,
+; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
+; CM-NEXT: SETGT * T0.W, KC0[3].X, literal.y,
+; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
+; CM-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.x,
+; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
+; CM-NEXT: SETGT * T0.W, KC0[2].W, literal.y,
+; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
+; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <2 x float> @llvm.exp.v2f32(<2 x float> %in)
store <2 x float> %result, ptr addrspace(1) %out
; VI-SDAG-LABEL: s_exp_v3f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s7, 0xc2fc0000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s7, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, s5, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s7, v4
-; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s7, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, s6, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000
+; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
+; VI-SDAG-NEXT: v_sub_f32_e32 v7, s5, v7
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
+; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
+; VI-SDAG-NEXT: v_exp_f32_e32 v7, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5
+; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v7, v6
+; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
+; VI-SDAG-NEXT: v_sub_f32_e32 v7, s4, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
+; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v9
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v6
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s3
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, s2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v6
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0
; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_v3f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
+; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v2
+; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s2
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, s5, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v7, v5
+; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v6, v5
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
+; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0
+; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3
+; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v6
+; VI-GISEL-NEXT: v_rndne_f32_e32 v6, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
+; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_exp_v3f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7
+; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6
+; GFX900-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6
; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v8, v6
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v0, -v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_fma_f32 v4, s6, v1, v4
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v5
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, s5, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
-; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
-; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5
+; GFX900-SDAG-NEXT: v_ldexp_f32 v6, v6, v7
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v9, v7
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v5, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4
-; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v7
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; GFX900-SDAG-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp_v3f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v1, v0
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s5, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v6
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v8, v6
+; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v1, v7
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v8
+; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; GFX900-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v8, s5
+; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v7
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v3, s6, v3, -v7
+; GFX900-GISEL-NEXT: v_fma_f32 v1, s6, v1, v3
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v7
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v7, v7, v3
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v7, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v7, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
+; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_v3f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-SDAG-NEXT: s_mov_b32 s0, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x32a5705f
+; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5
+; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6
+; SI-SDAG-NEXT: v_fma_f32 v5, s4, v0, -v5
+; SI-SDAG-NEXT: v_fma_f32 v5, s4, v2, v5
; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5
+; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; SI-SDAG-NEXT: v_fma_f32 v4, s5, v0, -v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_fma_f32 v4, s5, v2, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4
-; SI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v2, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v6
+; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v8, v6
+; SI-SDAG-NEXT: v_fma_f32 v0, s6, v0, -v6
+; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v8
+; SI-SDAG-NEXT: v_fma_f32 v0, s6, v2, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v9, v0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
-; SI-SDAG-NEXT: s_mov_b32 s10, -1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v4, v0
-; SI-SDAG-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8
-; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v8
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v3
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v4
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
+; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_v3f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; SI-GISEL-NEXT: v_fma_f32 v0, s4, v1, v0
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v6, s5, v3
+; SI-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v6
+; SI-GISEL-NEXT: v_rndne_f32_e32 v8, v6
+; SI-GISEL-NEXT: v_fma_f32 v7, s5, v1, v7
+; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v8
+; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
+; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, s5
-; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8aa3b, v5
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v2, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[0:1]
-; SI-GISEL-NEXT: s_mov_b32 s10, -1
-; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
-; SI-GISEL-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:8
+; SI-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
+; SI-GISEL-NEXT: v_fma_f32 v3, s6, v3, -v7
+; SI-GISEL-NEXT: v_fma_f32 v1, s6, v1, v3
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v7
+; SI-GISEL-NEXT: v_sub_f32_e32 v7, v7, v3
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v7, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v4
+; SI-GISEL-NEXT: v_mov_b32_e32 v8, s5
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v7, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v3, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v4
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 100, @6, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 69, @107, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
-; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE * T0.W, KC0[3].Z, literal.x,
-; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: SETGT T1.W, literal.x, PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, KC0[3].Y, literal.y,
-; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
-; R600-NEXT: SETGT T3.W, literal.x, PS,
-; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y,
-; R600-NEXT: -1023672320(-1.260000e+02), 1115684864(6.400000e+01)
-; R600-NEXT: ADD T0.Z, T0.W, PS,
-; R600-NEXT: MUL_IEEE T0.W, KC0[3].W, literal.x,
-; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.y,
-; R600-NEXT: 1069066811(1.442695e+00), 1115684864(6.400000e+01)
-; R600-NEXT: ADD T0.Y, T2.W, PS,
-; R600-NEXT: SETGT T1.Z, literal.x, PV.W,
-; R600-NEXT: CNDE T1.W, T1.W, 1.0, literal.y, BS:VEC_120/SCL_212
-; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
-; R600-NEXT: -1023672320(-1.260000e+02), 528482304(5.421011e-20)
-; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
-; R600-NEXT: CNDE T0.Z, PV.Z, 0.0, literal.x,
-; R600-NEXT: CNDE T1.W, T3.W, 1.0, literal.y,
-; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
-; R600-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
-; R600-NEXT: MUL_IEEE T1.X, PS, PV.W,
-; R600-NEXT: ADD T0.W, T0.W, PV.Z,
-; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
-; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; R600-NEXT: CNDE T1.W, T1.Z, 1.0, literal.x,
-; R600-NEXT: EXP_IEEE * T0.Y, PV.W,
-; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T2.X, PS, PV.W,
-; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 6:
+; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T1.W, KC0[3].Y, -PV.W,
+; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: RNDNE T3.W, PS,
+; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
+; R600-NEXT: TRUNC * T4.W, PV.W,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: FLT_TO_INT T0.Z, PS,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
+; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T0.W, PS, PV.W,
+; R600-NEXT: MAX_INT * T1.W, PV.Z, literal.x,
+; R600-NEXT: -330(nan), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T0.Y, PS, literal.x,
+; R600-NEXT: ADD_INT T1.Z, T0.Z, literal.y,
+; R600-NEXT: SETGT_UINT T1.W, T0.Z, literal.z,
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: -229(nan), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT T0.W, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE * T2.W, PS, literal.y,
+; R600-NEXT: -127(nan), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
+; R600-NEXT: MIN_INT T3.W, T0.Z, literal.y,
+; R600-NEXT: AND_INT * T4.W, KC0[3].W, literal.z,
+; R600-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T1.X, T0.X, literal.x,
+; R600-NEXT: ADD T1.Y, KC0[3].W, -PS,
+; R600-NEXT: ADD_INT T2.Z, PV.W, literal.y,
+; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
+; R600-NEXT: SETGT_UINT * T5.W, T0.Z, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
+; R600-NEXT: -127(nan), 254(3.559298e-43)
+; R600-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Z,
+; R600-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y,
+; R600-NEXT: MUL_IEEE T3.W, T4.W, literal.z,
+; R600-NEXT: MUL_IEEE * T6.W, PV.X, literal.w,
+; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04)
+; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212
+; R600-NEXT: RNDNE T3.Y, PV.W,
+; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
+; R600-NEXT: CNDE_INT T5.W, PV.Y, T1.Z, PV.X,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, T0.Y, T2.W,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T0.X, T0.W, PS, T0.X,
+; R600-NEXT: LSHL T0.Y, PV.W, literal.x,
+; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y,
+; R600-NEXT: MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: ADD * T1.W, T3.W, -PV.Y,
+; R600-NEXT: 23(3.222986e-44), -4096(nan)
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T1.Y, PS, PV.W,
+; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x,
+; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T2.Y, PV.X, T1.X,
+; R600-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
+; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
+; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z,
+; R600-NEXT: RNDNE T2.Z, PV.Z,
+; R600-NEXT: TRUNC T0.W, T3.Y,
+; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
+; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
+; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
+; R600-NEXT: TRUNC T3.Z, PV.Z,
+; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y,
+; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z,
+; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
+; R600-NEXT: FLT_TO_INT T0.W, PV.Z,
+; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 1069064192(1.442383e+00), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T4.X, PS, literal.x,
+; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y,
+; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.z, PV.Z,
+; R600-NEXT: ADD T2.W, T0.Z, -T2.Z, BS:VEC_120/SCL_212
+; R600-NEXT: MIN_INT * T3.W, PV.W, literal.w,
+; R600-NEXT: -254(nan), -330(nan)
+; R600-NEXT: 967029397(3.122284e-04), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T5.X, PS, literal.x,
+; R600-NEXT: ADD T3.Y, PV.W, PV.Z,
+; R600-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
+; R600-NEXT: ADD_INT T2.W, T0.W, literal.z,
+; R600-NEXT: SETGT_UINT * T3.W, T0.W, literal.w,
+; R600-NEXT: -254(nan), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x,
+; R600-NEXT: -127(nan), 0(0.000000e+00)
+; R600-NEXT: ALU clause starting at 107:
+; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
+; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y,
+; R600-NEXT: EXP_IEEE * T1.Z, T3.Y,
+; R600-NEXT: 254(3.559298e-43), -127(nan)
+; R600-NEXT: ADD_INT T7.X, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y,
+; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.W,
+; R600-NEXT: CNDE_INT T4.W, PV.Y, T6.X, T5.X,
+; R600-NEXT: SETGT_INT * T0.W, T0.W, literal.z,
+; R600-NEXT: -127(nan), 209715200(1.972152e-31)
+; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x,
+; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
+; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y,
+; R600-NEXT: MUL_IEEE T4.W, T1.Z, literal.z,
+; R600-NEXT: MUL_IEEE * T5.W, PV.Y, literal.w,
+; R600-NEXT: 254(3.559298e-43), -330(nan)
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122
+; R600-NEXT: MUL_IEEE T3.Y, PV.W, literal.x,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z,
+; R600-NEXT: SETGT_UINT * T5.W, T1.Y, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W,
+; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T2.W, T2.W, PV.X, T1.Z,
+; R600-NEXT: LSHL * T3.W, T4.Y, literal.y,
+; R600-NEXT: -127(nan), 23(3.222986e-44)
+; R600-NEXT: ADD_INT T6.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
+; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X,
+; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y,
+; R600-NEXT: 1065353216(1.000000e+00), 127(1.779649e-43)
+; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W,
+; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X,
+; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z,
+; R600-NEXT: CNDE_INT T0.W, T5.W, T2.Y, T1.W,
+; R600-NEXT: MUL_IEEE * T1.W, T3.X, literal.y,
+; R600-NEXT: -1026650416(-1.032789e+02), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T3.X, T5.X, T3.X, PS,
+; R600-NEXT: CNDE_INT T1.Y, T5.Y, PV.W, T1.X,
+; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0,
+; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x,
+; R600-NEXT: LSHL * T1.W, PV.X, literal.y,
+; R600-NEXT: 1118925336(8.872284e+01), 23(3.222986e-44)
+; R600-NEXT: ADD_INT T1.X, PS, literal.x,
+; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y,
+; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, PV.X,
+; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0,
+; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z,
+; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF)
+; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE T0.W, PV.Z, PV.X,
+; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
+; R600-NEXT: 2139095040(INF), -1026650416(-1.032789e+02)
+; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
+; R600-NEXT: SETGT * T1.W, KC0[3].W, literal.y,
+; R600-NEXT: 2(2.802597e-45), 1118925336(8.872284e+01)
+; R600-NEXT: CNDE T2.X, PS, PV.W, literal.x,
+; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; R600-NEXT: 2139095040(INF), 8(1.121039e-44)
; R600-NEXT: LSHR * T3.X, PV.W, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_exp_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 39, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T0.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X
+; CM-NEXT: ALU 102, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 80, @109, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T3.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
; CM-NEXT: CF_END
-; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, literal.x,
-; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Y, KC0[3].Y, literal.x,
-; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.x,
-; CM-NEXT: SETGT * T1.W, literal.y, PV.W,
-; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02)
-; CM-NEXT: CNDE T1.Y, PV.W, 0.0, literal.x,
-; CM-NEXT: SETGT T1.Z, literal.y, PV.Z,
-; CM-NEXT: SETGT * T2.W, literal.y, PV.Y,
-; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
-; CM-NEXT: CNDE T0.X, PV.W, 0.0, literal.x,
-; CM-NEXT: CNDE T2.Y, PV.Z, 0.0, literal.x,
-; CM-NEXT: CNDE T2.Z, T1.W, 1.0, literal.y,
-; CM-NEXT: ADD * T0.W, T0.W, PV.Y, BS:VEC_120/SCL_212
-; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
-; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W, T0.W,
-; CM-NEXT: MUL_IEEE T1.X, PV.W, T2.Z,
-; CM-NEXT: CNDE T1.Y, T1.Z, 1.0, literal.x,
-; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y,
-; CM-NEXT: ADD * T0.W, T0.Z, T2.Y, BS:VEC_201
-; CM-NEXT: 528482304(5.421011e-20), 8(1.121039e-44)
-; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z, T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: LSHR T2.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T3.Y, PV.Z, T1.Y,
-; CM-NEXT: CNDE T0.Z, T2.W, 1.0, literal.y,
-; CM-NEXT: ADD * T0.W, T0.Y, T0.X,
-; CM-NEXT: 2(2.802597e-45), 528482304(5.421011e-20)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE * T3.X, PV.X, T0.Z,
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 6:
+; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
+; CM-NEXT: RNDNE * T2.W, PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: TRUNC T2.Z, PV.W,
+; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
+; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
+; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
+; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
+; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
+; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
+; CM-NEXT: -330(nan), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
+; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
+; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
+; CM-NEXT: 254(3.559298e-43), -127(nan)
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
+; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
+; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
+; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
+; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T1.X, T2.X, T0.Y, PV.W,
+; CM-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, T0.X,
+; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.y,
+; CM-NEXT: 23(3.222986e-44), -4096(nan)
+; CM-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
+; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W,
+; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Y, PV.X,
+; CM-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT: RNDNE * T0.W, PV.X,
+; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y,
+; CM-NEXT: TRUNC T2.Y, PV.W,
+; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.y,
+; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z,
+; CM-NEXT: -1026650416(-1.032789e+02), -4096(nan)
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, PV.W,
+; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
+; CM-NEXT: FLT_TO_INT T0.Z, PV.Y,
+; CM-NEXT: ADD * T1.W, KC0[3].W, -PV.Z,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: ADD T0.X, T0.X, -T0.W,
+; CM-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T2.Z, PV.Z, literal.y,
+; CM-NEXT: RNDNE * T0.W, PV.Y,
+; CM-NEXT: 967029397(3.122284e-04), -330(nan)
+; CM-NEXT: TRUNC T3.X, PV.W,
+; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.x,
+; CM-NEXT: MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y,
+; CM-NEXT: ADD * T1.W, PV.X, T2.X,
+; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221
+; CM-NEXT: ADD T1.Z, T1.Y, -T0.W,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
+; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04)
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: SETGT_UINT T4.X, T0.Z, literal.x,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
+; CM-NEXT: SETGT_UINT T2.Z, T0.Z, literal.z,
+; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
+; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T1.Z, T1.W,
+; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W,
+; CM-NEXT: ALU clause starting at 109:
+; CM-NEXT: CNDE_INT T5.X, T2.Z, T0.W, T1.Y,
+; CM-NEXT: CNDE_INT T1.Y, T4.X, T3.Y, T2.X,
+; CM-NEXT: FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT T2.X, T0.Z, literal.x,
+; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
+; CM-NEXT: MUL_IEEE T4.Z, PV.W, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
+; CM-NEXT: -127(nan), 209715200(1.972152e-31)
+; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T3.X, PV.W, T0.W, PV.Z,
+; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T4.Z, PV.X, T1.Y, T0.Z,
+; CM-NEXT: MAX_INT * T0.W, T3.Z, literal.y,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
+; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, T3.Z, literal.y,
+; CM-NEXT: SETGT_UINT T5.Z, T3.Z, literal.z,
+; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: -229(nan), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T7.X, PV.W, literal.x,
+; CM-NEXT: MIN_INT T4.Y, T3.Z, literal.y,
+; CM-NEXT: CNDE_INT T6.Z, PV.Z, PV.X, PV.Y,
+; CM-NEXT: SETGT_INT * T2.W, T3.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, T3.Z,
+; CM-NEXT: MIN_INT T1.Y, T0.Z, literal.x,
+; CM-NEXT: ADD_INT T6.Z, PV.Y, literal.y,
+; CM-NEXT: ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 381(5.338947e-43), -254(nan)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T8.X, T1.W, PV.W, PV.Z,
+; CM-NEXT: SETGT_INT T4.Y, T3.Z, literal.x,
+; CM-NEXT: ADD_INT T3.Z, PV.Y, literal.y,
+; CM-NEXT: ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 127(1.779649e-43), -254(nan)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T9.X, T2.Z, PV.W, PV.Z,
+; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
+; CM-NEXT: CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201
+; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T6.X, T2.W, PV.W, T1.Z,
+; CM-NEXT: LSHL T5.Y, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.Z, PV.X,
+; CM-NEXT: CNDE_INT * T0.W, T4.X, T3.Y, T2.Y,
+; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T0.X, T2.X, PV.W, T0.X,
+; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
+; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122
+; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
+; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
+; CM-NEXT: SETGT T3.Y, literal.x, KC0[3].W,
+; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T1.Y, PV.X, T5.X,
+; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
+; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].Z,
+; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
+; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y,
+; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
+; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: CNDE T1.Y, PV.Y, PV.X, 0.0,
+; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T0.X, PV.W, literal.x,
+; CM-NEXT: CNDE T1.Y, PV.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE T0.Z, T1.X, T0.Y, 0.0,
+; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z,
+; CM-NEXT: 2(2.802597e-45), 2139095040(INF)
+; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; CM-NEXT: CNDE * T1.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
; VI-SDAG-LABEL: s_exp_v4f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; VI-SDAG-NEXT: s_mov_b32 s8, 0xc2fc0000
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
+; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc
-; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v3
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v1, s[0:1]
+; VI-SDAG-NEXT: s_and_b32 s2, s7, 0xfffff000
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, s7, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_and_b32 s2, s6, 0xfffff000
+; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2
+; VI-SDAG-NEXT: v_sub_f32_e32 v7, s6, v7
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-SDAG-NEXT: v_mul_f32_e32 v8, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7
; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
-; VI-SDAG-NEXT: v_exp_f32_e32 v5, v3
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, v2, v3
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[0:1]
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, v5, v2
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, s5, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v5
-; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
-; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6
-; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v5, v5
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v3
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
+; VI-SDAG-NEXT: s_and_b32 s2, s5, 0xfffff000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5
+; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6
+; VI-SDAG-NEXT: v_sub_f32_e32 v9, s5, v9
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v1, vcc
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, v7
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9
+; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9
+; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10
+; VI-SDAG-NEXT: v_mul_f32_e32 v10, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v7
+; VI-SDAG-NEXT: v_add_f32_e32 v9, v10, v9
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v9
+; VI-SDAG-NEXT: v_exp_f32_e32 v9, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6
+; VI-SDAG-NEXT: s_and_b32 s2, s4, 0xfffff000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v9, v7
+; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2
+; VI-SDAG-NEXT: v_sub_f32_e32 v9, s4, v9
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9
+; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9
+; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v7
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v9
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v4, vcc
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, v5, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4
-; VI-SDAG-NEXT: v_mov_b32_e32 v5, s3
-; VI-SDAG-NEXT: v_mov_b32_e32 v4, s2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v7
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0
; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_v4f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; VI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8a000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x39a3b295
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1]
+; VI-GISEL-NEXT: s_and_b32 s2, s4, 0xfffff000
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, s4, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, s2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0
+; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
+; VI-GISEL-NEXT: s_and_b32 s2, s5, 0xfffff000
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s2
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, s5, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, s2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1
+; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1]
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2
+; VI-GISEL-NEXT: s_and_b32 s2, s6, 0xfffff000
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, s2, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
+; VI-GISEL-NEXT: v_mov_b32_e32 v6, s2
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, s6, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v9
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, s2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v9, v6
+; VI-GISEL-NEXT: v_rndne_f32_e32 v9, v8
+; VI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v9
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v8, v6
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2
+; VI-GISEL-NEXT: s_and_b32 s2, s7, 0xfffff000
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, s2, v3
+; VI-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
+; VI-GISEL-NEXT: v_mov_b32_e32 v8, s2
+; VI-GISEL-NEXT: v_sub_f32_e32 v8, s7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3fb8a000, v8
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v8
+; VI-GISEL-NEXT: v_rndne_f32_e32 v8, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s5, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8
+; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s5, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5
+; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_exp_v4f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: s_mov_b32 s8, 0xc2fc0000
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42800000
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v1
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x1f800000
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v6, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, v1, v3
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v6, s[0:1]
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s8, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc
-; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s8, v0
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v7
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v5
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v5
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v9, 0x7f800000
+; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v3
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v3, v7
+; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3
+; GFX900-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3
+; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v8, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v8, v3
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v2, vcc
+; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v8, v7
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s5, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v8, v7
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v8
+; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v0, -v7
+; GFX900-SDAG-NEXT: v_fma_f32 v7, s5, v1, v7
+; GFX900-SDAG-NEXT: v_add_f32_e32 v7, v10, v7
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v8
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v6
+; GFX900-SDAG-NEXT: v_ldexp_f32 v7, v7, v8
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v8, s4, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v10, v8
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v8
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v11, v8, v10
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v11, v0
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v6, vcc
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v5
-; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, v6, s[0:1]
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v10
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v6
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
+; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v8
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v5
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v6
; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
-; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc
+; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp_v4f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX900-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4
+; GFX900-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v8, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v7, s5, v2, v7
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
-; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v8, s5
+; GFX900-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v7
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
+; GFX900-GISEL-NEXT: v_fma_f32 v9, s6, v3, -v7
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v10, v7
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s7, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v9, s6, v2, v9
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v7, v7, v10
+; GFX900-GISEL-NEXT: v_fma_f32 v3, s7, v3, -v8
+; GFX900-GISEL-NEXT: v_add_f32_e32 v7, v7, v9
+; GFX900-GISEL-NEXT: v_fma_f32 v2, s7, v2, v3
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v8
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v9, v10
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v7, v7
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2
+; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v7, v7, v9
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_v4f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-SDAG-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-SDAG-NEXT: s_mov_b32 s2, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42800000
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218
+; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s7, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s2, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; SI-SDAG-NEXT: v_fma_f32 v4, s7, v0, -v2
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_fma_f32 v4, s7, v1, v4
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v5, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v3, v1, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v5, s[0:1]
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, s5, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[0:1], s2, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3
+; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s7, v4
+; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, s6, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v3
+; SI-SDAG-NEXT: v_sub_f32_e32 v7, v3, v6
+; SI-SDAG-NEXT: v_fma_f32 v3, s6, v0, -v3
+; SI-SDAG-NEXT: v_fma_f32 v3, s6, v1, v3
+; SI-SDAG-NEXT: v_add_f32_e32 v3, v7, v3
+; SI-SDAG-NEXT: v_exp_f32_e32 v7, v3
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s7, v5
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v7, v6
+; SI-SDAG-NEXT: v_mul_f32_e32 v6, s5, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v7, v6
+; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v7
+; SI-SDAG-NEXT: v_fma_f32 v6, s5, v0, -v6
+; SI-SDAG-NEXT: v_fma_f32 v6, s5, v1, v6
+; SI-SDAG-NEXT: v_add_f32_e32 v6, v9, v6
+; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v5
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v6, v6, v7
+; SI-SDAG-NEXT: v_mul_f32_e32 v7, s4, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v9, v7
+; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, -v7
+; SI-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9
+; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v10, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v5, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
-; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s10, -1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v4
-; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s5, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s5, v5
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v7
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v5
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_v4f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
-; SI-GISEL-NEXT: s_mov_b32 s0, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; SI-GISEL-NEXT: s_mov_b32 s2, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, s5
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v4, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, s2, -v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v4
+; SI-GISEL-NEXT: s_mov_b32 s2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v3
+; SI-GISEL-NEXT: v_fma_f32 v7, s5, v3, -v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v8, v1
+; SI-GISEL-NEXT: v_fma_f32 v7, s5, v2, v7
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v7, v8
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v5, s[0:1]
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], v2, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v4, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
-; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
-; SI-GISEL-NEXT: s_mov_b32 s10, -1
-; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
-; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v5
+; SI-GISEL-NEXT: v_mov_b32_e32 v8, s5
+; SI-GISEL-NEXT: s_mov_b32 s3, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v8
+; SI-GISEL-NEXT: v_fma_f32 v9, s6, v3, -v7
+; SI-GISEL-NEXT: v_rndne_f32_e32 v10, v7
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v8
+; SI-GISEL-NEXT: v_mul_f32_e32 v8, s7, v3
+; SI-GISEL-NEXT: v_fma_f32 v9, s6, v2, v9
+; SI-GISEL-NEXT: v_sub_f32_e32 v7, v7, v10
+; SI-GISEL-NEXT: v_fma_f32 v3, s7, v3, -v8
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9
+; SI-GISEL-NEXT: v_fma_f32 v2, s7, v2, v3
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v8
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v9, v10
+; SI-GISEL-NEXT: v_exp_f32_e32 v7, v7
+; SI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2
+; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v7, v7, v9
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s6, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s7, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s7, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_v4f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
+; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 98, @105, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 24, @204, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
-; R600-NEXT: ALU clause starting at 4:
-; R600-NEXT: MUL_IEEE * T0.W, KC0[4].X, literal.x,
-; R600-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; R600-NEXT: SETGT T1.W, literal.x, PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, KC0[3].W, literal.y,
-; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
-; R600-NEXT: SETGT T0.Z, literal.x, PS,
-; R600-NEXT: MUL_IEEE T3.W, KC0[3].Z, literal.y,
-; R600-NEXT: CNDE * T4.W, PV.W, 0.0, literal.z,
-; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
-; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
-; R600-NEXT: ADD T0.Y, T0.W, PS,
-; R600-NEXT: SETGT T1.Z, literal.x, PV.W,
-; R600-NEXT: MUL_IEEE T0.W, KC0[3].Y, literal.y,
-; R600-NEXT: CNDE * T4.W, PV.Z, 0.0, literal.z,
-; R600-NEXT: -1023672320(-1.260000e+02), 1069066811(1.442695e+00)
-; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
-; R600-NEXT: ADD T0.X, T2.W, PS,
-; R600-NEXT: CNDE T1.Y, T1.W, 1.0, literal.x, BS:VEC_120/SCL_212
-; R600-NEXT: SETGT T2.Z, literal.y, PV.W,
-; R600-NEXT: CNDE T1.W, PV.Z, 0.0, literal.z,
-; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
-; R600-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
-; R600-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
-; R600-NEXT: ADD T1.X, T3.W, PV.W,
-; R600-NEXT: CNDE T2.Y, T0.Z, 1.0, literal.x,
-; R600-NEXT: CNDE T0.Z, PV.Z, 0.0, literal.y,
-; R600-NEXT: MUL_IEEE T3.W, PS, PV.Y,
-; R600-NEXT: EXP_IEEE * T0.X, PV.X,
-; R600-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01)
-; R600-NEXT: ADD T0.Y, T0.W, PV.Z,
-; R600-NEXT: MUL_IEEE T3.Z, PS, PV.Y,
-; R600-NEXT: CNDE T0.W, T1.Z, 1.0, literal.x,
-; R600-NEXT: EXP_IEEE * T0.X, PV.X,
-; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.Y, PS, PV.W,
-; R600-NEXT: CNDE T0.W, T2.Z, 1.0, literal.x,
-; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
-; R600-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, PS, PV.W,
-; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
-; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT: ALU clause starting at 6:
+; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x,
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
+; R600-NEXT: ADD T1.W, KC0[3].Z, -PV.W,
+; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: RNDNE T3.W, PS,
+; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
+; R600-NEXT: TRUNC * T4.W, PV.W,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: FLT_TO_INT T0.Z, PS,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
+; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T1.Z, PS, PV.W,
+; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
+; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
+; R600-NEXT: -330(nan), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T0.X, PS, literal.x,
+; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
+; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
+; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
+; R600-NEXT: -254(nan), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
+; R600-NEXT: -127(nan), 254(3.559298e-43)
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
+; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
+; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
+; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; R600-NEXT: AND_INT T2.Y, KC0[4].X, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
+; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
+; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.y,
+; R600-NEXT: -4096(nan), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T0.X, T1.Y, T3.X, PS,
+; R600-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.X,
+; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT: ADD T0.W, KC0[4].X, -PV.Y,
+; R600-NEXT: MUL_IEEE * T1.W, PV.Y, literal.y,
+; R600-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
+; R600-NEXT: RNDNE T1.Y, PS,
+; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
+; R600-NEXT: ADD_INT T2.W, PV.Z, literal.y,
+; R600-NEXT: CNDE_INT * T3.W, T4.W, PV.Y, PV.X,
+; R600-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
+; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
+; R600-NEXT: AND_INT T0.Z, KC0[3].W, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.Z,
+; R600-NEXT: TRUNC * T2.W, PV.Y,
+; R600-NEXT: -4096(nan), 1069064192(1.442383e+00)
+; R600-NEXT: SETGT T0.X, literal.x, KC0[3].Z,
+; R600-NEXT: FLT_TO_INT T3.Y, PS,
+; R600-NEXT: MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W,
+; R600-NEXT: ADD T0.W, T1.W, -T1.Y,
+; R600-NEXT: MUL_IEEE * T1.W, PV.Z, literal.z,
+; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: RNDNE T1.X, PS,
+; R600-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x,
+; R600-NEXT: ADD T1.Z, PV.W, PV.Z,
+; R600-NEXT: MAX_INT T0.W, PV.Y, literal.y,
+; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.z,
+; R600-NEXT: -4096(nan), -330(nan)
+; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T2.X, PS, literal.x,
+; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
+; R600-NEXT: SETGT_UINT T0.W, T3.Y, literal.w,
+; R600-NEXT: EXP_IEEE * T1.Z, PV.Z,
+; R600-NEXT: -254(nan), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -229(nan)
+; R600-NEXT: ADD_INT T3.X, T3.Y, literal.x,
+; R600-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
+; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT T2.W, T3.Y, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, PS, literal.z,
+; R600-NEXT: -127(nan), 254(3.559298e-43)
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T4.X, T1.Z, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Z, T3.Y,
+; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T2.X,
+; R600-NEXT: SETGT_INT * T5.W, T3.Y, literal.z,
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; R600-NEXT: ADD T2.X, KC0[3].W, -T0.Z,
+; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
+; R600-NEXT: CNDE_INT * T2.Z, T0.W, PV.Y, T3.W,
+; R600-NEXT: ALU clause starting at 105:
+; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.x,
+; R600-NEXT: ADD * T3.W, KC0[3].Y, -T1.Y,
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T1.Y, literal.y,
+; R600-NEXT: CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T0.W, T2.W, T2.Z, T1.Z,
+; R600-NEXT: LSHL * T2.W, T3.Y, literal.z,
+; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T4.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, T5.W, PV.W, PV.Z,
+; R600-NEXT: RNDNE T1.Z, PV.Y,
+; R600-NEXT: MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE * T2.W, T2.X, literal.z,
+; R600-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: MULADD_IEEE T2.X, T2.X, literal.x, PS,
+; R600-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W,
+; R600-NEXT: ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE T0.W, PV.Y, PV.X,
+; R600-NEXT: SETGT * T2.W, literal.z, KC0[4].X,
+; R600-NEXT: 1069064192(1.442383e+00), 967029397(3.122284e-04)
+; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
+; R600-NEXT: CNDE T3.X, PS, PV.W, 0.0,
+; R600-NEXT: ADD T1.Y, PV.Z, PV.Y,
+; R600-NEXT: TRUNC T1.Z, T1.Z,
+; R600-NEXT: MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212
+; R600-NEXT: ADD * T1.W, T1.W, -T1.X,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: SETGT T2.X, KC0[4].X, literal.x,
+; R600-NEXT: ADD T2.Y, PS, PV.W,
+; R600-NEXT: FLT_TO_INT T0.Z, PV.Z,
+; R600-NEXT: TRUNC T0.W, T1.X,
+; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
+; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
+; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
+; R600-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
+; R600-NEXT: MUL_IEEE T0.W, PS, literal.z,
+; R600-NEXT: EXP_IEEE * T1.W, PV.Y,
+; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, PS, literal.x,
+; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T2.W, T0.Z, literal.z,
+; R600-NEXT: MAX_INT * T3.W, PV.Y, literal.w,
+; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), -330(nan)
+; R600-NEXT: SETGT_UINT T6.X, T0.Z, literal.x,
+; R600-NEXT: ADD_INT T3.Y, PS, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.z,
+; R600-NEXT: SETGT_UINT T3.W, T1.Y, literal.x,
+; R600-NEXT: MIN_INT * T4.W, T1.Y, literal.w,
+; R600-NEXT: -229(nan), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T7.X, PS, literal.x,
+; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
+; R600-NEXT: CNDE_INT T4.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT * T5.W, T1.Y, literal.y,
+; R600-NEXT: -254(nan), -127(nan)
+; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T8.X, PS, PV.W, T1.Y,
+; R600-NEXT: CNDE_INT T3.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT: SETGT_INT T2.Z, T1.Y, literal.x,
+; R600-NEXT: CNDE_INT T2.W, T6.X, T1.Z, T2.W,
+; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.y,
+; R600-NEXT: 127(1.779649e-43), -127(nan)
+; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T0.Z,
+; R600-NEXT: CNDE_INT T1.Y, PV.Z, PV.X, PV.Y,
+; R600-NEXT: MIN_INT T1.Z, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE T2.W, T1.W, literal.y,
+; R600-NEXT: MUL_IEEE * T6.W, T2.Y, literal.z,
+; R600-NEXT: 381(5.338947e-43), 2130706432(1.701412e+38)
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T8.X, T3.W, PS, T2.Y,
+; R600-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
+; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
+; R600-NEXT: SETGT_UINT * T6.W, T0.Z, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
+; R600-NEXT: -127(nan), 254(3.559298e-43)
+; R600-NEXT: CNDE_INT T9.X, PS, PV.W, PV.Z,
+; R600-NEXT: SETGT_INT T3.Y, T0.Z, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122
+; R600-NEXT: LSHL * T2.W, T1.Y, literal.y,
+; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44)
+; R600-NEXT: ADD_INT T8.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T1.Y, T2.Z, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT T0.Z, PV.Y, T7.X, PV.X,
+; R600-NEXT: CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122
+; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE * T1.W, T4.X, literal.x,
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T4.X, T6.W, T4.X, PV.W,
+; R600-NEXT: CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212
+; R600-NEXT: ALU clause starting at 204:
+; R600-NEXT: LSHL T0.Z, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE T0.W, T1.Y, T8.X,
+; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
+; R600-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
+; R600-NEXT: CNDE T1.X, PS, PV.W, 0.0,
+; R600-NEXT: SETGT T1.Y, KC0[3].W, literal.x,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT: CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE * T1.W, T2.X, T3.X, literal.z,
+; R600-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
+; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
+; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y,
+; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y,
+; R600-NEXT: CNDE T0.W, T0.X, T0.Y, 0.0,
+; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z,
+; R600-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
+; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
+; R600-NEXT: CNDE T0.W, PV.Y, PV.X, 0.0,
+; R600-NEXT: SETGT * T2.W, KC0[3].Y, literal.y,
+; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
+; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x,
+; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y,
+; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
;
; CM-LABEL: s_exp_v4f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 49, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
+; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 100, @104, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 36, @205, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
-; CM-NEXT: ALU clause starting at 4:
-; CM-NEXT: MUL_IEEE T0.Z, KC0[3].Z, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, KC0[4].X, literal.x,
-; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Y, KC0[3].W, literal.x,
-; CM-NEXT: SETGT T1.Z, literal.y, PV.W,
-; CM-NEXT: SETGT * T1.W, literal.y, PV.Z,
-; CM-NEXT: 1069066811(1.442695e+00), -1023672320(-1.260000e+02)
-; CM-NEXT: CNDE T1.Y, PV.W, 0.0, literal.x,
-; CM-NEXT: CNDE T2.Z, PV.Z, 0.0, literal.x,
-; CM-NEXT: SETGT * T2.W, literal.y, PV.Y,
-; CM-NEXT: 1115684864(6.400000e+01), -1023672320(-1.260000e+02)
-; CM-NEXT: CNDE T0.X, T1.Z, 1.0, literal.x,
-; CM-NEXT: CNDE T2.Y, PV.W, 0.0, literal.y,
-; CM-NEXT: MUL_IEEE T1.Z, KC0[3].Y, literal.z,
-; CM-NEXT: ADD * T0.W, T0.W, PV.Z,
-; CM-NEXT: 528482304(5.421011e-20), 1115684864(6.400000e+01)
-; CM-NEXT: 1069066811(1.442695e+00), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 6:
+; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT: AND_INT * T2.W, KC0[3].W, literal.z,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD T1.Y, KC0[3].W, -PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: TRUNC T1.X, T1.Z,
+; CM-NEXT: RNDNE T2.Y, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
+; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y,
+; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.w,
+; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
+; CM-NEXT: -330(nan), 381(5.338947e-43)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: ADD_INT T4.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T4.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T3.Y, PV.Y, PV.X, T3.X,
+; CM-NEXT: SETGT_INT T0.Z, T0.Z, literal.x,
+; CM-NEXT: MUL_IEEE * T3.W, T1.Y, literal.y,
+; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T3.X, T4.Y, T1.Y, PV.W,
+; CM-NEXT: AND_INT T1.Y, KC0[3].Z, literal.x,
+; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.X, T0.W,
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT: LSHL T3.Y, PV.Z, literal.x,
+; CM-NEXT: TRUNC T1.Z, T2.Y,
+; CM-NEXT: ADD * T0.W, KC0[3].Z, -PV.Y,
+; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT: FLT_TO_INT T2.Y, PV.Z,
+; CM-NEXT: ADD_INT T1.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T1.W, T0.Z, PV.X, T3.X,
+; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
+; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT: MIN_INT T3.Y, PV.Y, literal.x,
+; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.X,
+; CM-NEXT: ADD * T0.W, T0.Y, T2.X,
+; CM-NEXT: 381(5.338947e-43), 1069064192(1.442383e+00)
; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W, T0.W,
-; CM-NEXT: CNDE T1.X, T2.W, 1.0, literal.x,
-; CM-NEXT: SETGT T3.Y, literal.y, T1.Z,
-; CM-NEXT: ADD T2.Z, T0.Y, T2.Y,
-; CM-NEXT: MUL_IEEE * T2.W, PV.W, T0.X,
-; CM-NEXT: 528482304(5.421011e-20), -1023672320(-1.260000e+02)
-; CM-NEXT: EXP_IEEE T0.X, T2.Z,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T2.Z,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.Z,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.Z,
-; CM-NEXT: CNDE T2.X, T3.Y, 0.0, literal.x,
-; CM-NEXT: CNDE T0.Y, T1.W, 1.0, literal.y,
-; CM-NEXT: MUL_IEEE T2.Z, PV.X, T1.X,
-; CM-NEXT: ADD * T0.W, T0.Z, T1.Y,
-; CM-NEXT: 1115684864(6.400000e+01), 528482304(5.421011e-20)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Y, T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE T2.Y, PV.X, T0.Y,
-; CM-NEXT: CNDE T0.Z, T3.Y, 1.0, literal.x,
-; CM-NEXT: ADD * T0.W, T1.Z, T2.X,
-; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z,
+; CM-NEXT: MUL_IEEE T4.Y, PV.Y, literal.y,
+; CM-NEXT: ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201
+; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38)
+; CM-NEXT: -254(nan), -330(nan)
+; CM-NEXT: ADD_INT T2.X, T2.Y, literal.x,
+; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T2.Y, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T2.Y, literal.w,
+; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: SETGT_UINT T3.X, T2.Y, literal.x,
+; CM-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT T1.Z, T2.Y, literal.y,
+; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: 254(3.559298e-43), -127(nan)
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T4.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 104:
+; CM-NEXT: CNDE_INT T0.Z, T3.X, T2.X, T0.Z,
+; CM-NEXT: SETGT_INT * T2.W, T2.Y, literal.x,
+; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T2.X, T1.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, T3.Y, PV.Z,
+; CM-NEXT: CNDE_INT T0.Z, T0.W, T4.X, T1.W,
+; CM-NEXT: MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201
+; CM-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
+; CM-NEXT: AND_INT T4.X, KC0[4].X, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, T3.X, T4.Y, PV.W,
+; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y,
+; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
+; CM-NEXT: -4096(nan), 23(3.222986e-44)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.Z, PV.Y,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
+; CM-NEXT: RNDNE * T0.W, T2.X,
+; CM-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
+; CM-NEXT: ADD T2.X, T2.X, -PV.W,
+; CM-NEXT: RNDNE T1.Y, PV.Z,
+; CM-NEXT: MUL_IEEE T1.Z, PV.Y, PV.X,
+; CM-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
+; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
+; CM-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
+; CM-NEXT: TRUNC T0.Y, T0.W,
+; CM-NEXT: TRUNC T1.Z, PV.Y,
+; CM-NEXT: ADD * T0.W, PV.X, T1.X,
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MUL_IEEE * T2.X, PV.X, T0.Z,
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: EXP_IEEE * T0.W, T0.W,
+; CM-NEXT: FLT_TO_INT T1.X, T1.Z,
+; CM-NEXT: FLT_TO_INT T0.Y, T0.Y,
+; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
+; CM-NEXT: ADD * T1.W, KC0[4].X, -T4.X,
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T2.Y, T0.W, literal.y,
+; CM-NEXT: MUL_IEEE T2.Z, PV.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T2.W, PV.Y, literal.w,
+; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T5.X, PV.W, T1.Z, PV.Z,
+; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
+; CM-NEXT: MULADD_IEEE T1.Z, T1.W, literal.y, PV.X,
+; CM-NEXT: MAX_INT * T1.W, T1.X, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), 1069064192(1.442383e+00)
+; CM-NEXT: -330(nan), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T4.Y, T1.X, literal.y,
+; CM-NEXT: MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212
+; CM-NEXT: MAX_INT * T1.W, T0.Y, literal.w,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: 967029397(3.122284e-04), -330(nan)
+; CM-NEXT: ADD T4.X, T0.Z, -T1.Y,
+; CM-NEXT: ADD_INT T1.Y, PV.W, literal.x,
+; CM-NEXT: ADD_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT: SETGT_UINT * T1.W, T0.Y, literal.z,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: -229(nan), 0(0.000000e+00)
+; CM-NEXT: SETGT_UINT T6.X, T1.X, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT: ADD * T3.W, PV.X, T1.Z,
+; CM-NEXT: -229(nan), -127(nan)
+; CM-NEXT: EXP_IEEE T1.X (MASKED), T3.W,
+; CM-NEXT: EXP_IEEE T1.Y (MASKED), T3.W,
+; CM-NEXT: EXP_IEEE T1.Z, T3.W,
+; CM-NEXT: EXP_IEEE * T1.W (MASKED), T3.W,
+; CM-NEXT: CNDE_INT T4.X, T0.Z, T1.Y, T0.Y,
+; CM-NEXT: CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212
+; CM-NEXT: SETGT_INT T2.Z, T1.X, literal.x,
+; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.y,
+; CM-NEXT: -127(nan), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T2.X, T1.Z, literal.x,
+; CM-NEXT: MUL_IEEE T4.Y, PV.W, literal.y,
+; CM-NEXT: CNDE_INT T3.Z, PV.Z, PV.Y, T1.X,
+; CM-NEXT: MIN_INT * T4.W, T1.X, literal.z,
+; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: MIN_INT T7.X, T0.Y, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T4.Z, T1.X, literal.z,
+; CM-NEXT: SETGT_UINT * T4.W, T1.X, literal.w,
+; CM-NEXT: 381(5.338947e-43), -254(nan)
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T8.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: SETGT_INT T1.Y, T1.X, literal.x,
+; CM-NEXT: ADD_INT T4.Z, PV.X, literal.y,
+; CM-NEXT: ADD_INT * T5.W, T0.Y, literal.z,
+; CM-NEXT: 127(1.779649e-43), -254(nan)
+; CM-NEXT: -127(nan), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T1.X, T2.W, PV.W, PV.Z,
+; CM-NEXT: CNDE_INT T5.Y, PV.Y, T3.Z, PV.X,
+; CM-NEXT: CNDE_INT T3.Z, T6.X, T4.Y, T3.W,
+; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT T6.X, T0.Y, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T4.W, T2.X, PV.W,
+; CM-NEXT: CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z,
+; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 205:
+; CM-NEXT: LSHL * T2.W, T5.Y, literal.x,
+; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y,
+; CM-NEXT: CNDE_INT * T1.Z, T6.X, T4.X, T1.X,
+; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT * T1.W, T1.W, T3.Y, T2.Y,
+; CM-NEXT: CNDE_INT T1.X, T0.Z, PV.W, T0.W,
+; CM-NEXT: LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T2.X,
+; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X,
+; CM-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
+; CM-NEXT: CNDE T2.X, PV.W, PV.Z, 0.0,
+; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x,
+; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T6.X, PV.X, T5.X,
+; CM-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
+; CM-NEXT: SETGT T1.X, KC0[3].W, literal.x,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
+; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z,
+; CM-NEXT: 1118925336(8.872284e+01), -1026650416(-1.032789e+02)
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
+; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0,
+; CM-NEXT: CNDE T0.Z, PV.X, T3.X, literal.y,
+; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z,
+; CM-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
+; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
+; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x,
+; CM-NEXT: CNDE T1.Z, PV.X, T0.X, 0.0,
+; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y,
+; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
+; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
store <4 x float> %result, ptr addrspace(1) %out
}
; Reviewer notes for v_exp_f32 (float argument, float return). The IR body is
; not visible in this excerpt; presumably it calls llvm.exp.f32 in the same way
; s_exp_v4f32 above calls llvm.exp.v4f32 -- TODO confirm.
;
; Lines marked '-' are the old expectations shared under the GCN-SDAG and
; GCN-GISEL prefixes; lines marked '+' replace them with separate expectations
; for the VI-* and GFX900-* prefixes and rewrite the SI-* ones.
;
; Old sequence, as checked: multiply the input by 0x3fb8aa3b (1.442695),
; compare with 0xc2fc0000 (-126.0), conditionally add 0x42800000 (64.0),
; v_exp_f32, then multiply by 1.0 or 0x1f800000 (5.421011e-20).
;
; In every new variant the SDAG checks compare against an SGPR constant with
; v_cmp_ngt/v_cmp_nlt, while the GISEL checks load the constant into a VGPR and
; use v_cmp_lt/v_cmp_gt with the select operands swapped.
define float @v_exp_f32(float %in) {
-; GCN-SDAG-LABEL: v_exp_f32:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
; New expectation for the VI prefixes: the input is masked with 0xfffff000, and
; the masked value and the remainder (input minus masked value) are each
; multiplied by 0x3fb8a000 (1.442383) and 0x39a3b295 (3.122284e-04); the
; products are combined with v_add_f32 -- no v_fma_f32 appears in this variant.
; The v_exp_f32 result is scaled with v_ldexp_f32 by the converted v_rndne_f32
; value, then replaced by 0 when the input is below 0xc2ce8ed0 (-103.2789) and
; by 0x7f800000 (INF) when it is above 0x42b17218 (88.72284).
+; VI-SDAG-LABEL: v_exp_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; New expectation for the GFX900 prefixes: two v_fma_f32 using 0x3fb8aa3b and
; 0x32a5705f take the place of the mask-and-split multiplies; the rounding,
; v_exp_f32, v_ldexp_f32 and the two range selects use the same constants as
; the VI variant.
+; GFX900-SDAG-LABEL: v_exp_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; The SI prefixes get the same v_fma_f32-based sequence as the GFX900 ones,
; except that the scale instruction is printed as v_ldexp_f32_e32.
; SI-SDAG-LABEL: v_exp_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32:
}
define float @v_exp_fabs_f32(float %in) {
-; GCN-SDAG-LABEL: v_exp_fabs_f32:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_fabs_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_fabs_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; GCN-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_fabs_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fabs_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fabs_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fabs_f32:
}
define float @v_exp_fneg_fabs_f32(float %in) {
-; GCN-SDAG-LABEL: v_exp_fneg_fabs_f32:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
-; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_fneg_fabs_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-SDAG-NEXT: v_sub_f32_e64 v4, -|v0|, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_fneg_fabs_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; GCN-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_fneg_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_or_b32_e32 v1, 0x80000000, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e64 v2, -|v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_fneg_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, -|v0|, s4
+; GFX900-GISEL-NEXT: v_fma_f32 v2, -|v0|, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, -|v0|, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_fabs_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
-; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_fabs_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e64 v1, -|v0|, s4
+; SI-GISEL-NEXT: v_fma_f32 v2, -|v0|, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, -|v0|, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_fabs_f32:
}
define float @v_exp_fneg_f32(float %in) {
-; GCN-SDAG-LABEL: v_exp_fneg_f32:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_fneg_f32:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-SDAG-NEXT: v_sub_f32_e64 v4, -v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_fneg_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; GCN-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_fneg_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e64 v2, -v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_fneg_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_fneg_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, -v0, s4
+; GFX900-GISEL-NEXT: v_fma_f32 v2, -v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, -v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e64 v1, -v0, s4
+; SI-GISEL-NEXT: v_fma_f32 v2, -v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, -v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_f32:
}
define float @v_exp_f32_fast(float %in) {
-; GCN-LABEL: v_exp_f32_fast:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_fast:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_fast:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_fast:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_fast:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_fast:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_fast:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_fast:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_fast:
; R600: ; %bb.0:
}
define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GCN-LABEL: v_exp_f32_unsafe_math_attr:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_unsafe_math_attr:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_unsafe_math_attr:
; R600: ; %bb.0:
}
define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GCN-LABEL: v_exp_f32_approx_fn_attr:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_approx_fn_attr:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_approx_fn_attr:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_approx_fn_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_approx_fn_attr:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_approx_fn_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_approx_fn_attr:
; R600: ; %bb.0:
}
define float @v_exp_f32_ninf(float %in) {
-; GCN-SDAG-LABEL: v_exp_f32_ninf:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_ninf:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_ninf:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_ninf:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_ninf:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf:
}
define float @v_exp_f32_afn(float %in) {
-; GCN-LABEL: v_exp_f32_afn:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_afn:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_afn:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_afn:
; R600: ; %bb.0:
}
define float @v_exp_f32_afn_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp_f32_afn_dynamic:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_afn_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_afn_dynamic:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_afn_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_afn_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_afn_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_afn_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_afn_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_afn_dynamic:
; R600: ; %bb.0:
}
define float @v_fabs_exp_f32_afn(float %in) {
-; GCN-SDAG-LABEL: v_fabs_exp_f32_afn:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GCN-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_fabs_exp_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_fabs_exp_f32_afn:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; GCN-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_fabs_exp_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_fabs_exp_f32_afn:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_fabs_exp_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_fabs_exp_f32_afn:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, s4
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_fabs_exp_f32_afn:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_exp_f32_afn:
}
define float @v_exp_f32_daz(float %in) #0 {
-; GCN-LABEL: v_exp_f32_daz:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_daz:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_daz:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan(float %in) {
-; GCN-SDAG-LABEL: v_exp_f32_nnan:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_nnan:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_nnan:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_nnan:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_nnan:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan:
}
define float @v_exp_f32_nnan_daz(float %in) #0 {
-; GCN-LABEL: v_exp_f32_nnan_daz:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_nnan_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_exp_f32_nnan_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_nnan_daz:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_exp_f32_nnan_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_nnan_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_nnan_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_daz:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan_dynamic(float %in) #1 {
-; GCN-SDAG-LABEL: v_exp_f32_nnan_dynamic:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_nnan_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_nnan_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_nnan_dynamic:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_dynamic:
}
define float @v_exp_f32_ninf_daz(float %in) #0 {
-; GCN-LABEL: v_exp_f32_ninf_daz:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_ninf_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_ninf_daz:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_ninf_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_ninf_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_ninf_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf_daz:
; R600: ; %bb.0:
}
define float @v_exp_f32_ninf_dynamic(float %in) #1 {
-; GCN-SDAG-LABEL: v_exp_f32_ninf_dynamic:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_ninf_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_ninf_dynamic:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_ninf_dynamic:
}
define float @v_exp_f32_nnan_ninf(float %in) {
-; GCN-SDAG-LABEL: v_exp_f32_nnan_ninf:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_nnan_ninf:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_nnan_ninf:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_nnan_ninf:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf:
}
define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
-; GCN-LABEL: v_exp_f32_nnan_ninf_daz:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-LABEL: v_exp_f32_nnan_ninf_daz:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf_daz:
; R600: ; %bb.0:
}
define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
-; GCN-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
}
define float @v_exp_f32_dynamic_mode(float %in) #1 {
-; GCN-SDAG-LABEL: v_exp_f32_dynamic_mode:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GCN-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_dynamic_mode:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_dynamic_mode:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GCN-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_dynamic_mode:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_dynamic_mode:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x1f800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_dynamic_mode:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
+; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_dynamic_mode:
}
define float @v_exp_f32_undef() {
-; GCN-SDAG-LABEL: v_exp_f32_undef:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_undef:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, 0, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v1, 0x7fc00000, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_undef:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_undef:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_sub_f32_e64 v0, s4, 0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_undef:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_undef:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
+; GFX900-GISEL-NEXT: v_fma_f32 v1, s4, v2, v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_undef:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000
+; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000
+; SI-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_undef:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
+; SI-GISEL-NEXT: v_fma_f32 v1, s4, v2, v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_undef:
}
define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
-; GCN-SDAG-LABEL: v_exp_f32_from_fpext_f16:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3
+; VI-SDAG-NEXT: v_rndne_f32_e32 v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v4
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_from_fpext_f16:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_f16:
}
define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
-; GCN-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_math_f16:
}
define float @v_exp_f32_from_fpext_bf16(bfloat %src) {
-; GCN-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_bf16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_bf16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_bf16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast:
}
define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
-; GCN-LABEL: v_exp_f32_from_fpext_math_f16_daz:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_f16_e32 v0, v0, v1
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v3
+; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
+; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s5, v3
+; GFX900-SDAG-NEXT: v_mad_mix_f32 v0, v0, s4, -v2 op_sel_hi:[1,0,0]
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v1, s4, -v3
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v3
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v1, v2, v4
+; GFX900-GISEL-NEXT: v_mad_mix_f32 v0, v0, s4, -v3 op_sel_hi:[1,0,0]
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
+; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz:
; GCN-LABEL: v_exp_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
-; GCN-NEXT: v_exp_f16_e32 v0, v0
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
}
define half @v_exp_fabs_f16(half %in) {
-; GCN-SDAG-LABEL: v_exp_fabs_f16:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GCN-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4
-; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp_fabs_f16:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; GCN-GISEL-NEXT: v_mul_f16_e64 v0, |v0|, v1
-; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp_fabs_f16:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fabs_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fabs_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GCN-SDAG-LABEL: v_exp_fneg_fabs_f16:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: s_mov_b32 s4, 0xbdc5
-; GCN-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4
-; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp_fneg_fabs_f16:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; GCN-GISEL-NEXT: v_mul_f16_e64 v0, -|v0|, v1
-; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0
+; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_fabs_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_fabs_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GCN-SDAG-LABEL: v_exp_fneg_f16:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f16_e32 v0, 0xbdc5, v0
-; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GCN-GISEL-LABEL: v_exp_fneg_f16:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; GCN-GISEL-NEXT: v_mul_f16_e64 v0, -v0, v1
-; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0
+; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f16_fast:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 0x3dc5
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
; VI-SDAG-LABEL: v_exp_v2f16:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
-; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_exp_v2f16:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
-; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-SDAG-LABEL: v_exp_v2f16:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0
-; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_exp_v2f16:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_exp_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_v2f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v2f16:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
; VI-SDAG-LABEL: v_exp_fabs_v2f16:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; VI-SDAG-NEXT: v_mul_f16_sdwa v1, |v0|, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SDAG-NEXT: v_mul_f16_e64 v0, |v0|, s4
-; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
-; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_fabs_v2f16:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0
-; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0|
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fabs_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fabs_v2f16:
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fabs_v2f16:
; VI-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; VI-SDAG-NEXT: v_mul_f16_sdwa v1, -|v0|, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SDAG-NEXT: v_mul_f16_e64 v0, -|v0|, s4
-; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
-; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbdc5
-; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0
-; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0|
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
-; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_fabs_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_fabs_v2f16:
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_fabs_v2f16:
; VI-SDAG-LABEL: v_exp_fneg_v2f16:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; VI-SDAG-NEXT: v_mul_f16_sdwa v1, -v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SDAG-NEXT: v_mul_f16_e64 v0, -v0, s4
-; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
-; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_fneg_v2f16:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbdc5
-; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0
-; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_fneg_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_fneg_v2f16:
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v2, v1
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_fneg_v2f16:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5]
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v2f16_fast:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 0x3dc5
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-LABEL: v_exp_v3f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v3, 0x3dc5
-; VI-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NEXT: v_exp_f16_e32 v2, v2
-; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
-; VI-NEXT: v_exp_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-NEXT: v_exp_f32_e32 v2, v2
+; VI-NEXT: v_exp_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-NEXT: v_exp_f32_e32 v1, v1
+; VI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
; VI-NEXT: v_or_b32_e32 v0, v2, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-SDAG-LABEL: v_exp_v3f16:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_exp_v3f16:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
-; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_exp_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_v3f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v2
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5]
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v3f16:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: s_mov_b32 s4, 0xc2fc0000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v5
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2fc0000
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v4, vcc
; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v5, vcc
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v6
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
}
define <3 x half> @v_exp_v3f16_afn(<3 x half> %in) {
-; VI-LABEL: v_exp_v3f16_afn:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v3, 0x3dc5
-; VI-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NEXT: v_exp_f16_e32 v2, v2
-; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
-; VI-NEXT: v_exp_f16_e32 v1, v1
-; VI-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp_v3f16_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
+; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_exp_v3f16_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3dc5
+; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
+; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
+; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
+; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp_v3f16_afn:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
-; SI-SDAG-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v2
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5]
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v4, vcc
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v4, s[4:5]
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_v3f16_afn:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 0x3dc5
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX900: {{.*}}