When floating-point constants are whole numbers they print without a decimal point, so they look like integers, even though they mean something very different from integers in, for example, an 'and' instruction.
Ideally we would just print a decimal point and a 0, but I couldn't see how to make APFloat::toString do that.
llvm-svn: 345488
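To illustrate the ambiguity (an illustrative sketch, not part of the patch): the float 1.0 has bit pattern 0x3F800000, so a constant-pool comment like [1,1,u,u] on a floating-point load can be misread as integer ones even though the underlying bits are nothing like the integer 1.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float F = 1.0f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // safe type-pun
  // Prints 0x3f800000: float 1.0 and integer 1 share no bit
  // pattern, so an asm comment that prints "1" hides which
  // interpretation the constant pool actually holds.
  std::printf("float 1.0 bits: 0x%08x\n", Bits);
  return 0;
}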
static void printConstant(const APFloat &Flt, raw_ostream &CS) {
  SmallString<32> Str;
-  Flt.toString(Str);
+  // Force scientific notation to distinguish from integers.
+  Flt.toString(Str, 0, 0);
  CS << Str;
}
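For reference, a minimal sketch of how the old and new toString calls differ, assuming an LLVM development tree (this example is not part of the patch):

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  APFloat One(1.0f);
  SmallString<32> Old, New;
  // Old call: natural precision with the default FormatMaxPadding
  // of 3 allows zero padding, so a whole number prints as plain "1".
  One.toString(Old);
  // New call: FormatPrecision = 0 keeps the natural precision and
  // FormatMaxPadding = 0 forbids padding zeros, which forces
  // scientific notation, so the same value prints as "1.0E+0".
  One.toString(New, /*FormatPrecision=*/0, /*FormatMaxPadding=*/0);
  outs() << Old << " vs " << New << "\n";
  return 0;
}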
; X32-NEXT: cvtdq2ps %xmm0, %xmm1
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: cmpltps %xmm2, %xmm0
-; X32-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u>
+; X32-NEXT: movaps {{.*#+}} xmm3 = <1.0E+0,1.0E+0,u,u>
; X32-NEXT: addps %xmm1, %xmm3
; X32-NEXT: movaps %xmm1, %xmm4
; X32-NEXT: blendvps %xmm0, %xmm3, %xmm4
; X64-NEXT: cvtdq2ps %xmm0, %xmm1
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cmpltps %xmm2, %xmm0
-; X64-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u>
+; X64-NEXT: movaps {{.*#+}} xmm3 = <1.0E+0,1.0E+0,u,u>
; X64-NEXT: addps %xmm1, %xmm3
; X64-NEXT: movaps %xmm1, %xmm4
; X64-NEXT: blendvps %xmm0, %xmm3, %xmm4
define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
; CHECK-LABEL: ui_to_fp_conv:
; CHECK: # %bb.0: # %allocas
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: movups %xmm1, 16(%rsi)
; CHECK-NEXT: movups %xmm0, (%rsi)
define <16 x float> @fneg(<16 x float> %a) nounwind {
; CHECK-LABEL: fneg:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: retq
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## %bb.0: ## %entry
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
; X32-NEXT: retl
;
; X64-LABEL: _e2:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
; X64-NEXT: retq
entry:
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
; AVX2-LABEL: ld0_hi0_lo1_4f64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
; AVX2-LABEL: ld1_hi0_hi1_4f64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
; AVX2-LABEL: ld0_hi0_lo1_8f32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
-; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
; AVX2-LABEL: ld1_hi0_hi1_8f32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
-; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
; X32-LABEL: test3:
; X32: # %bb.0: # %entry
; X32-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
-; X32-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
+; X32-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test3:
; X64: # %bb.0: # %entry
; X64-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
-; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
+; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
entry:
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V113:
; X32-AVX2: ## %bb.0: ## %entry
-; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
+; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: V113:
; X64-AVX2: ## %bb.0: ## %entry
-; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
; X64-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## %bb.0:
-; X32-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
+; X32-NEXT: vbroadcastss {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
; X32-NEXT: retl
;
; X64-LABEL: _e2:
; X64: ## %bb.0:
-; X64-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
+; X64-NEXT: vbroadcastss {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
; X64-NEXT: retq
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; AVX512BW-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; AVX512DQ-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
define <8 x double> @test_mm512_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_round_pd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxorq %zmm3, %zmm0, %zmm4
; CHECK-NEXT: vpxorq %zmm3, %zmm2, %zmm0
; CHECK-NEXT: vfmadd231pd {rn-sae}, %zmm4, %zmm1, %zmm0
define <8 x double> @test_mm512_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_pd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxorq %zmm3, %zmm0, %zmm4
; CHECK-NEXT: vpxorq %zmm3, %zmm2, %zmm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm0 = (zmm1 * zmm4) + zmm0
define <16 x float> @test_mm512_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_round_ps:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm3 = [-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0]
+; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxord %zmm3, %zmm0, %zmm4
; CHECK-NEXT: vpxord %zmm3, %zmm2, %zmm0
; CHECK-NEXT: vfmadd231ps {rn-sae}, %zmm4, %zmm1, %zmm0
define <16 x float> @test_mm512_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_ps:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm3 = [-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0]
+; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxord %zmm3, %zmm0, %zmm4
; CHECK-NEXT: vpxord %zmm3, %zmm2, %zmm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm1 * zmm4) + zmm0
; X86-NEXT: subl $8, %esp
; X86-NEXT: movb 8(%ebp), %al
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
+; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
; X86-NEXT: vmulpd %ymm0, %ymm1, %ymm0
; X64-LABEL: test_mm512_mask_reduce_mul_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
+; X64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X64-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; X64-NEXT: vextractf64x4 $1, %zmm1, %ymm0
; X64-NEXT: vmulpd %ymm0, %ymm1, %ymm0
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
; X86-NEXT: vmulps %ymm0, %ymm1, %ymm0
; X64-LABEL: test_mm512_mask_reduce_mul_ps:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X64-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X64-NEXT: vextractf64x4 $1, %zmm1, %ymm0
; X64-NEXT: vmulps %ymm0, %ymm1, %ymm0
define <4 x float> @f4xf32_f64(<4 x float> %a) {
; AVX-LABEL: f4xf32_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0.0078125018626451492,0.0078125018626451492]
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [7.8125018626451492E-3,7.8125018626451492E-3]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
;
; ALL32-LABEL: f4xf32_f64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [0.0078125018626451492,0.0078125018626451492]
+; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [7.8125018626451492E-3,7.8125018626451492E-3]
; ALL32-NEXT: # xmm1 = mem[0,0]
; ALL32-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vdivps %xmm0, %xmm1, %xmm0
;
; AVX-64-LABEL: f4xf32_f64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0.0078125018626451492,0.0078125018626451492]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [7.8125018626451492E-3,7.8125018626451492E-3]
; AVX-64-NEXT: # xmm1 = mem[0,0]
; AVX-64-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vdivps %xmm0, %xmm1, %xmm0
define <8 x float> @f8xf32_f64(<8 x float> %a) {
; AVX-LABEL: f8xf32_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xf32_f64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xf32_f64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX-64-NEXT: retq
define <8 x float> @f8xf32_f128(<8 x float> %a) {
; AVX-LABEL: f8xf32_f128:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX-NEXT: # ymm1 = mem[0,1,0,1]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
;
; ALL32-LABEL: f8xf32_f128:
; ALL32: # %bb.0:
-; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
+; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
;
; AVX-64-LABEL: f8xf32_f128:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
;
; ALL64-LABEL: f8xf32_f128:
; ALL64: # %bb.0:
-; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
+; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0
define <16 x float> @f16xf32_f64(<16 x float> %a) {
; AVX-LABEL: f16xf32_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
;
; AVX2-LABEL: f16xf32_f64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
;
; AVX512-LABEL: f16xf32_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xf32_f64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3,7.8125018626451492E-3]
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
define <16 x float> @f16xf32_f128(<16 x float> %a) {
; AVX-LABEL: f16xf32_f128:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX-NEXT: # ymm2 = mem[0,1,0,1]
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
;
; AVX2-LABEL: f16xf32_f128:
; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
+; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
;
; AVX512-LABEL: f16xf32_f128:
; AVX512: # %bb.0:
-; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3]
+; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
;
; AVX-64-LABEL: f16xf32_f128:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
;
; AVX2-64-LABEL: f16xf32_f128:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
+; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
;
; AVX512F-64-LABEL: f16xf32_f128:
; AVX512F-64: # %bb.0:
-; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3]
+; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
define <16 x float> @f16xf32_f256(<16 x float> %a) {
; AVX-LABEL: f16xf32_f256:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
+; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
;
; AVX2-LABEL: f16xf32_f256:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
+; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
;
; AVX512-LABEL: f16xf32_f256:
; AVX512: # %bb.0:
-; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7]
+; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
;
; AVX-64-LABEL: f16xf32_f256:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
;
; AVX2-64-LABEL: f16xf32_f256:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
+; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
;
; AVX512F-64-LABEL: f16xf32_f256:
; AVX512F-64: # %bb.0:
-; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7]
+; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
define <4 x double> @f4xf64_f128(<4 x double> %a) {
; AVX-LABEL: f4xf64_f128:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-NEXT: # ymm1 = mem[0,1,0,1]
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
;
; ALL32-LABEL: f4xf64_f128:
; ALL32: # %bb.0:
-; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
+; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
; ALL32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vdivpd %ymm0, %ymm1, %ymm0
;
; AVX-64-LABEL: f4xf64_f128:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
;
; ALL64-LABEL: f4xf64_f128:
; ALL64: # %bb.0:
-; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
+; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
; ALL64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
define <8 x double> @f8xf64_f128(<8 x double> %a) {
; AVX-LABEL: f8xf64_f128:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
+; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-NEXT: # ymm2 = mem[0,1,0,1]
; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
;
; AVX2-LABEL: f8xf64_f128:
; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
+; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
;
; AVX512-LABEL: f8xf64_f128:
; AVX512: # %bb.0:
-; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2,1,2,1,2,1,2,1]
+; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
;
; AVX-64-LABEL: f8xf64_f128:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
+; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
;
; AVX2-64-LABEL: f8xf64_f128:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
+; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
;
; AVX512F-64-LABEL: f8xf64_f128:
; AVX512F-64: # %bb.0:
-; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2,1,2,1,2,1,2,1]
+; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
define <8 x double> @f8xf64_f256(<8 x double> %a) {
; AVX-LABEL: f8xf64_f256:
; AVX: # %bb.0:
-; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
+; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
;
; AVX2-LABEL: f8xf64_f256:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
+; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0
;
; AVX512-LABEL: f8xf64_f256:
; AVX512: # %bb.0:
-; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3]
+; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
;
; AVX-64-LABEL: f8xf64_f256:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
+; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
;
; AVX2-64-LABEL: f8xf64_f256:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
+; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
;
; AVX512F-64-LABEL: f8xf64_f256:
; AVX512F-64: # %bb.0:
-; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3]
+; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
define <2 x double> @test_negative_zero_2(<2 x double> %A) {
; SSE2-LABEL: test_negative_zero_2:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movapd {{.*#+}} xmm1 = <u,-0>
+; SSE2-NEXT: movapd {{.*#+}} xmm1 = <u,-0.0E+0>
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
define <4 x float> @combine_vec_fabs_constant() {
; SSE-LABEL: combine_vec_fabs_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,2,2]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fabs_constant:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
ret <4 x float> %1
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm7
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: andps %xmm7, %xmm2
-; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0,-0]
+; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0.0E+0,-0.0E+0]
; SSE-NEXT: andps %xmm8, %xmm4
; SSE-NEXT: orps %xmm4, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
; SSE-NEXT: movaps {{.*#+}} xmm5
; SSE-NEXT: andps %xmm5, %xmm0
; SSE-NEXT: cvtsd2ss %xmm1, %xmm6
-; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0,-0,-0,-0]
+; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; SSE-NEXT: andps %xmm4, %xmm6
; SSE-NEXT: orps %xmm6, %xmm0
; SSE-NEXT: movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; X32-NEXT: movapd {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
+; X32-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; X32-NEXT: orpd %xmm1, %xmm2
; X32-NEXT: subpd %xmm1, %xmm2
; X32-NEXT: cvtpd2ps %xmm2, %xmm1
; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: movd %edi, %xmm2
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; X64-NEXT: movdqa {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
+; X64-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; X64-NEXT: por %xmm1, %xmm2
; X64-NEXT: subpd %xmm1, %xmm2
; X64-NEXT: cvtpd2ps %xmm2, %xmm1
; X32: # %bb.0:
; X32-NEXT: xorps %xmm2, %xmm2
; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; X32-NEXT: movaps {{.*#+}} xmm0 = [4503599627370496,4503599627370496]
+; X32-NEXT: movaps {{.*#+}} xmm0 = [4.503599627370496E+15,4.503599627370496E+15]
; X32-NEXT: orps %xmm0, %xmm2
; X32-NEXT: subpd %xmm0, %xmm2
; X32-NEXT: cvtpd2ps %xmm2, %xmm0
; X64: # %bb.0:
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; X64-NEXT: movaps {{.*#+}} xmm0 = [4503599627370496,4503599627370496]
+; X64-NEXT: movaps {{.*#+}} xmm0 = [4.503599627370496E+15,4.503599627370496E+15]
; X64-NEXT: orps %xmm0, %xmm2
; X64-NEXT: subpd %xmm0, %xmm2
; X64-NEXT: cvtpd2ps %xmm2, %xmm0
;
; KNL-LABEL: test11:
; KNL: # %bb.0: # %entry
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; KNL-NEXT: vxorps %xmm3, %xmm2, %xmm3
; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; KNL-NEXT: kmovw %edi, %k1
define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_mm_fnmsub_ps:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [-0,-0,-0,-0]
+; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vxorps %xmm3, %xmm0, %xmm4
; CHECK-NEXT: vxorps %xmm3, %xmm2, %xmm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_mm_fnmsub_pd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [-0,-0]
+; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; CHECK-NEXT: vxorpd %xmm3, %xmm0, %xmm4
; CHECK-NEXT: vxorpd %xmm3, %xmm2, %xmm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_mm256_fnmsub_ps:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
+; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vxorps %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorps %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_mm256_fnmsub_pd:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [-0,-0,-0,-0]
+; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vxorpd %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorpd %ymm3, %ymm2, %ymm0
; CHECK-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT: retq
define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq
define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <1,u,1,1>
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <1.0E+0,u,1.0E+0,1.0E+0>
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <1,u,1,1>
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <1.0E+0,u,1.0E+0,1.0E+0>
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq
define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT: retq
define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq
define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1,-1,u,-1>
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1.0E+0,-1.0E+0,u,-1.0E+0>
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1,-1,u,-1>
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1.0E+0,-1.0E+0,u,-1.0E+0>
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq
define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
; FMA-INFS-LABEL: test_v4f32_interp:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
;
; FMA4-INFS-LABEL: test_v4f32_interp:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
;
; AVX512-INFS-LABEL: test_v4f32_interp:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
; FMA-INFS-LABEL: test_v8f32_interp:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
;
; FMA4-INFS-LABEL: test_v8f32_interp:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
;
; AVX512-INFS-LABEL: test_v8f32_interp:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
; FMA-INFS-LABEL: test_v2f64_interp:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
;
; FMA4-INFS-LABEL: test_v2f64_interp:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
;
; AVX512-INFS-LABEL: test_v2f64_interp:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
+; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
; FMA-INFS-LABEL: test_v4f64_interp:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
;
; FMA4-INFS-LABEL: test_v4f64_interp:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
;
; AVX512-INFS-LABEL: test_v4f64_interp:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-INFS-LABEL: test_v16f32_interp:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1,1,1,1,1,1,1,1]
+; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
;
; FMA4-INFS-LABEL: test_v16f32_interp:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1,1,1,1,1,1,1,1]
+; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
;
; AVX512-INFS-LABEL: test_v16f32_interp:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubps %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT: vmulps %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-INFS-LABEL: test_v8f64_interp:
; FMA-INFS: # %bb.0:
-; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1,1,1,1]
+; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
;
; FMA4-INFS-LABEL: test_v8f64_interp:
; FMA4-INFS: # %bb.0:
-; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1,1,1,1]
+; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
;
; AVX512-INFS-LABEL: test_v8f64_interp:
; AVX512-INFS: # %bb.0:
-; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm3 = [1,1,1,1,1,1,1,1]
+; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT: vsubpd %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT: vmulpd %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
; FMA: # %bb.0:
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
-; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0,-0,-0,-0]
+; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT: retq
; FMA4: # %bb.0:
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
-; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0,-0,-0,-0]
+; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT: retq
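The -0.0E+0 splats above are sign masks, not arithmetic constants: XORing a float with the bit pattern of negative zero (only the sign bit set) negates it, so the negate-both-products lowering materializes a splat of -0.0 and applies vxorpd per lane. A minimal scalar sketch of the same trick (hypothetical standalone code, not from this patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Negate a float by XORing in the bits of -0.0f (0x80000000), the scalar
// analogue of the per-lane vxorpd/xorps against a splat of -0.0.
static float flipSign(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits ^= UINT32_C(0x80000000);
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}

int main() {
  std::printf("%g %g\n", flipSign(1.5f), flipSign(-2.25f)); // -1.5 2.25
  return 0;
}

Printing the mask as -0.0E+0 rather than -0 makes it obvious in the checks that the constant pool entry is a floating-point bit pattern.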
define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: constant_fold_fmul_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,8,8,8]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8.0E+0,8.0E+0,8.0E+0,8.0E+0]
; CHECK-NEXT: retq
%y = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
ret <4 x float> %y
define <4 x float> @constant_fold_fmul_v4f32_undef(<4 x float> %x) {
; CHECK-LABEL: constant_fold_fmul_v4f32_undef:
; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,NaN,8,NaN]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8.0E+0,NaN,8.0E+0,NaN]
; CHECK-NEXT: retq
%y = fmul <4 x float> <float 4.0, float undef, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float undef>
ret <4 x float> %y
define <4 x float> @test1() {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-1,0,-1,0]
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,-1.0E+0,0.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = trunc <4 x i3> <i3 -1, i3 -22, i3 7, i3 8> to <4 x i1>
%2 = sitofp <4 x i1> %1 to <4 x float>
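Taking the low bit of each lane and sign-extending the resulting i1 into a float reproduces the folded [-1.0E+0,0.0E+0,-1.0E+0,0.0E+0] constant exactly. A throwaway check of the per-lane arithmetic (hypothetical sketch, assuming the literals wrap modulo 2^3):

#include <cstdio>

int main() {
  // Lane values from @test1; trunc to i1 keeps bit 0, and sitofp of an
  // i1 yields -1.0 for true (sign-extended) and 0.0 for false.
  const int Lanes[4] = {-1, -22, 7, 8};
  for (int V : Lanes)
    std::printf("%.1f ", (V & 1) ? -1.0 : 0.0); // -1.0 0.0 -1.0 0.0
  std::printf("\n");
  return 0;
}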
; X32SSE2-LABEL: elt1_v4f32:
; X32SSE2: # %bb.0:
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <42,u,2,3>
+; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X32SSE2-NEXT: retl
;
; X64SSE2-LABEL: elt1_v4f32:
; X64SSE2: # %bb.0:
-; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <42,u,2,3>
+; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64SSE2-NEXT: retq
;
; X32SSE4-LABEL: elt1_v4f32:
; X32SSE4: # %bb.0:
-; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = <42,u,2,3>
+; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32SSE4-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X32SSE4-NEXT: retl
;
; X64SSE4-LABEL: elt1_v4f32:
; X64SSE4: # %bb.0:
-; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <42,u,2,3>
+; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
; X64SSE4-NEXT: movaps %xmm1, %xmm0
; X64SSE4-NEXT: retq
;
; X32AVX-LABEL: elt1_v4f32:
; X32AVX: # %bb.0:
-; X32AVX-NEXT: vmovaps {{.*#+}} xmm0 = <42,u,2,3>
+; X32AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X32AVX-NEXT: retl
;
; X64AVX-LABEL: elt1_v4f32:
; X64AVX: # %bb.0:
-; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <42,u,2,3>
+; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; X64AVX-NEXT: retq
%ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
define <2 x double> @elt1_v2f64(double %x) {
; X32SSE-LABEL: elt1_v2f64:
; X32SSE: # %bb.0:
-; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <42,u>
+; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <4.2E+1,u>
; X32SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32SSE-NEXT: retl
;
; X64SSE-LABEL: elt1_v2f64:
; X64SSE: # %bb.0:
-; X64SSE-NEXT: movaps {{.*#+}} xmm1 = <42,u>
+; X64SSE-NEXT: movaps {{.*#+}} xmm1 = <4.2E+1,u>
; X64SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64SSE-NEXT: movaps %xmm1, %xmm0
; X64SSE-NEXT: retq
;
; X32AVX-LABEL: elt1_v2f64:
; X32AVX: # %bb.0:
-; X32AVX-NEXT: vmovapd {{.*#+}} xmm0 = <42,u>
+; X32AVX-NEXT: vmovapd {{.*#+}} xmm0 = <4.2E+1,u>
; X32AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32AVX-NEXT: retl
;
; X64AVX-LABEL: elt1_v2f64:
; X64AVX: # %bb.0:
-; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <42,u>
+; X64AVX-NEXT: vmovaps {{.*#+}} xmm1 = <4.2E+1,u>
; X64AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64AVX-NEXT: retq
%ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
; X32SSE2-LABEL: elt6_v8f32:
; X32SSE2: # %bb.0:
; X32SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
+; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
-; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
+; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X32SSE2-NEXT: retl
;
; X64SSE2-LABEL: elt6_v8f32:
; X64SSE2: # %bb.0:
-; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
+; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
-; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
+; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64SSE2-NEXT: retq
;
; X32SSE4-LABEL: elt6_v8f32:
; X32SSE4: # %bb.0:
-; X32SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
+; X32SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
-; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
+; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X32SSE4-NEXT: retl
;
; X64SSE4-LABEL: elt6_v8f32:
; X64SSE4: # %bb.0:
-; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
+; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4.0E+0,5.0E+0,u,7.0E+0>
; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
-; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
+; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64SSE4-NEXT: retq
;
; X32AVX-LABEL: elt6_v8f32:
; X32AVX: # %bb.0:
-; X32AVX-NEXT: vmovaps {{.*#+}} ymm0 = <42,1,2,3,4,5,u,7>
+; X32AVX-NEXT: vmovaps {{.*#+}} ymm0 = <4.2E+1,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,u,7.0E+0>
; X32AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; X32AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; X32AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
; X64AVX-LABEL: elt6_v8f32:
; X64AVX: # %bb.0:
-; X64AVX-NEXT: vmovaps {{.*#+}} ymm1 = <42,1,2,3,4,5,u,7>
+; X64AVX-NEXT: vmovaps {{.*#+}} ymm1 = <4.2E+1,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,u,7.0E+0>
; X64AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; X64AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3]
; X64AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
define <8 x double> @elt1_v8f64(double %x) {
; X32SSE-LABEL: elt1_v8f64:
; X32SSE: # %bb.0:
-; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <42,u>
+; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <4.2E+1,u>
; X32SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
-; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4,5]
-; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6,7]
+; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
+; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
+; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X32SSE-NEXT: retl
;
; X64SSE-LABEL: elt1_v8f64:
; X64SSE: # %bb.0:
-; X64SSE-NEXT: movaps {{.*#+}} xmm4 = <42,u>
+; X64SSE-NEXT: movaps {{.*#+}} xmm4 = <4.2E+1,u>
; X64SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
-; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
-; X64SSE-NEXT: movaps {{.*#+}} xmm2 = [4,5]
-; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6,7]
+; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
+; X64SSE-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
+; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X64SSE-NEXT: movaps %xmm4, %xmm0
; X64SSE-NEXT: retq
;
; X32AVX2-LABEL: elt1_v8f64:
; X32AVX2: # %bb.0:
-; X32AVX2-NEXT: vmovapd {{.*#+}} ymm0 = <42,u,2,3>
+; X32AVX2-NEXT: vmovapd {{.*#+}} ymm0 = <4.2E+1,u,2.0E+0,3.0E+0>
; X32AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm0[0],mem[0]
; X32AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; X32AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,6,7]
+; X32AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X32AVX2-NEXT: retl
;
; X64AVX2-LABEL: elt1_v8f64:
; X64AVX2: # %bb.0:
-; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <42,u,2,3>
+; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <4.2E+1,u,2.0E+0,3.0E+0>
; X64AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,6,7]
+; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64AVX2-NEXT: retq
;
; X32AVX512F-LABEL: elt1_v8f64:
; X32AVX512F: # %bb.0:
-; X32AVX512F-NEXT: vmovapd {{.*#+}} zmm0 = <42,u,2,3,4,5,6,7>
+; X32AVX512F-NEXT: vmovapd {{.*#+}} zmm0 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X32AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm0[0],mem[0]
; X32AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; X32AVX512F-NEXT: retl
;
; X64AVX512F-LABEL: elt1_v8f64:
; X64AVX512F: # %bb.0:
-; X64AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <42,u,2,3,4,5,6,7>
+; X64AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = <4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0>
; X64AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64AVX512F-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64AVX512F-NEXT: retq
; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X86-SSE-NEXT: psrlq $63, %xmm4
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.9406564584124654E-324,-0]
+; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.9406564584124654E-324,-0.0E+0]
; X86-SSE-NEXT: xorpd %xmm2, %xmm0
; X86-SSE-NEXT: psubq %xmm2, %xmm0
; X86-SSE-NEXT: psrlq $63, %xmm3
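The odd-looking [4.9406564584124654E-324,-0.0E+0] pair is an integer mask printed as doubles: lane 0 is the i64 value 1, whose double interpretation is the smallest subnormal, and lane 1 is 0x8000000000000000, with the xorpd/psubq pair applying the usual (x ^ m) - m sign-extension idiom. Reinterpreting the inferred lane bits confirms the printed values (a hypothetical standalone check):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Lane bit patterns inferred from the printed constant.
  const uint64_t Masks[2] = {UINT64_C(1), UINT64_C(0x8000000000000000)};
  for (uint64_t M : Masks) {
    double D;
    std::memcpy(&D, &M, sizeof(D));
    std::printf("%.17g\n", D); // 4.9406564584124654e-324, then -0
  }
  return 0;
}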
; CHECK-NEXT: rsqrtps %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm0, %xmm2
; CHECK-NEXT: mulps %xmm1, %xmm2
-; CHECK-NEXT: movaps {{.*#+}} xmm3 = [-0.5,-0.5,-0.5,-0.5]
+; CHECK-NEXT: movaps {{.*#+}} xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; CHECK-NEXT: movaps %xmm2, %xmm4
; CHECK-NEXT: mulps %xmm3, %xmm4
; CHECK-NEXT: mulps %xmm1, %xmm2
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3,-3,-3,-3]
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; CHECK-NEXT: addps %xmm1, %xmm2
; CHECK-NEXT: mulps %xmm4, %xmm2
; CHECK-NEXT: xorps %xmm4, %xmm4
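The -5.0E-1 and -3.0E+0 splats are the coefficients of the Newton-Raphson step this sequence implements: with x0 = rsqrtps(a), it computes (-0.5*a*x0) * (a*x0*x0 - 3) = 0.5*a*x0*(3 - a*x0*x0), one refinement of 1/sqrt(a) folded together with a multiply by a to approximate sqrt(a). A scalar model of the instruction chain above (hypothetical sketch):

#include <cmath>
#include <cstdio>

// One refinement step matching the mulps/addps chain above; X0 stands in
// for the hardware rsqrt estimate.
static float sqrtOneStep(float A, float X0) {
  return (-0.5f * A * X0) * (A * X0 * X0 - 3.0f);
}

int main() {
  const float A = 2.0f;
  const float X0 = 0.7f; // crude estimate of 1/sqrt(2) ~ 0.7071
  std::printf("%f vs %f\n", sqrtOneStep(A, X0), std::sqrt(A));
  return 0;
}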
; CHECK-NEXT: subl $28, %esp
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0,-0,-0,-0]
+; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: xorps %xmm2, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: xorps %xmm2, %xmm1
define <8 x double> @test(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <u,0.82071743224100002,0.82071743224100002,0.82071743224100002>
+; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = <u,8.2071743224100002E-1,8.2071743224100002E-1,8.2071743224100002E-1>
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm2[1],ymm1[3],ymm2[3]
-; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [0.82071743224100002,0.82071743224100002]
+; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [8.2071743224100002E-1,8.2071743224100002E-1]
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT: retq
%1 = shufflevector <4 x double> %a, <4 x double> <double undef, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C>, <8 x i32> <i32 6, i32 5, i32 2, i32 3, i32 5, i32 1, i32 3, i32 7>
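The hex constant 0x3FEA435134576E1C in the IR and the 8.2071743224100002E-1 in the updated comment denote the same double. A quick way to confirm such round-trips (hypothetical standalone snippet):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint64_t Bits = UINT64_C(0x3FEA435134576E1C); // hex double from the IR
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  std::printf("%.17g\n", D); // ~0.82071743224100002
  return 0;
}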
define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; SSE-LABEL: v4f32_no_estimate:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: divps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v4f32_no_estimate:
; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v4f32_no_estimate:
; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [9:9.50]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v4f32_no_estimate:
; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; KNL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: v4f32_no_estimate:
; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SKX-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [11:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: subps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm2, %xmm1
; SSE-NEXT: addps %xmm2, %xmm1
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
; KNL-LABEL: v4f32_one_step:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
; KNL-NEXT: retq # sched: [7:1.00]
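Every [1.0E+0,...] splat in these reciprocal-estimate tests is the literal 1.0 of the Newton-Raphson residual: with x0 = rcpps(a), one refinement is x1 = x0 + x0*(1 - a*x0), exactly the mulps/subps/mulps/addps chain in the non-FMA checks. A scalar model with an explicit step count, so the two_step variants below follow the same code (hypothetical sketch):

#include <cstdio>

// Refine a reciprocal estimate X0 of 1/A with Steps Newton-Raphson
// iterations, x <- x + x*(1 - a*x); the splatted 1.0 vectors in the
// checks are this residual's constant.
static float recipRefine(float A, float X0, int Steps) {
  float X = X0;
  for (int I = 0; I < Steps; ++I)
    X = X + X * (1.0f - A * X);
  return X;
}

int main() {
  std::printf("%f\n", recipRefine(3.0f, 0.3f, 2)); // ~0.333333
  return 0;
}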
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: subps %xmm3, %xmm4
; SSE-NEXT: mulps %xmm2, %xmm4
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
; FMA-RECIP-LABEL: v4f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; BDVER2-LABEL: v4f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [6:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
; KNL-LABEL: v4f32_two_step:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
; SKX-LABEL: v4f32_two_step:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
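The FMA variants fold the same step into two fused ops: vfnmadd213ps forms the residual 1 - a*x0 directly, and vfmadd132ps applies it as x0*r + x0, which is why the 1.0 splat survives even when the explicit subtract disappears. With standard fma the step reads (hypothetical sketch):

#include <cmath>
#include <cstdio>

// FMA form of one reciprocal refinement, mirroring the
// vfnmadd213ps / vfmadd132ps pair in the checks above.
static float recipStepFMA(float A, float X0) {
  const float R = std::fma(-A, X0, 1.0f); // 1 - a*x0
  return std::fma(X0, R, X0);             // x0*r + x0
}

int main() {
  std::printf("%f\n", recipStepFMA(4.0f, 0.24f)); // ~0.2496
  return 0;
}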
define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; SSE-LABEL: v8f32_no_estimate:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: divps %xmm0, %xmm3
; SSE-NEXT: divps %xmm1, %xmm2
;
; AVX-RECIP-LABEL: v8f32_no_estimate:
; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v8f32_no_estimate:
; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [9:19.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v8f32_no_estimate:
; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; KNL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00]
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: v8f32_no_estimate:
; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SKX-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [11:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm4
; SSE-NEXT: mulps %xmm4, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: subps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm4, %xmm3
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
; KNL-LABEL: v8f32_one_step:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
; KNL-NEXT: retq # sched: [7:1.00]
; SSE-NEXT: rcpps %xmm0, %xmm3
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: mulps %xmm3, %xmm4
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: subps %xmm4, %xmm5
; SSE-NEXT: mulps %xmm3, %xmm5
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
; FMA-RECIP-LABEL: v8f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
; BDVER2-LABEL: v8f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
; KNL-LABEL: v8f32_two_step:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
; SKX-LABEL: v8f32_two_step:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
; SSE-LABEL: v16f32_no_estimate:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm4 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: divps %xmm0, %xmm5
; SSE-NEXT: movaps %xmm4, %xmm6
;
; AVX-RECIP-LABEL: v16f32_no_estimate:
; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v16f32_no_estimate:
; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [9:19.00]
; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [9:19.00]
; BDVER2-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [38:38.00]
; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [38:38.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v16f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v16f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [21:14.00]
; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [21:14.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; HASWELL-NO-FMA-LABEL: v16f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vdivps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v16f32_no_estimate:
; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00]
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: v16f32_no_estimate:
; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: rcpps %xmm0, %xmm6
; SSE-NEXT: mulps %xmm6, %xmm5
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: subps %xmm5, %xmm0
; SSE-NEXT: mulps %xmm6, %xmm0
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
; FMA-RECIP-LABEL: v16f32_one_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
; BDVER2-LABEL: v16f32_one_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [5:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-LABEL: v16f32_one_step:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SSE-NEXT: rcpps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm6
; SSE-NEXT: mulps %xmm0, %xmm6
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm3, %xmm7
; SSE-NEXT: subps %xmm6, %xmm7
; SSE-NEXT: mulps %xmm0, %xmm7
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm3
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
; FMA-RECIP-LABEL: v16f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
; BDVER2-LABEL: v16f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
; HASWELL-LABEL: v16f32_two_step:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
; KNL-LABEL: v16f32_two_step:
; KNL: # %bb.0:
; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50]
; SKX-LABEL: v16f32_two_step:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: subps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm2, %xmm1
; SSE-NEXT: addps %xmm2, %xmm1
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; KNL-LABEL: v4f32_one_step2:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm1, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: subps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm1, %xmm2
; SSE-NEXT: addps %xmm1, %xmm2
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,3,4]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: retq
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: subps %xmm3, %xmm4
; SSE-NEXT: mulps %xmm2, %xmm4
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
; FMA-RECIP-LABEL: v4f32_two_step2:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; BDVER2-LABEL: v4f32_two_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [6:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [6:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
; KNL-LABEL: v4f32_two_step2:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
; SKX-LABEL: v4f32_two_step2:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm1, %xmm4
; SSE-NEXT: mulps %xmm4, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: subps %xmm1, %xmm3
; SSE-NEXT: mulps %xmm4, %xmm3
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; KNL-LABEL: v8f32_one_step2:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subps %xmm0, %xmm4
; SSE-NEXT: mulps %xmm2, %xmm4
; SSE-NEXT: subps %xmm1, %xmm3
; SSE-NEXT: mulps %xmm0, %xmm3
; SSE-NEXT: addps %xmm0, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [5,6,7,8]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [5.0E+0,6.0E+0,7.0E+0,8.0E+0]
; SSE-NEXT: mulps %xmm3, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,3,4]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; SSE-NEXT: mulps %xmm4, %xmm0
; SSE-NEXT: mulps %xmm4, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm1
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
; SSE-NEXT: rcpps %xmm1, %xmm3
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: mulps %xmm3, %xmm4
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: subps %xmm4, %xmm5
; SSE-NEXT: mulps %xmm3, %xmm5
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
; FMA-RECIP-LABEL: v8f32_two_step2:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
; BDVER2-LABEL: v8f32_two_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
; KNL-LABEL: v8f32_two_step2:
; KNL: # %bb.0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
; SKX-LABEL: v8f32_two_step2:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
; SSE-NEXT: movaps %xmm0, %xmm6
; SSE-NEXT: rcpps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm4
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: subps %xmm4, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm1
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: vaddps %ymm1, %ymm2, %ymm1
; FMA-RECIP-LABEL: v16f32_one_step2:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
; BDVER2-LABEL: v16f32_one_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [5:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50]
;
; BTVER2-LABEL: v16f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
; HASWELL-LABEL: v16f32_one_step2:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vrcpps %ymm0, %ymm4 # sched: [11:2.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm6
; SSE-NEXT: mulps %xmm6, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm4 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: subps %xmm0, %xmm5
; SSE-NEXT: mulps %xmm6, %xmm5
; SSE-NEXT: subps %xmm3, %xmm4
; SSE-NEXT: mulps %xmm0, %xmm4
; SSE-NEXT: addps %xmm0, %xmm4
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [13,14,15,16]
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.3E+1,1.4E+1,1.5E+1,1.6E+1]
; SSE-NEXT: mulps %xmm4, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm2 = [9,10,11,12]
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1]
; SSE-NEXT: mulps %xmm7, %xmm2
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [5,6,7,8]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [5.0E+0,6.0E+0,7.0E+0,8.0E+0]
; SSE-NEXT: mulps %xmm6, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,3,4]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; SSE-NEXT: mulps %xmm5, %xmm0
; SSE-NEXT: mulps %xmm5, %xmm0
; SSE-NEXT: mulps %xmm6, %xmm1
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
; FMA-RECIP-LABEL: v16f32_one_step_2_divs:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
; BDVER2-LABEL: v16f32_one_step_2_divs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
;
; BTVER2-LABEL: v16f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
; HASWELL-LABEL: v16f32_one_step_2_divs:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50]
; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: rcpps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm7
; SSE-NEXT: subps %xmm3, %xmm7
; SSE-NEXT: mulps %xmm2, %xmm7
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
-; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
+; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
; FMA-RECIP-LABEL: v16f32_two_step2:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
-; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
+; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
; BDVER2-LABEL: v16f32_two_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
;
; BTVER2-LABEL: v16f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
; HASWELL-LABEL: v16f32_two_step2:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
; KNL-LABEL: v16f32_two_step2:
; KNL: # %bb.0:
; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50]
; SKX-LABEL: v16f32_two_step2:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB37_1
; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [118.83,34.539999999999999]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.1883E+2,3.4539999999999999E+1]
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB37_1:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-20.399999999999999,37.68]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-2.0399999999999999E+1,3.768E+1]
; CHECK-NEXT: retq
%sel = select i1 %cond, <2 x double> <double -4.0, double 12.0>, <2 x double> <double 23.3, double 11.0>
%bo = fmul <2 x double> %sel, <double 5.1, double 3.14>
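(The two movaps constants in this test are the already-folded products: the fmul distributes over the select arms, so each branch loads sel * <5.1, 3.14> directly, per the identity

\[ \mathrm{select}(c, a, b) \cdot k = \mathrm{select}(c,\; a \cdot k,\; b \cdot k), \]

e.g. <-4.0, 12.0> * <5.1, 3.14> = <-20.4, 37.68>, which is the [-2.0399999999999999E+1,3.768E+1] vector above.)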
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [1,1]
+; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT: # xmm1 = mem[0,0]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
; SSE-NEXT: rsqrtps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm2, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [-0.5,-0.5,-0.5,-0.5]
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; SSE-NEXT: mulps %xmm1, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm1
; SSE-NEXT: addps {{.*}}(%rip), %xmm1
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtps %xmm0, %xmm1
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3,-3,-3,-3]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; AVX512-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.5,-0.5,-0.5,-0.5]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; AVX512-NEXT: vmulps %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vmulps %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; SSE-LABEL: v4f32_no_estimate:
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: divps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: v4f32_no_estimate:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtps %xmm0, %xmm0
-; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
+; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtps %xmm0, %xmm0
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtps %xmm0, %xmm1
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-3,-3,-3,-3]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0.5,-0.5,-0.5,-0.5]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; AVX512-NEXT: vmulps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
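(The -3.0E+0 and -5.0E-1 splats in these rsqrt-estimate tests are the constants of one Newton-Raphson step on the vrsqrtps estimate x0 ~ 1/sqrt(a):

\[ x_{n+1} = \tfrac{1}{2}\,x_n\,(3 - a\,x_n^2) = (-0.5)\,x_n\,(a\,x_n^2 - 3), \]

which is why each sequence forms a*x0 with a vmulps, folds a*x0^2 - 3 against the -3 splat via FMA or mulps/addps, and then scales the result by the -0.5 splat and by x0.)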
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm1, %xmm2
; SSE-NEXT: sqrtps %xmm0, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: divps %xmm3, %xmm0
; SSE-NEXT: divps %xmm2, %xmm1
; AVX1-LABEL: v8f32_no_estimate:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtps %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX1-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtps %ymm0, %ymm0
-; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
; SSE-LABEL: v8f32_estimate:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm3
-; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.5,-0.5,-0.5,-0.5]
+; SSE-NEXT: movaps {{.*#+}} xmm4 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; SSE-NEXT: movaps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm0, %xmm2
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3,-3,-3,-3]
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; SSE-NEXT: addps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm4, %xmm2
; SSE-NEXT: mulps %xmm3, %xmm2
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtps %ymm0, %ymm1
; AVX512-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [-3,-3,-3,-3,-3,-3,-3,-3]
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
-; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5]
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm0
; AVX512-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
; SSE-NEXT: sqrtps %xmm2, %xmm5
; SSE-NEXT: sqrtps %xmm1, %xmm2
; SSE-NEXT: sqrtps %xmm0, %xmm1
-; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: divps %xmm1, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm1
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtps %ymm1, %ymm1
; AVX1-NEXT: vsqrtps %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX1-NEXT: vdivps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vdivps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
; AVX512-LABEL: v16f32_no_estimate:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtps %zmm0, %zmm0
-; AVX512-NEXT: vbroadcastss {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%sqrt = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> %x)
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: rsqrtps %xmm0, %xmm5
-; SSE-NEXT: movaps {{.*#+}} xmm6 = [-0.5,-0.5,-0.5,-0.5]
+; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; SSE-NEXT: movaps %xmm5, %xmm0
; SSE-NEXT: mulps %xmm5, %xmm0
; SSE-NEXT: mulps %xmm1, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm7 = [-3,-3,-3,-3]
+; SSE-NEXT: movaps {{.*#+}} xmm7 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; SSE-NEXT: addps %xmm7, %xmm0
; SSE-NEXT: mulps %xmm6, %xmm0
; SSE-NEXT: mulps %xmm5, %xmm0
; AVX1-LABEL: v16f32_estimate:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtps %ymm0, %ymm2
-; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5]
+; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
; AVX1-NEXT: vmulps %ymm2, %ymm2, %ymm4
; AVX1-NEXT: vmulps %ymm4, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [-3,-3,-3,-3,-3,-3,-3,-3]
+; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; AVX1-NEXT: vaddps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vmulps %ymm0, %ymm3, %ymm0
; AVX1-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SSE-LABEL: test12:
; SSE: # %bb.0:
; SSE-NEXT: movapd 0, %xmm0
-; SSE-NEXT: movapd {{.*#+}} xmm1 = [1,1,1,1]
+; SSE-NEXT: movapd {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; AVX512-LABEL: test12:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps 0, %xmm0
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b) {
; X32-AVX-LABEL: fallback_broadcast_v4f64_to_v8f64:
; X32-AVX: # %bb.0: # %entry
-; X32-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1,2,3,4]
+; X32-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; X32-AVX-NEXT: vaddpd %ymm3, %ymm0, %ymm0
; X32-AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm2
; X32-AVX-NEXT: vaddpd %ymm3, %ymm1, %ymm1
;
; X32-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
; X32-AVX512: # %bb.0: # %entry
-; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1,2,3,4]
+; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; X32-AVX512-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
; X32-AVX512-NEXT: vaddpd %zmm2, %zmm1, %zmm1
;
; X64-AVX-LABEL: fallback_broadcast_v4f64_to_v8f64:
; X64-AVX: # %bb.0: # %entry
-; X64-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1,2,3,4]
+; X64-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; X64-AVX-NEXT: vaddpd %ymm3, %ymm0, %ymm0
; X64-AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm2
; X64-AVX-NEXT: vaddpd %ymm3, %ymm1, %ymm1
;
; X64-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
; X64-AVX512: # %bb.0: # %entry
-; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1,2,3,4]
+; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; X64-AVX512-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
; X64-AVX512-NEXT: vaddpd %zmm2, %zmm1, %zmm1
define <4 x float> @foo() {
; X32-LABEL: foo:
; X32: # %bb.0:
-; X32-NEXT: movaps {{.*#+}} xmm0 = [3.22354245,2.29999995,1.20000005,0.100000001]
+; X32-NEXT: movaps {{.*#+}} xmm0 = [3.22354245E+0,2.29999995E+0,1.20000005E+0,1.00000001E-1]
; X32-NEXT: retl
;
; X64-LABEL: foo:
; X64: # %bb.0:
-; X64-NEXT: movaps {{.*#+}} xmm0 = [3.22354245,2.29999995,1.20000005,0.100000001]
+; X64-NEXT: movaps {{.*#+}} xmm0 = [3.22354245E+0,2.29999995E+0,1.20000005E+0,1.00000001E-1]
; X64-NEXT: retq
ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
}
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-WIDE-LABEL: cvt_v2u32_v2f32:
; CHECK-WIDE: ## %bb.0:
; CHECK-WIDE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
+; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; CHECK-WIDE-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT: vcvtpd2ps %xmm0, %xmm0
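(The 4.503599627370496E+15 constant in the two hunks above is exactly 2^52, the usual exponent-stuffing trick for converting a 32-bit unsigned integer on targets without an unsigned convert: OR the integer into the all-zero mantissa of the double 2^52, giving exactly 2^52 + u, then subtract 2^52. A minimal scalar sketch in C++ — the helper name is made up for illustration, and little-endian IEEE-754 doubles are assumed:

#include <cstdint>
#include <cstring>

// Mirrors the vpor/vsubpd pair above, one lane at a time.
double u32_to_f64(uint32_t u) {
  const double magic = 4503599627370496.0; // 2^52: mantissa bits all zero
  uint64_t bits;
  std::memcpy(&bits, &magic, sizeof bits);
  bits |= u;                     // u lands in the low 32 mantissa bits
  double biased;
  std::memcpy(&biased, &bits, sizeof biased);
  return biased - magic;         // (2^52 + u) - 2^52 == (double)u, exactly
}

Since u < 2^32 <= 2^52, the value 2^52 + u is exactly representable and the final subtraction is exact.)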
define <2 x double> @const_floor_v2f64() {
; SSE41-LABEL: const_floor_v2f64:
; SSE41: ## %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-2,2]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; SSE41-NEXT: retq
;
; AVX-LABEL: const_floor_v2f64:
; AVX: ## %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: const_floor_v2f64:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; AVX512-NEXT: retq
%t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
ret <2 x double> %t
define <4 x float> @const_floor_v4f32() {
; SSE41-LABEL: const_floor_v4f32:
; SSE41: ## %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-4,6,-9,2]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-4.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; SSE41-NEXT: retq
;
; AVX-LABEL: const_floor_v4f32:
; AVX: ## %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-4,6,-9,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: const_floor_v4f32:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-4,6,-9,2]
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX512-NEXT: retq
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
ret <4 x float> %t
define <2 x double> @const_ceil_v2f64() {
; SSE41-LABEL: const_ceil_v2f64:
; SSE41: ## %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1,3]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.0E+0,3.0E+0]
; SSE41-NEXT: retq
;
; AVX-LABEL: const_ceil_v2f64:
; AVX: ## %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1,3]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.0E+0,3.0E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: const_ceil_v2f64:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1,3]
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.0E+0,3.0E+0]
; AVX512-NEXT: retq
%t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
ret <2 x double> %t
define <4 x float> @const_ceil_v4f32() {
; SSE41-LABEL: const_ceil_v4f32:
; SSE41: ## %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3,6,-9,3]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,3.0E+0]
; SSE41-NEXT: retq
;
; AVX-LABEL: const_ceil_v4f32:
; AVX: ## %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,3]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,3.0E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: const_ceil_v4f32:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,3]
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,3.0E+0]
; AVX512-NEXT: retq
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
ret <4 x float> %t
define <2 x double> @const_trunc_v2f64() {
; SSE41-LABEL: const_trunc_v2f64:
; SSE41: ## %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1,2]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.0E+0,2.0E+0]
; SSE41-NEXT: retq
;
; AVX-LABEL: const_trunc_v2f64:
; AVX: ## %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.0E+0,2.0E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: const_trunc_v2f64:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1,2]
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.0E+0,2.0E+0]
; AVX512-NEXT: retq
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
ret <2 x double> %t
define <4 x float> @const_trunc_v4f32() {
; SSE41-LABEL: const_trunc_v4f32:
; SSE41: ## %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3,6,-9,2]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; SSE41-NEXT: retq
;
; AVX-LABEL: const_trunc_v4f32:
; AVX: ## %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,2]
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: const_trunc_v4f32:
; AVX512: ## %bb.0:
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,2]
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,6.0E+0,-9.0E+0,2.0E+0]
; AVX512-NEXT: retq
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
ret <4 x float> %t
; AVX1-LABEL: fptoui_4f64_to_2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovapd %xmm0, %xmm0
-; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-LABEL: fptoui_4f64_to_2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovapd %xmm0, %xmm0
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
;
; AVX1-LABEL: fptoui_4f64_to_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
;
; AVX2-LABEL: fptoui_4f64_to_4i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
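(The 2.147483648E+9 splat here is 2^31. Only the vcmpltpd and mask packing are visible in these hunks; the usual expansion of fptoui on targets without a packed unsigned convert — stated as the general identity, not taken from this patch — is

\[ \mathrm{fptoui}_{32}(x) = \begin{cases} \mathrm{fptosi}_{32}(x) & x < 2^{31} \\ \mathrm{fptosi}_{32}(x - 2^{31}) \oplus \mathtt{0x80000000} & \text{otherwise,} \end{cases} \]

with the compare mask selecting between the two arms.)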
define <2 x double> @fpext_fromconst() {
; X32-SSE-LABEL: fpext_fromconst:
; X32-SSE: # %bb.0: # %entry
-; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,-2]
+; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,-2.0E+0]
; X32-SSE-NEXT: # encoding: [0x0f,0x28,0x05,A,A,A,A]
; X32-SSE-NEXT: # fixup A - offset: 3, value: {{\.LCPI.*}}, kind: FK_Data_4
; X32-SSE-NEXT: retl # encoding: [0xc3]
;
; X32-AVX-LABEL: fpext_fromconst:
; X32-AVX: # %bb.0: # %entry
-; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,-2]
+; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.0E+0,-2.0E+0]
; X32-AVX-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X32-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X32-AVX-NEXT: retl # encoding: [0xc3]
;
; X32-AVX512VL-LABEL: fpext_fromconst:
; X32-AVX512VL: # %bb.0: # %entry
-; X32-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [1,-2]
+; X32-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [1.0E+0,-2.0E+0]
; X32-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X32-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: fpext_fromconst:
; X64-SSE: # %bb.0: # %entry
-; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,-2]
+; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,-2.0E+0]
; X64-SSE-NEXT: # encoding: [0x0f,0x28,0x05,A,A,A,A]
; X64-SSE-NEXT: # fixup A - offset: 3, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: fpext_fromconst:
; X64-AVX: # %bb.0: # %entry
-; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,-2]
+; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.0E+0,-2.0E+0]
; X64-AVX-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: fpext_fromconst:
; X64-AVX512VL: # %bb.0: # %entry
-; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [1,-2]
+; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [1.0E+0,-2.0E+0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65536,65536,65536,65536]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
-; SSE2-NEXT: movapd {{.*#+}} xmm2 = [65536,65536]
+; SSE2-NEXT: movapd {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4]
; SSE2-NEXT: mulpd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
-; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65536,65536]
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4]
; SSE41-NEXT: mulpd %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65536,65536,65536,65536]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
-; SSE2-NEXT: movapd {{.*#+}} xmm2 = [65536,65536]
+; SSE2-NEXT: movapd {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4]
; SSE2-NEXT: mulpd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
-; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65536,65536]
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4]
; SSE41-NEXT: mulpd %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65536,65536,65536,65536]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
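(The 6.5536E+4 splat in these uint-to-FP tests is 2^16. Each 32-bit lane is split into 16-bit halves, both halves are converted with the signed cvtdq2pd/cvtdq2ps — exact, since each half is non-negative and below 2^16 — and the results are recombined as

\[ \mathrm{uitofp}(u) = 2^{16} \cdot \mathrm{sitofp}(u \gg 16) + \mathrm{sitofp}(u \mathbin{\&} \mathtt{0xFFFF}), \]

which is the psrld $16 / pand-or-blend / multiply-by-2^16 / add shape visible across the SSE2, SSE41 and AVX2 hunks.)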
define <2 x double> @test5() nounwind uwtable readnone noinline {
; X32-LABEL: test5:
; X32: ## %bb.0: ## %entry
-; X32-NEXT: movaps {{.*#+}} xmm0 = [128,123.321]
+; X32-NEXT: movaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X32-NEXT: retl
;
; X64-LABEL: test5:
; X64: ## %bb.0: ## %entry
-; X64-NEXT: movaps {{.*#+}} xmm0 = [128,123.321]
+; X64-NEXT: movaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X64-NEXT: retq
;
; X32_AVX-LABEL: test5:
; X32_AVX: ## %bb.0: ## %entry
-; X32_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [128,123.321]
+; X32_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X32_AVX-NEXT: retl
;
; X64_AVX-LABEL: test5:
; X64_AVX: ## %bb.0: ## %entry
-; X64_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [128,123.321]
+; X64_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.28E+2,1.23321E+2]
; X64_AVX-NEXT: retq
entry:
%0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
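(cvtsi2sd replaces lane 0 of its vector operand with sitofp of the integer operand and leaves lane 1 untouched, so this call constant-folds as

\[ \mathrm{cvtsi2sd}(\langle 456.987,\, 123.321\rangle,\, 128) = \langle \mathrm{sitofp}(128),\, 123.321\rangle = \langle 128.0,\, 123.321\rangle, \]

which is the [1.28E+2,1.23321E+2] vector the checks above load.)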
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrld $16, %xmm2
; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
-; SSE2-NEXT: movaps {{.*#+}} xmm3 = [65536,65536,65536,65536]
+; SSE2-NEXT: movaps {{.*#+}} xmm3 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
; SSE2-NEXT: mulps %xmm3, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrld $16, %xmm2
; SSE41-NEXT: cvtdq2ps %xmm2, %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm3 = [65536,65536,65536,65536]
+; SSE41-NEXT: movaps {{.*#+}} xmm3 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4]
; SSE41-NEXT: mulps %xmm3, %xmm2
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
define <2 x double> @constrained_vector_fdiv_v2f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v2f64:
; NO-FMA: # %bb.0: # %entry
-; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,2]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT: retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v2f64:
; HAS-FMA: # %bb.0: # %entry
-; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1,2]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm0, %xmm0
; HAS-FMA-NEXT: retq
entry:
define <3 x double> @constrained_vector_fdiv_v3f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v3f64:
; NO-FMA: # %bb.0: # %entry
-; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,2]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1
; HAS-FMA: # %bb.0: # %entry
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
-; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1,2]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0]
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm1, %xmm1
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; HAS-FMA-NEXT: retq
define <4 x double> @constrained_vector_fdiv_v4f64() {
; NO-FMA-LABEL: constrained_vector_fdiv_v4f64:
; NO-FMA: # %bb.0: # %entry
-; NO-FMA-NEXT: movapd {{.*#+}} xmm2 = [10,10]
-; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,2]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
; NO-FMA-NEXT: divpd %xmm2, %xmm0
-; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [3,4]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0]
; NO-FMA-NEXT: divpd %xmm2, %xmm1
; NO-FMA-NEXT: retq
;
; HAS-FMA-LABEL: constrained_vector_fdiv_v4f64:
; HAS-FMA: # %bb.0: # %entry
-; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1,2,3,4]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %ymm0, %ymm0
; HAS-FMA-NEXT: retq
entry:
; NO-FMA-LABEL: constrained_vector_fmul_v4f64:
; NO-FMA: # %bb.0: # %entry
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [2,3]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
; NO-FMA-NEXT: mulpd %xmm1, %xmm0
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: retq
; NO-FMA-LABEL: constrained_vector_fadd_v4f64:
; NO-FMA: # %bb.0: # %entry
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,0.10000000000000001]
+; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0000000000000001E-1]
; NO-FMA-NEXT: addpd %xmm1, %xmm0
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v2f64:
; HAS-FMA: # %bb.0: # %entry
-; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.5,0.5]
-; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [3.5,2.5]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.5E+0,5.0E-1]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [3.5E+0,2.5E+0]
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; HAS-FMA-NEXT: retq
entry:
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; HAS-FMA-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
-; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [2.5,1.5]
-; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm2 = [5.5,4.5]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [2.5E+0,1.5E+0]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm2 = [5.5E+0,4.5E+0]
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm2 = (xmm0 * xmm2) + mem
; HAS-FMA-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
; HAS-FMA-NEXT: retq
;
; HAS-FMA-LABEL: constrained_vector_fma_v4f64:
; HAS-FMA: # %bb.0: # %entry
-; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm1 = [3.5,2.5,1.5,0.5]
-; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [7.5,6.5,5.5,4.5]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm1 = [3.5E+0,2.5E+0,1.5E+0,5.0E-1]
+; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [7.5E+0,6.5E+0,5.5E+0,4.5E+0]
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
; HAS-FMA-NEXT: retq
entry:
;
; HAS-FMA-LABEL: constrained_vector_fma_v4f32:
; HAS-FMA: # %bb.0: # %entry
-; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm1 = [3.5,2.5,1.5,0.5]
-; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm0 = [7.5,6.5,5.5,4.5]
+; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm1 = [3.5E+0,2.5E+0,1.5E+0,5.0E-1]
+; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm0 = [7.5E+0,6.5E+0,5.5E+0,4.5E+0]
; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; HAS-FMA-NEXT: retq
entry:
;
; HAS-FMA-LABEL: constrained_vector_fma_v8f32:
; HAS-FMA: # %bb.0: # %entry
-; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm1 = [3.5,2.5,1.5,0.5,7.5,6.5,5.5,4.5]
-; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm0 = [7.5,6.5,5.5,4.5,11.5,10.5,9.5,8.5]
+; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm1 = [3.5E+0,2.5E+0,1.5E+0,5.0E-1,7.5E+0,6.5E+0,5.5E+0,4.5E+0]
+; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm0 = [7.5E+0,6.5E+0,5.5E+0,4.5E+0,1.15E+1,1.05E+1,9.5E+0,8.5E+0]
; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
; HAS-FMA-NEXT: retq
entry:
define <2 x double> @constant_fold_vpermilvar_pd() {
; X32-LABEL: constant_fold_vpermilvar_pd:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1]
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,1.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_pd:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = [2,1]
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,1.0E+0]
; X64-NEXT: retq
%1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> <double 1.0, double 2.0>, <2 x i64> <i64 2, i64 0>)
ret <2 x double> %1
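(For the pd variant of vpermilvar, the hardware selector is bit 1 of each 64-bit control element, not bit 0:

\[ \mathrm{result}[i] = \mathrm{src}\big[(\mathrm{ctrl}[i] \gg 1) \mathbin{\&} 1\big], \quad\text{so}\quad \langle 2, 0\rangle \mapsto \langle \mathrm{src}[1], \mathrm{src}[0]\rangle = \langle 2.0, 1.0\rangle, \]

which is why the folded constant above is [2.0E+0,1.0E+0] rather than an identity shuffle.)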
define <4 x double> @constant_fold_vpermilvar_pd_256() {
; X32-LABEL: constant_fold_vpermilvar_pd_256:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} ymm0 = [2,1,3,4]
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [2.0E+0,1.0E+0,3.0E+0,4.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_pd_256:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} ymm0 = [2,1,3,4]
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [2.0E+0,1.0E+0,3.0E+0,4.0E+0]
; X64-NEXT: retq
%1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
ret <4 x double> %1
define <4 x float> @constant_fold_vpermilvar_ps() {
; X32-LABEL: constant_fold_vpermilvar_ps:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = [4,1,3,2]
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,1.0E+0,3.0E+0,2.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_ps:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = [4,1,3,2]
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,1.0E+0,3.0E+0,2.0E+0]
; X64-NEXT: retq
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x i32> <i32 3, i32 0, i32 2, i32 1>)
ret <4 x float> %1
define <8 x float> @constant_fold_vpermilvar_ps_256() {
; X32-LABEL: constant_fold_vpermilvar_ps_256:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1,1,3,2,5,6,6,6]
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1.0E+0,1.0E+0,3.0E+0,2.0E+0,5.0E+0,6.0E+0,6.0E+0,6.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_ps_256:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1,1,3,2,5,6,6,6]
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1.0E+0,1.0E+0,3.0E+0,2.0E+0,5.0E+0,6.0E+0,6.0E+0,6.0E+0]
; X64-NEXT: retq
%1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 1>)
ret <8 x float> %1
define <8 x float> @constant_fold_permps() {
; CHECK-LABEL: constant_fold_permps:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,3,2,8,2,6,1]
+; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5.0E+0,7.0E+0,3.0E+0,2.0E+0,8.0E+0,2.0E+0,6.0E+0,1.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 6, i32 2, i32 1, i32 7, i32 1, i32 5, i32 0>)
ret <8 x float> %1
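Unlike vpermilvar, vpermps permutes across the full 8-element vector. A sketch, assuming only the low 3 bits of each index participate:

#include <cstdio>

int main() {
  float src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  int idx[8] = {4, 6, 2, 1, 7, 1, 5, 0};
  for (int i = 0; i < 8; ++i)
    std::printf("%g ", src[idx[i] & 7]); // 5 7 3 2 8 2 6 1, matching the CHECK
  std::printf("\n");
}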
define <2 x double> @constant_fold_vpermil2pd() {
; X32-LABEL: constant_fold_vpermil2pd:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2pd:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; X64-NEXT: retq
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> <double 1.0, double 2.0>, <2 x double> <double -2.0, double -1.0>, <2 x i64> <i64 4, i64 2>, i8 2)
ret <2 x double> %1
define <4 x double> @constant_fold_vpermil2pd_256() {
; X32-LABEL: constant_fold_vpermil2pd_256:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-4,0,4,3]
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-4.0E+0,0.0E+0,4.0E+0,3.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2pd_256:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-4,0,4,3]
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-4.0E+0,0.0E+0,4.0E+0,3.0E+0]
; X64-NEXT: retq
%1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x double> <double -4.0, double -3.0, double -2.0, double -1.0>, <4 x i64> <i64 4, i64 8, i64 2, i64 0>, i8 2)
ret <4 x double> %1
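The zero lane in the 256-bit result comes from the m2z immediate. A heavily hedged sketch of my reading of the XOP vpermil2pd semantics: bit 1 of each 64-bit selector picks the element within its 128-bit lane, bit 2 picks the source, and with m2z == 2 a set bit 3 zeroes the element:

#include <cstdio>

int main() {
  double src1[4] = {1.0, 2.0, 3.0, 4.0};
  double src2[4] = {-4.0, -3.0, -2.0, -1.0};
  long sel[4] = {4, 8, 2, 0};
  for (int i = 0; i < 4; ++i) {
    int lane = (i / 2) * 2;                          // base of the 128-bit lane
    int elt = lane + ((sel[i] >> 1) & 1);            // bit 1: element in lane
    double v = (sel[i] & 4) ? src2[elt] : src1[elt]; // bit 2: source select
    std::printf("%g ", (sel[i] & 8) ? 0.0 : v);      // -4 0 4 3, as checked
  }
  std::printf("\n");
}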
define <4 x float> @constant_fold_vpermil2ps() {
; X32-LABEL: constant_fold_vpermil2ps:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-4,1,3,0]
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,1.0E+0,3.0E+0,0.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2ps:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-4,1,3,0]
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,1.0E+0,3.0E+0,0.0E+0]
; X64-NEXT: retq
%1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float -4.0, float -3.0, float -2.0, float -1.0>, <4 x i32> <i32 4, i32 0, i32 2, i32 8>, i8 2)
ret <4 x float> %1
define <8 x float> @constant_fold_vpermil2ps_256() {
; X32-LABEL: constant_fold_vpermil2ps_256:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-8,1,3,0,5,0,5,7]
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-8.0E+0,1.0E+0,3.0E+0,0.0E+0,5.0E+0,0.0E+0,5.0E+0,7.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2ps_256:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-8,1,3,0,5,0,5,7]
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-8.0E+0,1.0E+0,3.0E+0,0.0E+0,5.0E+0,0.0E+0,5.0E+0,7.0E+0]
; X64-NEXT: retq
%1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x float> <float -8.0, float -7.0, float -6.0, float -5.0, float -4.0, float -3.0, float -2.0, float -1.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 8, i32 0, i32 8, i32 0, i32 2>, i8 2)
ret <8 x float> %1
define <4 x float> @combine_constant_insertion_v4f32(float %f) {
; SSE2-LABEL: combine_constant_insertion_v4f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = <u,4,5,3>
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = <u,4.0E+0,5.0E+0,3.0E+0>
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_constant_insertion_v4f32:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movaps {{.*#+}} xmm1 = <u,4,5,3>
+; SSSE3-NEXT: movaps {{.*#+}} xmm1 = <u,4.0E+0,5.0E+0,3.0E+0>
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR30264:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm1 = <u,u,4,1>
+; SSE41-NEXT: movaps {{.*#+}} xmm1 = <u,u,4.0E+0,1.0E+0>
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm0[0],zero,xmm1[2,3]
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: PR30264:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm1 = <u,u,4,1>
+; AVX-NEXT: vmovaps {{.*#+}} xmm1 = <u,u,4.0E+0,1.0E+0>
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.0, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq (%rdi,%rsi,8), %rax
-; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [0.5,0.5,0.5,0.5]
+; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
; AVX1-NEXT: vblendvpd %ymm0, {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vmovupd %ymm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: movq (%rdi,%rsi,8), %rax
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.5,-0.5,-0.5,-0.5]
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.5,0.5,0.5,0.5]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT: vmovupd %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
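The +-0.5 broadcasts above feed a vblendvpd, which selects per element on the sign bit of the mask register. A sketch with hypothetical mask lanes, since the mask values depend on the function's inputs:

#include <cstdio>

int main() {
  long long mask[4] = {-1, 0, -1, 0}; // hypothetical sign-mask lanes (ymm0)
  for (int i = 0; i < 4; ++i)
    std::printf("%g ", mask[i] < 0 ? -0.5 : 0.5); // ymm1 vs ymm2 per lane
  std::printf("\n");
}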
; CHECK-NEXT: movl $1073741824, {{[0-9]+}}(%esp) # imm = 0x40000000
; CHECK-NEXT: movl $1065353216, {{[0-9]+}}(%esp) # imm = 0x3F800000
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1976.04004,1976.04004,1976.04004,u>
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1.97604004E+3,1.97604004E+3,1.97604004E+3,u>
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %forbody
; CHECK-NEXT: .long 1088421888 ## float 7
; CHECK-LABEL: foo2:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4,5,6,7]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: retq
%val = uitofp <4 x i32> <i32 4, i32 5, i32 6, i32 7> to <4 x float>
; CHECK-NEXT: .long 1132396544 ## float 255
; CHECK-LABEL: foo4:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,127,128,255]
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.27E+2,1.28E+2,2.55E+2]
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: retq
%val = uitofp <4 x i8> <i8 1, i8 127, i8 -128, i8 -1> to <4 x float>
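The odd-looking constants here fall out of uitofp treating the i8 literals as bit patterns: -128 and -1 read back as unsigned 128 and 255. A quick sketch:

#include <cstdint>
#include <cstdio>

int main() {
  int8_t vals[4] = {1, 127, -128, -1};
  for (int8_t v : vals)
    std::printf("%.1f\n", (float)(uint8_t)v); // 1.0 127.0 128.0 255.0
}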