ret void
}
+; Truncating masked store <4 x i32> -> <4 x i16> at the natural element
+; alignment (2): expected to select a single predicated vstrht.32 under a VPT
+; block; big-endian additionally lane-reverses the input with vrev64.32 first.
+define arm_aapcs_vfpcc void @masked_v4i16(<4 x i16> *%dest, <4 x i32> %a) {
+; CHECK-LE-LABEL: masked_v4i16:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
+; CHECK-LE-NEXT: vstrht.32 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v4i16:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
+; CHECK-BE-NEXT: vstrht.32 q1, [r0]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = icmp sgt <4 x i32> %a, zeroinitializer
+ %trunc = trunc <4 x i32> %a to <4 x i16>
+ call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %trunc, <4 x i16>* %dest, i32 2, <4 x i1> %c)
+ ret void
+}
+
+; Truncating masked store <4 x i32> -> <4 x i8>, align 1 (natural for i8):
+; expected to fold into one predicated vstrbt.32 (byte store of .32 lanes);
+; big-endian adds a vrev64.32 to normalise the lane order first.
+define arm_aapcs_vfpcc void @masked_v4i8(<4 x i8> *%dest, <4 x i32> %a) {
+; CHECK-LE-LABEL: masked_v4i8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
+; CHECK-LE-NEXT: vstrbt.32 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v4i8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
+; CHECK-BE-NEXT: vstrbt.32 q1, [r0]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = icmp sgt <4 x i32> %a, zeroinitializer
+ %trunc = trunc <4 x i32> %a to <4 x i8>
+ call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %trunc, <4 x i8>* %dest, i32 1, <4 x i1> %c)
+ ret void
+}
+
+; Truncating masked store <8 x i16> -> <8 x i8>, align 1: expected to select a
+; single predicated vstrbt.16 (byte store of .16 lanes); big-endian first
+; reorders lanes with vrev64.16.
+define arm_aapcs_vfpcc void @masked_v8i8(<8 x i8> *%dest, <8 x i16> %a) {
+; CHECK-LE-LABEL: masked_v8i8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
+; CHECK-LE-NEXT: vstrbt.16 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v8i8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vrev64.16 q1, q0
+; CHECK-BE-NEXT: vpt.s16 gt, q1, zr
+; CHECK-BE-NEXT: vstrbt.16 q1, [r0]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = icmp sgt <8 x i16> %a, zeroinitializer
+ %trunc = trunc <8 x i16> %a to <8 x i8>
+ call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %trunc, <8 x i8>* %dest, i32 1, <8 x i1> %c)
+ ret void
+}
+
+; Same <4 x i32> -> <4 x i16> masked store but with under-aligned (align 1)
+; destination, so no predicated vstrh is used: the predicate is moved to a GPR
+; via vmrs, packed into a 4-bit mask with ubfx/rsbs/bfi, and each lane is
+; stored with an IT-predicated scalar strh.
+define arm_aapcs_vfpcc void @masked_v4i16_align1(<4 x i16> *%dest, <4 x i32> %a) {
+; CHECK-LE-LABEL: masked_v4i16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
+; CHECK-LE-NEXT: vmrs r2, p0
+; CHECK-LE-NEXT: and r1, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r1, #0
+; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: itt ne
+; CHECK-LE-NEXT: vmovne r2, s0
+; CHECK-LE-NEXT: strhne r2, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: itt mi
+; CHECK-LE-NEXT: vmovmi r2, s1
+; CHECK-LE-NEXT: strhmi r2, [r0, #2]
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: itt mi
+; CHECK-LE-NEXT: vmovmi r2, s2
+; CHECK-LE-NEXT: strhmi r2, [r0, #4]
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: itt mi
+; CHECK-LE-NEXT: vmovmi r1, s3
+; CHECK-LE-NEXT: strhmi r1, [r0, #6]
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v4i16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
+; CHECK-BE-NEXT: vmrs r2, p0
+; CHECK-BE-NEXT: and r1, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r1, #0
+; CHECK-BE-NEXT: movs r1, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
+; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: itt ne
+; CHECK-BE-NEXT: vmovne r2, s4
+; CHECK-BE-NEXT: strhne r2, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: itt mi
+; CHECK-BE-NEXT: vmovmi r2, s5
+; CHECK-BE-NEXT: strhmi r2, [r0, #2]
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: itt mi
+; CHECK-BE-NEXT: vmovmi r2, s6
+; CHECK-BE-NEXT: strhmi r2, [r0, #4]
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: itt mi
+; CHECK-BE-NEXT: vmovmi r1, s7
+; CHECK-BE-NEXT: strhmi r1, [r0, #6]
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = icmp sgt <4 x i32> %a, zeroinitializer
+ %trunc = trunc <4 x i32> %a to <4 x i16>
+ call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %trunc, <4 x i16>* %dest, i32 1, <4 x i1> %c)
+ ret void
+}
+
+; fptrunc <4 x float> -> <4 x half> masked store, align 4. The generated code
+; builds the narrowed vector lane-by-lane with vcvtb.f16.f32/vmov.16,
+; materialises the mask from four scalar vcmp.f32 results into a 4-bit GPR
+; mask, then branches per bit to a conditional vstr.16 of each half lane
+; (odd lanes extracted with vmovx.f16).
+define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> %a) {
+; CHECK-LE-LABEL: masked_v4f16_align4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
+; CHECK-LE-NEXT: vcmp.f32 s0, #0
+; CHECK-LE-NEXT: vmov r1, s4
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s1
+; CHECK-LE-NEXT: vmov r2, s4
+; CHECK-LE-NEXT: vmov.16 q1[0], r1
+; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s2
+; CHECK-LE-NEXT: vmov.16 q1[1], r2
+; CHECK-LE-NEXT: vmov r1, s8
+; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s3
+; CHECK-LE-NEXT: vmov.16 q1[2], r1
+; CHECK-LE-NEXT: vmov r1, s8
+; CHECK-LE-NEXT: vmov.16 q1[3], r1
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: mov.w r1, #0
+; CHECK-LE-NEXT: vcmp.f32 s1, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r1, #1
+; CHECK-LE-NEXT: cmp r1, #0
+; CHECK-LE-NEXT: cset r1, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: and r1, r1, #1
+; CHECK-LE-NEXT: vcmp.f32 s2, #0
+; CHECK-LE-NEXT: rsb.w r3, r1, #0
+; CHECK-LE-NEXT: mov.w r1, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: mov.w r3, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r3, #1
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: cset r3, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: and r3, r3, #1
+; CHECK-LE-NEXT: vcmp.f32 s3, #0
+; CHECK-LE-NEXT: rsb.w r3, r3, #0
+; CHECK-LE-NEXT: mov.w r2, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: mov.w r3, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r3, #1
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: cset r3, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r2, #1
+; CHECK-LE-NEXT: cmp r2, #0
+; CHECK-LE-NEXT: and r3, r3, #1
+; CHECK-LE-NEXT: cset r2, ne
+; CHECK-LE-NEXT: and r2, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: bne .LBB25_5
+; CHECK-LE-NEXT: @ %bb.1: @ %else
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: bmi .LBB25_6
+; CHECK-LE-NEXT: .LBB25_2: @ %else2
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: bmi .LBB25_7
+; CHECK-LE-NEXT: .LBB25_3: @ %else4
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: bmi .LBB25_8
+; CHECK-LE-NEXT: .LBB25_4: @ %else6
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+; CHECK-LE-NEXT: .LBB25_5: @ %cond.store
+; CHECK-LE-NEXT: vstr.16 s4, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: bpl .LBB25_2
+; CHECK-LE-NEXT: .LBB25_6: @ %cond.store1
+; CHECK-LE-NEXT: vmovx.f16 s0, s4
+; CHECK-LE-NEXT: vstr.16 s0, [r0, #2]
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: bpl .LBB25_3
+; CHECK-LE-NEXT: .LBB25_7: @ %cond.store3
+; CHECK-LE-NEXT: vstr.16 s5, [r0, #4]
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: bpl .LBB25_4
+; CHECK-LE-NEXT: .LBB25_8: @ %cond.store5
+; CHECK-LE-NEXT: vmovx.f16 s0, s5
+; CHECK-LE-NEXT: vstr.16 s0, [r0, #6]
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v4f16_align4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
+; CHECK-BE-NEXT: vcmp.f32 s4, #0
+; CHECK-BE-NEXT: vmov r1, s0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s5
+; CHECK-BE-NEXT: vmov r2, s0
+; CHECK-BE-NEXT: vmov.16 q0[0], r1
+; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s6
+; CHECK-BE-NEXT: vmov.16 q0[1], r2
+; CHECK-BE-NEXT: vmov r1, s8
+; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s7
+; CHECK-BE-NEXT: vmov.16 q0[2], r1
+; CHECK-BE-NEXT: vmov r1, s8
+; CHECK-BE-NEXT: vmov.16 q0[3], r1
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: mov.w r1, #0
+; CHECK-BE-NEXT: vcmp.f32 s5, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r1, #1
+; CHECK-BE-NEXT: cmp r1, #0
+; CHECK-BE-NEXT: cset r1, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: and r1, r1, #1
+; CHECK-BE-NEXT: vcmp.f32 s6, #0
+; CHECK-BE-NEXT: rsb.w r3, r1, #0
+; CHECK-BE-NEXT: mov.w r1, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: mov.w r3, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r3, #1
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: cset r3, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: and r3, r3, #1
+; CHECK-BE-NEXT: vcmp.f32 s7, #0
+; CHECK-BE-NEXT: rsb.w r3, r3, #0
+; CHECK-BE-NEXT: mov.w r2, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: mov.w r3, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r3, #1
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: cset r3, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r2, #1
+; CHECK-BE-NEXT: cmp r2, #0
+; CHECK-BE-NEXT: and r3, r3, #1
+; CHECK-BE-NEXT: cset r2, ne
+; CHECK-BE-NEXT: and r2, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: bne .LBB25_5
+; CHECK-BE-NEXT: @ %bb.1: @ %else
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: bmi .LBB25_6
+; CHECK-BE-NEXT: .LBB25_2: @ %else2
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: bmi .LBB25_7
+; CHECK-BE-NEXT: .LBB25_3: @ %else4
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: bmi .LBB25_8
+; CHECK-BE-NEXT: .LBB25_4: @ %else6
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
+; CHECK-BE-NEXT: .LBB25_5: @ %cond.store
+; CHECK-BE-NEXT: vstr.16 s0, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: bpl .LBB25_2
+; CHECK-BE-NEXT: .LBB25_6: @ %cond.store1
+; CHECK-BE-NEXT: vmovx.f16 s4, s0
+; CHECK-BE-NEXT: vstr.16 s4, [r0, #2]
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: bpl .LBB25_3
+; CHECK-BE-NEXT: .LBB25_7: @ %cond.store3
+; CHECK-BE-NEXT: vstr.16 s1, [r0, #4]
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: bpl .LBB25_4
+; CHECK-BE-NEXT: .LBB25_8: @ %cond.store5
+; CHECK-BE-NEXT: vmovx.f16 s0, s1
+; CHECK-BE-NEXT: vstr.16 s0, [r0, #6]
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = fcmp ogt <4 x float> %a, zeroinitializer
+ %trunc = fptrunc <4 x float> %a to <4 x half>
+ call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 4, <4 x i1> %c)
+ ret void
+}
+
+; Same fptrunc <4 x float> -> <4 x half> masked store at align 2; codegen is
+; identical to the align-4 case (scalarised compares, GPR bit mask, per-lane
+; conditional vstr.16), only the intrinsic's alignment argument differs.
+define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> %a) {
+; CHECK-LE-LABEL: masked_v4f16_align2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #4
+; CHECK-LE-NEXT: sub sp, #4
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
+; CHECK-LE-NEXT: vcmp.f32 s0, #0
+; CHECK-LE-NEXT: vmov r1, s4
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s1
+; CHECK-LE-NEXT: vmov r2, s4
+; CHECK-LE-NEXT: vmov.16 q1[0], r1
+; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s2
+; CHECK-LE-NEXT: vmov.16 q1[1], r2
+; CHECK-LE-NEXT: vmov r1, s8
+; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s3
+; CHECK-LE-NEXT: vmov.16 q1[2], r1
+; CHECK-LE-NEXT: vmov r1, s8
+; CHECK-LE-NEXT: vmov.16 q1[3], r1
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: mov.w r1, #0
+; CHECK-LE-NEXT: vcmp.f32 s1, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r1, #1
+; CHECK-LE-NEXT: cmp r1, #0
+; CHECK-LE-NEXT: cset r1, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: and r1, r1, #1
+; CHECK-LE-NEXT: vcmp.f32 s2, #0
+; CHECK-LE-NEXT: rsb.w r3, r1, #0
+; CHECK-LE-NEXT: mov.w r1, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: mov.w r3, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r3, #1
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: cset r3, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: and r3, r3, #1
+; CHECK-LE-NEXT: vcmp.f32 s3, #0
+; CHECK-LE-NEXT: rsb.w r3, r3, #0
+; CHECK-LE-NEXT: mov.w r2, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: mov.w r3, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r3, #1
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: cset r3, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r2, #1
+; CHECK-LE-NEXT: cmp r2, #0
+; CHECK-LE-NEXT: and r3, r3, #1
+; CHECK-LE-NEXT: cset r2, ne
+; CHECK-LE-NEXT: and r2, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: bne .LBB26_5
+; CHECK-LE-NEXT: @ %bb.1: @ %else
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: bmi .LBB26_6
+; CHECK-LE-NEXT: .LBB26_2: @ %else2
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: bmi .LBB26_7
+; CHECK-LE-NEXT: .LBB26_3: @ %else4
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: bmi .LBB26_8
+; CHECK-LE-NEXT: .LBB26_4: @ %else6
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+; CHECK-LE-NEXT: .LBB26_5: @ %cond.store
+; CHECK-LE-NEXT: vstr.16 s4, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: bpl .LBB26_2
+; CHECK-LE-NEXT: .LBB26_6: @ %cond.store1
+; CHECK-LE-NEXT: vmovx.f16 s0, s4
+; CHECK-LE-NEXT: vstr.16 s0, [r0, #2]
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: bpl .LBB26_3
+; CHECK-LE-NEXT: .LBB26_7: @ %cond.store3
+; CHECK-LE-NEXT: vstr.16 s5, [r0, #4]
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: bpl .LBB26_4
+; CHECK-LE-NEXT: .LBB26_8: @ %cond.store5
+; CHECK-LE-NEXT: vmovx.f16 s0, s5
+; CHECK-LE-NEXT: vstr.16 s0, [r0, #6]
+; CHECK-LE-NEXT: add sp, #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v4f16_align2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #4
+; CHECK-BE-NEXT: sub sp, #4
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
+; CHECK-BE-NEXT: vcmp.f32 s4, #0
+; CHECK-BE-NEXT: vmov r1, s0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s5
+; CHECK-BE-NEXT: vmov r2, s0
+; CHECK-BE-NEXT: vmov.16 q0[0], r1
+; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s6
+; CHECK-BE-NEXT: vmov.16 q0[1], r2
+; CHECK-BE-NEXT: vmov r1, s8
+; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s7
+; CHECK-BE-NEXT: vmov.16 q0[2], r1
+; CHECK-BE-NEXT: vmov r1, s8
+; CHECK-BE-NEXT: vmov.16 q0[3], r1
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: mov.w r1, #0
+; CHECK-BE-NEXT: vcmp.f32 s5, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r1, #1
+; CHECK-BE-NEXT: cmp r1, #0
+; CHECK-BE-NEXT: cset r1, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: and r1, r1, #1
+; CHECK-BE-NEXT: vcmp.f32 s6, #0
+; CHECK-BE-NEXT: rsb.w r3, r1, #0
+; CHECK-BE-NEXT: mov.w r1, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: mov.w r3, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r3, #1
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: cset r3, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: and r3, r3, #1
+; CHECK-BE-NEXT: vcmp.f32 s7, #0
+; CHECK-BE-NEXT: rsb.w r3, r3, #0
+; CHECK-BE-NEXT: mov.w r2, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: mov.w r3, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r3, #1
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: cset r3, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r2, #1
+; CHECK-BE-NEXT: cmp r2, #0
+; CHECK-BE-NEXT: and r3, r3, #1
+; CHECK-BE-NEXT: cset r2, ne
+; CHECK-BE-NEXT: and r2, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: bne .LBB26_5
+; CHECK-BE-NEXT: @ %bb.1: @ %else
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: bmi .LBB26_6
+; CHECK-BE-NEXT: .LBB26_2: @ %else2
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: bmi .LBB26_7
+; CHECK-BE-NEXT: .LBB26_3: @ %else4
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: bmi .LBB26_8
+; CHECK-BE-NEXT: .LBB26_4: @ %else6
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
+; CHECK-BE-NEXT: .LBB26_5: @ %cond.store
+; CHECK-BE-NEXT: vstr.16 s0, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: bpl .LBB26_2
+; CHECK-BE-NEXT: .LBB26_6: @ %cond.store1
+; CHECK-BE-NEXT: vmovx.f16 s4, s0
+; CHECK-BE-NEXT: vstr.16 s4, [r0, #2]
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: bpl .LBB26_3
+; CHECK-BE-NEXT: .LBB26_7: @ %cond.store3
+; CHECK-BE-NEXT: vstr.16 s1, [r0, #4]
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: bpl .LBB26_4
+; CHECK-BE-NEXT: .LBB26_8: @ %cond.store5
+; CHECK-BE-NEXT: vmovx.f16 s0, s1
+; CHECK-BE-NEXT: vstr.16 s0, [r0, #6]
+; CHECK-BE-NEXT: add sp, #4
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = fcmp ogt <4 x float> %a, zeroinitializer
+ %trunc = fptrunc <4 x float> %a to <4 x half>
+ call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 2, <4 x i1> %c)
+ ret void
+}
+
+; fptrunc <4 x float> -> <4 x half> masked store, align 1. Like the align-2/4
+; cases, but each conditional half-store cannot use vstr.16 directly on the
+; under-aligned destination: the lane is bounced through a 16-byte spill area
+; on the stack (vstr.16 to [sp, #off], ldrh.w, then scalar strh to [r0, #off]).
+define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> %a) {
+; CHECK-LE-LABEL: masked_v4f16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #20
+; CHECK-LE-NEXT: sub sp, #20
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
+; CHECK-LE-NEXT: vcmp.f32 s0, #0
+; CHECK-LE-NEXT: vmov r1, s4
+; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s1
+; CHECK-LE-NEXT: vmov r2, s4
+; CHECK-LE-NEXT: vmov.16 q1[0], r1
+; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s2
+; CHECK-LE-NEXT: vmov.16 q1[1], r2
+; CHECK-LE-NEXT: vmov r1, s8
+; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s3
+; CHECK-LE-NEXT: vmov.16 q1[2], r1
+; CHECK-LE-NEXT: vmov r1, s8
+; CHECK-LE-NEXT: vmov.16 q1[3], r1
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: mov.w r1, #0
+; CHECK-LE-NEXT: vcmp.f32 s1, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r1, #1
+; CHECK-LE-NEXT: cmp r1, #0
+; CHECK-LE-NEXT: cset r1, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: and r1, r1, #1
+; CHECK-LE-NEXT: vcmp.f32 s2, #0
+; CHECK-LE-NEXT: rsb.w r3, r1, #0
+; CHECK-LE-NEXT: mov.w r1, #0
+; CHECK-LE-NEXT: bfi r1, r3, #0, #1
+; CHECK-LE-NEXT: mov.w r3, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r3, #1
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: cset r3, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: and r3, r3, #1
+; CHECK-LE-NEXT: vcmp.f32 s3, #0
+; CHECK-LE-NEXT: rsb.w r3, r3, #0
+; CHECK-LE-NEXT: mov.w r2, #0
+; CHECK-LE-NEXT: bfi r1, r3, #1, #1
+; CHECK-LE-NEXT: mov.w r3, #0
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r3, #1
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: cset r3, ne
+; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-LE-NEXT: it gt
+; CHECK-LE-NEXT: movgt r2, #1
+; CHECK-LE-NEXT: cmp r2, #0
+; CHECK-LE-NEXT: and r3, r3, #1
+; CHECK-LE-NEXT: cset r2, ne
+; CHECK-LE-NEXT: and r2, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r1, r2, #3, #1
+; CHECK-LE-NEXT: lsls r2, r1, #31
+; CHECK-LE-NEXT: bne .LBB27_5
+; CHECK-LE-NEXT: @ %bb.1: @ %else
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: bmi .LBB27_6
+; CHECK-LE-NEXT: .LBB27_2: @ %else2
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: bmi .LBB27_7
+; CHECK-LE-NEXT: .LBB27_3: @ %else4
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: bmi .LBB27_8
+; CHECK-LE-NEXT: .LBB27_4: @ %else6
+; CHECK-LE-NEXT: add sp, #20
+; CHECK-LE-NEXT: bx lr
+; CHECK-LE-NEXT: .LBB27_5: @ %cond.store
+; CHECK-LE-NEXT: vstr.16 s4, [sp, #12]
+; CHECK-LE-NEXT: ldrh.w r2, [sp, #12]
+; CHECK-LE-NEXT: strh r2, [r0]
+; CHECK-LE-NEXT: lsls r2, r1, #30
+; CHECK-LE-NEXT: bpl .LBB27_2
+; CHECK-LE-NEXT: .LBB27_6: @ %cond.store1
+; CHECK-LE-NEXT: vmovx.f16 s0, s4
+; CHECK-LE-NEXT: vstr.16 s0, [sp, #8]
+; CHECK-LE-NEXT: ldrh.w r2, [sp, #8]
+; CHECK-LE-NEXT: strh r2, [r0, #2]
+; CHECK-LE-NEXT: lsls r2, r1, #29
+; CHECK-LE-NEXT: bpl .LBB27_3
+; CHECK-LE-NEXT: .LBB27_7: @ %cond.store3
+; CHECK-LE-NEXT: vstr.16 s5, [sp, #4]
+; CHECK-LE-NEXT: ldrh.w r2, [sp, #4]
+; CHECK-LE-NEXT: strh r2, [r0, #4]
+; CHECK-LE-NEXT: lsls r1, r1, #28
+; CHECK-LE-NEXT: bpl .LBB27_4
+; CHECK-LE-NEXT: .LBB27_8: @ %cond.store5
+; CHECK-LE-NEXT: vmovx.f16 s0, s5
+; CHECK-LE-NEXT: vstr.16 s0, [sp]
+; CHECK-LE-NEXT: ldrh.w r1, [sp]
+; CHECK-LE-NEXT: strh r1, [r0, #6]
+; CHECK-LE-NEXT: add sp, #20
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: masked_v4f16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #20
+; CHECK-BE-NEXT: sub sp, #20
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
+; CHECK-BE-NEXT: vcmp.f32 s4, #0
+; CHECK-BE-NEXT: vmov r1, s0
+; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s5
+; CHECK-BE-NEXT: vmov r2, s0
+; CHECK-BE-NEXT: vmov.16 q0[0], r1
+; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s6
+; CHECK-BE-NEXT: vmov.16 q0[1], r2
+; CHECK-BE-NEXT: vmov r1, s8
+; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s7
+; CHECK-BE-NEXT: vmov.16 q0[2], r1
+; CHECK-BE-NEXT: vmov r1, s8
+; CHECK-BE-NEXT: vmov.16 q0[3], r1
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: mov.w r1, #0
+; CHECK-BE-NEXT: vcmp.f32 s5, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r1, #1
+; CHECK-BE-NEXT: cmp r1, #0
+; CHECK-BE-NEXT: cset r1, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: and r1, r1, #1
+; CHECK-BE-NEXT: vcmp.f32 s6, #0
+; CHECK-BE-NEXT: rsb.w r3, r1, #0
+; CHECK-BE-NEXT: mov.w r1, #0
+; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: mov.w r3, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r3, #1
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: cset r3, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: and r3, r3, #1
+; CHECK-BE-NEXT: vcmp.f32 s7, #0
+; CHECK-BE-NEXT: rsb.w r3, r3, #0
+; CHECK-BE-NEXT: mov.w r2, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
+; CHECK-BE-NEXT: mov.w r3, #0
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r3, #1
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: cset r3, ne
+; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-BE-NEXT: it gt
+; CHECK-BE-NEXT: movgt r2, #1
+; CHECK-BE-NEXT: cmp r2, #0
+; CHECK-BE-NEXT: and r3, r3, #1
+; CHECK-BE-NEXT: cset r2, ne
+; CHECK-BE-NEXT: and r2, r2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r1, r2, #3, #1
+; CHECK-BE-NEXT: lsls r2, r1, #31
+; CHECK-BE-NEXT: bne .LBB27_5
+; CHECK-BE-NEXT: @ %bb.1: @ %else
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: bmi .LBB27_6
+; CHECK-BE-NEXT: .LBB27_2: @ %else2
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: bmi .LBB27_7
+; CHECK-BE-NEXT: .LBB27_3: @ %else4
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: bmi .LBB27_8
+; CHECK-BE-NEXT: .LBB27_4: @ %else6
+; CHECK-BE-NEXT: add sp, #20
+; CHECK-BE-NEXT: bx lr
+; CHECK-BE-NEXT: .LBB27_5: @ %cond.store
+; CHECK-BE-NEXT: vstr.16 s0, [sp, #12]
+; CHECK-BE-NEXT: ldrh.w r2, [sp, #12]
+; CHECK-BE-NEXT: strh r2, [r0]
+; CHECK-BE-NEXT: lsls r2, r1, #30
+; CHECK-BE-NEXT: bpl .LBB27_2
+; CHECK-BE-NEXT: .LBB27_6: @ %cond.store1
+; CHECK-BE-NEXT: vmovx.f16 s4, s0
+; CHECK-BE-NEXT: vstr.16 s4, [sp, #8]
+; CHECK-BE-NEXT: ldrh.w r2, [sp, #8]
+; CHECK-BE-NEXT: strh r2, [r0, #2]
+; CHECK-BE-NEXT: lsls r2, r1, #29
+; CHECK-BE-NEXT: bpl .LBB27_3
+; CHECK-BE-NEXT: .LBB27_7: @ %cond.store3
+; CHECK-BE-NEXT: vstr.16 s1, [sp, #4]
+; CHECK-BE-NEXT: ldrh.w r2, [sp, #4]
+; CHECK-BE-NEXT: strh r2, [r0, #4]
+; CHECK-BE-NEXT: lsls r1, r1, #28
+; CHECK-BE-NEXT: bpl .LBB27_4
+; CHECK-BE-NEXT: .LBB27_8: @ %cond.store5
+; CHECK-BE-NEXT: vmovx.f16 s0, s1
+; CHECK-BE-NEXT: vstr.16 s0, [sp]
+; CHECK-BE-NEXT: ldrh.w r1, [sp]
+; CHECK-BE-NEXT: strh r1, [r0, #6]
+; CHECK-BE-NEXT: add sp, #20
+; CHECK-BE-NEXT: bx lr
+entry:
+ %c = fcmp ogt <4 x float> %a, zeroinitializer
+ %trunc = fptrunc <4 x float> %a to <4 x half>
+ call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 1, <4 x i1> %c)
+ ret void
+}
+declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v4f16.p0v4f16(<4 x half>, <4 x half>*, i32, <4 x i1>)
declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)