; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
define <16 x i32> @no_existing_zext(<16 x i8> %a, <16 x i32> %op) {
; CHECK-LABEL: no_existing_zext:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v5, #10
; CHECK-NEXT: cmhi.16b v0, v0, v5
; CHECK-NEXT: sshll.8h v5, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: sshll.4s v6, v5, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v5, v5, #0
; CHECK-NEXT: and.16b v4, v4, v0
; CHECK-NEXT: and.16b v5, v2, v5
; CHECK-NEXT: and.16b v2, v3, v7
; CHECK-NEXT: and.16b v0, v1, v6
; CHECK-NEXT: mov.16b v1, v5
; CHECK-NEXT: mov.16b v3, v4
; CHECK-NEXT: ret
entry:
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %op, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}
define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: second_compare_operand_not_splat:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v2, #0
; CHECK-NEXT: ushll2.4s v2, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v2, v0
; CHECK-NEXT: and.16b v1, v1, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, %b
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}
define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v1, v0
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: cmhi.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v1, v0
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}
define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v2, v0, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ext.16b v1, v3, v3, #8
; CHECK-NEXT: sshll.8h v5, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ext.16b v4, v2, v2, #8
; CHECK-NEXT: ext.16b v6, v5, v5, #8
; CHECK-NEXT: ext.16b v7, v0, v0, #8
; CHECK-NEXT: and.8b v0, v2, v0
; CHECK-NEXT: sshll.4s v2, v0, #0
; CHECK-NEXT: and.8b v0, v3, v5
; CHECK-NEXT: and.8b v1, v1, v6
; CHECK-NEXT: and.8b v3, v4, v7
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v1, v1, #0
; CHECK-NEXT: sshll.4s v3, v3, #0
; CHECK-NEXT: ret
entry:
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

define <16 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v2, v0, #0
; CHECK-NEXT: cmhi.16b v0, v0, v1
; CHECK-NEXT: ext.16b v1, v3, v3, #8
; CHECK-NEXT: sshll.8h v5, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ext.16b v4, v2, v2, #8
; CHECK-NEXT: ext.16b v6, v5, v5, #8
; CHECK-NEXT: ext.16b v7, v0, v0, #8
; CHECK-NEXT: and.8b v0, v2, v0
; CHECK-NEXT: sshll.4s v2, v0, #0
; CHECK-NEXT: and.8b v0, v3, v5
; CHECK-NEXT: and.8b v1, v1, v6
; CHECK-NEXT: and.8b v3, v4, v7
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v1, v1, #0
; CHECK-NEXT: sshll.4s v3, v3, #0
; CHECK-NEXT: ret
entry:
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v3, v0, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: cmgt.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v5, v3, #0
; CHECK-NEXT: ushll2.4s v1, v3, #0
; CHECK-NEXT: sshll.8h v3, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v6, v3, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: and.16b v3, v1, v0
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}