--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux"
+
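+; Check lowering of (zext (icmp ugt x, y)) used as an operand of add, sub and
+; mul for scalable i32 vectors of 2, 4, 8 and 16 elements.
+
+; nxv2i32 elements are held unpacked in 64-bit lanes, so both compare operands
+; are masked with 0xffffffff before the unsigned cmphi.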
+define <vscale x 2 x i32> @zext.add2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT: mov z1.d, #1 // =0x1
+; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+ %result = add <vscale x 2 x i32> %zero.extend, %a0
+ ret <vscale x 2 x i32> %result
+}
+
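+; nxv4i32 maps directly onto a single Z register: one cmphi produces the
+; predicate and the constant 1 is added under that predicate.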
+define <vscale x 4 x i32> @zext.add4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+ %result = add <vscale x 4 x i32> %zero.extend, %a0
+ ret <vscale x 4 x i32> %result
+}
+
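+; nxv8i32 and nxv16i32 are wider than a single Z register and are split into
+; two and four nxv4i32 parts respectively.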
+define <vscale x 8 x i32> @zext.add8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z1.s, z3.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: mov z2.s, #1 // =0x1
+; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+ %result = add <vscale x 8 x i32> %zero.extend, %a0
+ ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @zext.add16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z3.s, z7.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z2.s, z6.s
+; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z4.s
+; CHECK-NEXT: mov z4.s, #1 // =0x1
+; CHECK-NEXT: add z0.s, p0/m, z0.s, z4.s
+; CHECK-NEXT: add z1.s, p3/m, z1.s, z4.s
+; CHECK-NEXT: add z2.s, p2/m, z2.s, z4.s
+; CHECK-NEXT: add z3.s, p1/m, z3.s, z4.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+ %result = add <vscale x 16 x i32> %zero.extend, %a0
+ ret <vscale x 16 x i32> %result
+}
+
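+; For the sub tests the zero-extended compare result is the first operand, so
+; it is materialised as #1 under the compare predicate (zero elsewhere) and
+; %a0 is subtracted from it with an unpredicated sub.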
+define <vscale x 2 x i32> @zext.sub2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+ %result = sub <vscale x 2 x i32> %zero.extend, %a0
+ ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @zext.sub4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+ %result = sub <vscale x 4 x i32> %zero.extend, %a0
+ ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @zext.sub8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z3.s
+; CHECK-NEXT: mov z2.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z3.s, p1/z, #1 // =0x1
+; CHECK-NEXT: sub z0.s, z3.s, z0.s
+; CHECK-NEXT: sub z1.s, z2.s, z1.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+ %result = sub <vscale x 8 x i32> %zero.extend, %a0
+ ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @zext.sub16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z3.s, z7.s
+; CHECK-NEXT: mov z4.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z5.s, p1/z, #1 // =0x1
+; CHECK-NEXT: mov z6.s, p3/z, #1 // =0x1
+; CHECK-NEXT: mov z7.s, p2/z, #1 // =0x1
+; CHECK-NEXT: sub z0.s, z7.s, z0.s
+; CHECK-NEXT: sub z1.s, z6.s, z1.s
+; CHECK-NEXT: sub z2.s, z5.s, z2.s
+; CHECK-NEXT: sub z3.s, z4.s, z3.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+ %result = sub <vscale x 16 x i32> %zero.extend, %a0
+ ret <vscale x 16 x i32> %result
+}
+
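+; For the mul tests the compare result is likewise materialised as #1 under
+; the compare predicate and combined with %a0 using a predicated mul.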
+define <vscale x 2 x i32> @zext.mul2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT: cmphi p1.d, p0/z, z2.d, z1.d
+; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+ %result = mul <vscale x 2 x i32> %zero.extend, %a0
+ ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @zext.mul4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+ %result = mul <vscale x 4 x i32> %zero.extend, %a0
+ ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @zext.mul8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z1.s, z3.s
+; CHECK-NEXT: mov z2.s, p2/z, #1 // =0x1
+; CHECK-NEXT: mov z3.s, p1/z, #1 // =0x1
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z3.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+ %result = mul <vscale x 8 x i32> %zero.extend, %a0
+ ret <vscale x 8 x i32> %result
+}
+
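+; The nxv16i32 mul keeps the all-true predicate live across all four compares,
+; so a fifth predicate register is needed; p4 is callee-saved under the SVE
+; calling convention, hence the stack frame and the spill/reload around its use.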
+define <vscale x 16 x i32> @zext.mul16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: cmphi p4.s, p0/z, z3.s, z7.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT: mov z4.s, p4/z, #1 // =0x1
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT: mov z5.s, p1/z, #1 // =0x1
+; CHECK-NEXT: mov z6.s, p3/z, #1 // =0x1
+; CHECK-NEXT: mov z7.s, p2/z, #1 // =0x1
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT: mul z2.s, p0/m, z2.s, z5.s
+; CHECK-NEXT: mul z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+ %zero.extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+ %result = mul <vscale x 16 x i32> %zero.extend, %a0
+ ret <vscale x 16 x i32> %result
+}
+
+attributes #0 = { "target-features"="+sve" }