From c679fbee2a76fce6ad4167c95c2e0276b0a8cc81 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Fri, 25 Feb 2022 17:59:44 +0000
Subject: [PATCH] [AArch64] Add tests for tbl + cmp splitting.

Additional tests showing potential for follow-ups after D120571.
---
Review notes (commentary only; not part of the commit message):
 * This copy of the patch had been collapsed onto a few long lines. The line
   structure below was rebuilt so the hunk again holds 3 context lines and
   203 added lines, as stated by the hunk header and the diffstat.
   Horizontal whitespace inside the added lines was re-created
   conventionally, so the blob ids on the index line may no longer match.
 * The second operand of each `icmp sgt <16 x i8> %load, ...` had been
   dropped. It is restored as a splat of i8 -1, which is what the CHECK lines
   encode: every cmgt.16b compares the loaded vector against a register
   materialised with `movi.2d ..., #0xffffffffffffffff`.
 * TODO: the masks of the two `shufflevector ... <64 x i32>` instructions
   were dropped as well and cannot be derived from this text (they are held
   in the constant pools lCPI8_* / lCPI9_*, which are not shown). Restore
   them from the upstream test; until then those two functions do not parse.
 * NOTE(review): the From: line carries no e-mail address; presumably it was
   lost the same way. Confirm before feeding this to `git am`.

 llvm/test/CodeGen/AArch64/vselect-ext.ll | 203 +++++++++++++++++++++++++++++++
 1 file changed, 203 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
index 79eb87c..3186df5 100644
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -197,3 +197,206 @@ entry:
   %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
   ret <16 x i32> %sel
 }
+
+define void @extension_in_loop_v16i8_to_v16i32(i8* %src, i32* %dst) {
+; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    movi.2d v0, #0xffffffffffffffff
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:  LBB7_1: ; %loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr q1, [x0, x8]
+; CHECK-NEXT:    add x8, x8, #16
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    cmgt.16b v2, v1, v0
+; CHECK-NEXT:    ushll2.8h v3, v1, #0
+; CHECK-NEXT:    sshll2.8h v4, v2, #0
+; CHECK-NEXT:    ushll2.4s v5, v3, #0
+; CHECK-NEXT:    ushll.4s v3, v3, #0
+; CHECK-NEXT:    sshll2.4s v6, v4, #0
+; CHECK-NEXT:    sshll.4s v4, v4, #0
+; CHECK-NEXT:    ushll.8h v1, v1, #0
+; CHECK-NEXT:    sshll.8h v2, v2, #0
+; CHECK-NEXT:    and.16b v5, v5, v6
+; CHECK-NEXT:    and.16b v3, v3, v4
+; CHECK-NEXT:    stp q3, q5, [x1, #32]
+; CHECK-NEXT:    sshll2.4s v4, v2, #0
+; CHECK-NEXT:    sshll.4s v2, v2, #0
+; CHECK-NEXT:    ushll2.4s v3, v1, #0
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    and.16b v3, v3, v4
+; CHECK-NEXT:    and.16b v1, v1, v2
+; CHECK-NEXT:    stp q1, q3, [x1], #64
+; CHECK-NEXT:    b.ne LBB7_1
+; CHECK-NEXT:  ; %bb.2: ; %exit
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %src.gep = getelementptr i8, i8* %src, i64 %iv
+  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
+  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
+  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %ext = zext <16 x i8> %load to <16 x i32>
+  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
+  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
+  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
+  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
+  %iv.next = add nuw i64 %iv, 16
+  %ec = icmp eq i64 %iv.next, 128
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(i8* %src, i32* %dst) {
+; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x9, lCPI8_0@PAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    adrp x10, lCPI8_1@PAGE
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    adrp x11, lCPI8_2@PAGE
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x12, lCPI8_3@PAGE
+; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    movi.2d v3, #0000000000000000
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr q0, [x9, lCPI8_0@PAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr q2, [x10, lCPI8_1@PAGEOFF]
+; CHECK-NEXT:  Lloh6:
+; CHECK-NEXT:    ldr q5, [x11, lCPI8_2@PAGEOFF]
+; CHECK-NEXT:  Lloh7:
+; CHECK-NEXT:    ldr q6, [x12, lCPI8_3@PAGEOFF]
+; CHECK-NEXT:  LBB8_1: ; %loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr q4, [x0, x8]
+; CHECK-NEXT:    add x8, x8, #16
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    cmgt.16b v7, v4, v1
+; CHECK-NEXT:    tbl.16b v16, { v3, v4 }, v0
+; CHECK-NEXT:    tbl.16b v17, { v3, v4 }, v2
+; CHECK-NEXT:    sshll2.8h v20, v7, #0
+; CHECK-NEXT:    tbl.16b v18, { v3, v4 }, v5
+; CHECK-NEXT:    sshll2.4s v21, v20, #0
+; CHECK-NEXT:    sshll.4s v20, v20, #0
+; CHECK-NEXT:    tbl.16b v19, { v3, v4 }, v6
+; CHECK-NEXT:    sshll.8h v7, v7, #0
+; CHECK-NEXT:    and.16b v16, v16, v21
+; CHECK-NEXT:    and.16b v17, v17, v20
+; CHECK-NEXT:    stp q17, q16, [x1, #32]
+; CHECK-NEXT:    sshll2.4s v16, v7, #0
+; CHECK-NEXT:    sshll.4s v7, v7, #0
+; CHECK-NEXT:    and.16b v16, v18, v16
+; CHECK-NEXT:    and.16b v7, v19, v7
+; CHECK-NEXT:    stp q7, q16, [x1], #64
+; CHECK-NEXT:    b.ne LBB8_1
+; CHECK-NEXT:  ; %bb.2: ; %exit
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh3, Lloh7
+; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh6
+; CHECK-NEXT:    .loh AdrpLdr Lloh1, Lloh5
+; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh4
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %src.gep = getelementptr i8, i8* %src, i64 %iv
+  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
+  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
+  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32>
+  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
+  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
+  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
+  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
+  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
+  %iv.next = add nuw i64 %iv, 16
+  %ec = icmp eq i64 %iv.next, 128
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(i8* %src, i32* %dst) {
+; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:  Lloh8:
+; CHECK-NEXT:    adrp x9, lCPI9_0@PAGE
+; CHECK-NEXT:  Lloh9:
+; CHECK-NEXT:    adrp x10, lCPI9_1@PAGE
+; CHECK-NEXT:  Lloh10:
+; CHECK-NEXT:    adrp x11, lCPI9_2@PAGE
+; CHECK-NEXT:  Lloh11:
+; CHECK-NEXT:    adrp x12, lCPI9_3@PAGE
+; CHECK-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    movi.2d v5, #0xffffffffffffffff
+; CHECK-NEXT:  Lloh12:
+; CHECK-NEXT:    ldr q0, [x9, lCPI9_0@PAGEOFF]
+; CHECK-NEXT:  Lloh13:
+; CHECK-NEXT:    ldr q4, [x10, lCPI9_1@PAGEOFF]
+; CHECK-NEXT:  Lloh14:
+; CHECK-NEXT:    ldr q6, [x11, lCPI9_2@PAGEOFF]
+; CHECK-NEXT:  Lloh15:
+; CHECK-NEXT:    ldr q7, [x12, lCPI9_3@PAGEOFF]
+; CHECK-NEXT:  LBB9_1: ; %loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr q1, [x0, x8]
+; CHECK-NEXT:    add x8, x8, #16
+; CHECK-NEXT:    cmp x8, #128
+; CHECK-NEXT:    cmgt.16b v16, v1, v5
+; CHECK-NEXT:    mov.16b v3, v1
+; CHECK-NEXT:    sshll2.8h v17, v16, #0
+; CHECK-NEXT:    tbl.16b v18, { v2, v3 }, v0
+; CHECK-NEXT:    tbl.16b v19, { v2, v3 }, v4
+; CHECK-NEXT:    sshll.8h v16, v16, #0
+; CHECK-NEXT:    tbl.16b v20, { v2, v3 }, v6
+; CHECK-NEXT:    tbl.16b v21, { v1, v2 }, v7
+; CHECK-NEXT:    sshll2.4s v22, v17, #0
+; CHECK-NEXT:    sshll.4s v17, v17, #0
+; CHECK-NEXT:    sshll2.4s v23, v16, #0
+; CHECK-NEXT:    sshll.4s v16, v16, #0
+; CHECK-NEXT:    and.16b v18, v18, v22
+; CHECK-NEXT:    and.16b v17, v19, v17
+; CHECK-NEXT:    stp q17, q18, [x1, #32]
+; CHECK-NEXT:    and.16b v17, v20, v23
+; CHECK-NEXT:    and.16b v16, v21, v16
+; CHECK-NEXT:    stp q16, q17, [x1], #64
+; CHECK-NEXT:    b.ne LBB9_1
+; CHECK-NEXT:  ; %bb.2: ; %exit
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh11, Lloh15
+; CHECK-NEXT:    .loh AdrpLdr Lloh10, Lloh14
+; CHECK-NEXT:    .loh AdrpLdr Lloh9, Lloh13
+; CHECK-NEXT:    .loh AdrpLdr Lloh8, Lloh12
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %src.gep = getelementptr i8, i8* %src, i64 %iv
+  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
+  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
+  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32>
+  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
+  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
+  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
+  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
+  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
+  %iv.next = add nuw i64 %iv, 16
+  %ec = icmp eq i64 %iv.next, 128
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
-- 
2.7.4