From: Florian Hahn Date: Thu, 15 Sep 2022 13:01:26 +0000 (+0100) Subject: [AArch64] Add big-endian tests for zext-to-tbl.ll X-Git-Tag: upstream/17.0.6~33456 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8f19de848b968bfdd237bdb6ffb65e7412bb6a0c;p=platform%2Fupstream%2Fllvm.git [AArch64] Add big-endian tests for zext-to-tbl.ll Extra tests for D120571. --- diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll index 52fd237..0daf371 100644 --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -o - %s | FileCheck %s - -target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -target triple = "arm64-apple-ios" +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s ; It's profitable to convert the zext to a shuffle, which in turn will be ; lowered to 4 tbl instructions. The masks are materialized outside the loop. @@ -26,6 +24,33 @@ define void @zext_v16i8_to_v16i32_in_loop(i8* %src, i32* %dst) { ; CHECK-NEXT: b.ne LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB0_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x10, x1, #32 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x9] +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-BE-NEXT: st1 { v1.4s }, [x10] +; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x1] +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: st1 { v0.4s }, [x9] +; CHECK-BE-NEXT: b.ne .LBB0_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -74,6 +99,40 @@ define void @zext_v16i8_to_v16i32_in_loop_not_header(i8* %src, i32* %dst, i1 %c) ; CHECK-NEXT: b LBB1_1 ; CHECK-NEXT: LBB1_4: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_not_header: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: b .LBB1_2 +; CHECK-BE-NEXT: .LBB1_1: // %loop.latch +; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: b.eq .LBB1_4 +; CHECK-BE-NEXT: .LBB1_2: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: tbz w2, #0, .LBB1_1 +; CHECK-BE-NEXT: // %bb.3: // %then +; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x10, x1, #32 +; CHECK-BE-NEXT: add x11, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x9] +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: st1 { v1.4s }, [x10] +; CHECK-BE-NEXT: st1 { v3.4s }, [x11] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: b .LBB1_1 +; CHECK-BE-NEXT: .LBB1_4: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -115,6 +174,24 @@ define void @zext_v16i8_to_v16i32_no_loop(i8* %src, i32* %dst) { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q0, q3, [x1] ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i32_no_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: add x8, x1, #48 +; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x8] +; CHECK-BE-NEXT: add x8, x1, #32 +; CHECK-BE-NEXT: st1 { v1.4s }, [x8] +; CHECK-BE-NEXT: add x8, x1, #16 +; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: st1 { v1.4s }, [x8] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret entry: %src.cast = bitcast i8* %src to <16 x i8>* %load = load <16 x i8>, <16 x i8>* %src.cast @@ -145,6 +222,33 @@ define void @zext_v16i8_to_v16i32_in_loop_optsize(i8* %src, i32* %dst) optsize { ; CHECK-NEXT: b.ne LBB3_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_optsize: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB3_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x10, x1, #32 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x9] +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-BE-NEXT: st1 { v1.4s }, [x10] +; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x1] +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: st1 { v0.4s }, [x9] +; CHECK-BE-NEXT: b.ne .LBB3_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -186,6 +290,33 @@ define void @zext_v16i8_to_v16i32_in_loop_minsize(i8* %src, i32* %dst) minsize { ; CHECK-NEXT: b.ne LBB4_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_minsize: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB4_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x10, x1, #32 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x9] +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-BE-NEXT: st1 { v1.4s }, [x10] +; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-BE-NEXT: st1 { v2.4s }, [x1] +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: st1 { v0.4s }, [x9] +; CHECK-BE-NEXT: b.ne .LBB4_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -223,6 +354,25 @@ define void @zext_v16i8_to_v16i16_in_loop(i8* %src, i16* %dst) { ; CHECK-NEXT: b.ne LBB5_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i16_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB5_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-BE-NEXT: st1 { v1.8h }, [x1] +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.8h }, [x9] +; CHECK-BE-NEXT: b.ne .LBB5_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -259,6 +409,26 @@ define void @zext_v8i8_to_v8i32_in_loop(i8* %src, i32* %dst) { ; CHECK-NEXT: b.ne LBB6_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v8i8_to_v8i32_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB6_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-BE-NEXT: st1 { v1.4s }, [x1] +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: st1 { v0.4s }, [x9] +; CHECK-BE-NEXT: b.ne .LBB6_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -309,6 +479,49 @@ define void @zext_v16i8_to_v16i64_in_loop(i8* %src, i64* %dst) { ; CHECK-NEXT: b.ne LBB7_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB7_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8 +; CHECK-BE-NEXT: add x10, x1, #96 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] +; CHECK-BE-NEXT: add x9, x1, #112 +; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: ushll2 v3.2d, v2.4s, #0 +; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #80 +; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 +; CHECK-BE-NEXT: st1 { v2.2d }, [x10] +; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-BE-NEXT: add x10, x1, #48 +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #64 +; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0 +; CHECK-BE-NEXT: st1 { v1.2d }, [x9] +; CHECK-BE-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: st1 { v4.2d }, [x10] +; CHECK-BE-NEXT: add x10, x1, #32 +; CHECK-BE-NEXT: st1 { v1.2d }, [x1] +; CHECK-BE-NEXT: add x1, x1, #128 +; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-BE-NEXT: st1 { v0.2d }, [x9] +; CHECK-BE-NEXT: st1 { v2.2d }, [x10] +; CHECK-BE-NEXT: b.ne .LBB7_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop