From afca54f0cfc3164e1ebbc7ddf27c8e95f0cd12b6 Mon Sep 17 00:00:00 2001
From: Biplob Mishra
Date: Tue, 5 Apr 2022 12:06:17 +0100
Subject: [PATCH] [ARM][AArch64] Optimize pattern for converting a half word byte swap in a 64-bit input to a rev16 instruction.

Differential Revision: https://reviews.llvm.org/D122643
---
Reviewer notes (this section is not part of the commit message):

 * The new selection pattern matches, for a 64-bit register input x,
     ((x >> 8) & 0x00ff00ff00ff00ff) | ((x << 8) & 0xff00ff00ff00ff00)
   and selects a single REV16Xr for it.
 * The first operand moves every odd-numbered byte down one position and
   the second moves every even-numbered byte up one position, so the two
   bytes inside each 16-bit halfword trade places. This is the
   "01234567 ->(rev16) 10325476" behaviour illustrated in the test comment.
 * The decimal constants used in the test, 71777214294589695 and
   -71777214294589696, are 0x00ff00ff00ff00ff and 0xff00ff00ff00ff00
   written as signed i64 values.
 * The test expects the same output, one `rev16 x0, x0` followed by `ret`,
   under both the CHECK and the GISEL prefixes.

 llvm/lib/Target/AArch64/AArch64InstrInfo.td |  4 ++++
 llvm/test/CodeGen/AArch64/arm64-rev.ll      | 23 +++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 64f227b..da272450 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2086,6 +2086,10 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
 def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
 def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
 
+def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
+              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
+          (REV16Xr GPR64:$Rn)>;
+
 //===----------------------------------------------------------------------===//
 // Bitfield immediate extraction instruction.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index fe05c30..3f85306 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -679,3 +679,26 @@ define void @test_bswap32_narrow(i32* %p0, i16* %p1) nounwind {
   ret void
 }
 declare i32 @gid_tbl_len(...)
+
+; 64-bit REV16 is *not* a swap then a 16-bit rotation:
+; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
+; 01234567 ->(rev16) 10325476
+; Optimize patterns where rev16 can be generated for a 64-bit input.
+define i64 @test_rev16_x_hwbyteswaps(i64 %a) nounwind {
+; CHECK-LABEL: test_rev16_x_hwbyteswaps:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev16 x0, x0
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: test_rev16_x_hwbyteswaps:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: rev16 x0, x0
+; GISEL-NEXT: ret
+entry:
+  %0 = lshr i64 %a, 8
+  %1 = and i64 %0, 71777214294589695
+  %2 = shl i64 %a, 8
+  %3 = and i64 %2, -71777214294589696
+  %4 = or i64 %1, %3
+  ret i64 %4
+}
-- 
2.7.4