From 3dd319ecf3be64598ea84d1730033854cade7123 Mon Sep 17 00:00:00 2001
From: Weining Lu
Date: Tue, 20 Jun 2023 21:50:54 +0800
Subject: [PATCH] [LoongArch] Optimize conditional selection of integer

This patch optimizes code generation by leveraging the zeroing behavior
of the `maskeqz`/`masknez` instructions.

```
int sel(int a, int b) {
  return (a < b) ? a : 0;
}
```
```
slt $a1,$a0,$a1
masknez $a2,$r0,$a1
maskeqz $a0,$a0,$a1
or $a0,$a0,$a2
```
=>
```
slt $a1,$a0,$a1
maskeqz $a0,$a0,$a1
```

Reviewed By: SixWeining

Differential Revision: https://reviews.llvm.org/D153193
---
 llvm/lib/Target/LoongArch/LoongArchInstrInfo.td    |  2 ++
 .../CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll  |  8 ------
 .../LoongArch/ir-instruction/select-bare-int.ll    | 32 ++++++++++++++++++++++
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 9129193..2bdc291 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1201,6 +1201,8 @@ def : Pat<(setle GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rk, GPR:$rj), 1)>;
 
 /// Select
 
+def : Pat<(select GPR:$cond, GPR:$t, 0), (MASKEQZ GPR:$t, GPR:$cond)>;
+def : Pat<(select GPR:$cond, 0, GPR:$f), (MASKNEZ GPR:$f, GPR:$cond)>;
 def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
           (OR (MASKEQZ GPR:$t, GPR:$cond), (MASKNEZ GPR:$f, GPR:$cond))>;
 
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index 5b16604..6782334 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -22,8 +22,6 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    addi.d $a5, $a5, 1
 ; LA64-NEXT:    xori $a6, $a6, 1
 ; LA64-NEXT:    masknez $a5, $a5, $a6
-; LA64-NEXT:    maskeqz $a6, $zero, $a6
-; LA64-NEXT:    or $a5, $a6, $a5
 ; LA64-NEXT:    andi $a5, $a5, 255
 ; LA64-NEXT:    sll.w $a5, $a5, $a0
 ; LA64-NEXT:    and $a6, $a3, $a4
@@ -77,8 +75,6 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    addi.d $a5, $a5, 1
 ; LA64-NEXT:    xori $a6, $a6, 1
 ; LA64-NEXT:    masknez $a5, $a5, $a6
-; LA64-NEXT:    maskeqz $a6, $zero, $a6
-; LA64-NEXT:    or $a5, $a6, $a5
 ; LA64-NEXT:    bstrpick.d $a5, $a5, 15, 0
 ; LA64-NEXT:    sll.w $a5, $a5, $a0
 ; LA64-NEXT:    and $a6, $a3, $a4
@@ -123,8 +119,6 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    xori $a1, $a1, 1
 ; LA64-NEXT:    addi.d $a4, $a3, 1
 ; LA64-NEXT:    masknez $a4, $a4, $a1
-; LA64-NEXT:    maskeqz $a1, $zero, $a1
-; LA64-NEXT:    or $a4, $a1, $a4
 ; LA64-NEXT:  .LBB2_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB2_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
@@ -164,8 +158,6 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    xori $a2, $a2, 1
 ; LA64-NEXT:    addi.d $a4, $a3, 1
 ; LA64-NEXT:    masknez $a4, $a4, $a2
-; LA64-NEXT:    maskeqz $a2, $zero, $a2
-; LA64-NEXT:    or $a4, $a2, $a4
 ; LA64-NEXT:  .LBB3_3: # %atomicrmw.start
 ; LA64-NEXT:    # Parent Loop BB3_1 Depth=1
 ; LA64-NEXT:    # => This Inner Loop Header: Depth=2
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll
index ddbc4ad..ad0a241 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll
@@ -106,3 +106,35 @@ define i64 @bare_select_i64(i1 %a, i64 %b, i64 %c) {
   %res = select i1 %a, i64 %b, i64 %c
   ret i64 %res
 }
+
+define i16 @bare_select_zero_i16(i1 %a, i16 %b) {
+; LA32-LABEL: bare_select_zero_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    andi $a0, $a0, 1
+; LA32-NEXT:    masknez $a0, $a1, $a0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: bare_select_zero_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    andi $a0, $a0, 1
+; LA64-NEXT:    masknez $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %res = select i1 %a, i16 0, i16 %b
+  ret i16 %res
+}
+
+define i32 @bare_select_zero_i32(i1 %a, i32 %b) {
+; LA32-LABEL: bare_select_zero_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    andi $a0, $a0, 1
+; LA32-NEXT:    maskeqz $a0, $a1, $a0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: bare_select_zero_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    andi $a0, $a0, 1
+; LA64-NEXT:    maskeqz $a0, $a1, $a0
+; LA64-NEXT:    ret
+  %res = select i1 %a, i32 %b, i32 0
+  ret i32 %res
+}
-- 
2.7.4