From 0874281d6054d8f5645bb066271b6f73acde7e80 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 20 Mar 2021 15:09:15 -0700 Subject: [PATCH] [RISCV] Add Zba command lines to xaluo.ll. NFC Some of the patterns end up with 32 to 64 bit zero extends on RV64 which can be handled by zext.w. --- llvm/test/CodeGen/RISCV/xaluo.ll | 1461 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1461 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index facc0f2..758cf4c 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv32 -mattr=+m -verify-machineinstrs | FileCheck %s --check-prefix=RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m -verify-machineinstrs | FileCheck %s --check-prefix=RV64 +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+experimental-zba -verify-machineinstrs | FileCheck %s --check-prefix=RV32ZBA +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-zba -verify-machineinstrs | FileCheck %s --check-prefix=RV64ZBA ; ; Get the actual value of the overflow bit. @@ -25,6 +27,26 @@ define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a3, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo1.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a3, a0, a1 +; RV32ZBA-NEXT: slt a0, a3, a0 +; RV32ZBA-NEXT: slti a1, a1, 0 +; RV32ZBA-NEXT: xor a0, a1, a0 +; RV32ZBA-NEXT: sw a3, 0(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo1.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: add a3, a0, a1 +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: xor a0, a0, a3 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a3, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -51,6 +73,23 @@ define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo2.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a2, a0, 4 +; RV32ZBA-NEXT: slt a0, a2, a0 +; RV32ZBA-NEXT: sw a2, 0(a1) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo2.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: addi a2, a0, 4 +; RV64ZBA-NEXT: addiw a0, a0, 4 +; RV64ZBA-NEXT: xor a0, a0, a2 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4) %val = extractvalue {i32, i1} %t, 0 @@ -78,6 +117,24 @@ define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo3.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a2, a0, -4 +; RV32ZBA-NEXT: slt a0, a2, a0 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: sw a2, 0(a1) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo3.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: addi a2, a0, -4 +; RV64ZBA-NEXT: addiw a0, a0, -4 +; RV64ZBA-NEXT: xor a0, a0, a2 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4) %val = extractvalue {i32, i1} %t, 0 @@ -108,6 +165,27 @@ define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a3, 0(a1) ; RV64-NEXT: ret 
+; +; RV32ZBA-LABEL: saddo4.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: lui a2, 4096 +; RV32ZBA-NEXT: addi a2, a2, -1 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: slt a0, a2, a0 +; RV32ZBA-NEXT: sw a2, 0(a1) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo4.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: lui a2, 4096 +; RV64ZBA-NEXT: addiw a2, a2, -1 +; RV64ZBA-NEXT: add a3, a0, a2 +; RV64ZBA-NEXT: addw a0, a0, a2 +; RV64ZBA-NEXT: xor a0, a0, a3 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a3, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215) %val = extractvalue {i32, i1} %t, 0 @@ -140,6 +218,30 @@ define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) { ; RV64-NEXT: xor a0, a1, a0 ; RV64-NEXT: sd a3, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo1.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a5, a1, a3 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a5, a5, a0 +; RV32ZBA-NEXT: xor a0, a1, a5 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: not a1, a1 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: slti a0, a0, 0 +; RV32ZBA-NEXT: sw a2, 0(a4) +; RV32ZBA-NEXT: sw a5, 4(a4) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo1.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a3, a0, a1 +; RV64ZBA-NEXT: slt a0, a3, a0 +; RV64ZBA-NEXT: slti a1, a1, 0 +; RV64ZBA-NEXT: xor a0, a1, a0 +; RV64ZBA-NEXT: sd a3, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -168,6 +270,26 @@ define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) { ; RV64-NEXT: slt a0, a2, a0 ; RV64-NEXT: sd a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo2.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a3, a0, 4 +; RV32ZBA-NEXT: sltu a0, a3, a0 +; RV32ZBA-NEXT: add a4, a1, a0 +; RV32ZBA-NEXT: xor a0, a1, a4 +; RV32ZBA-NEXT: not a1, a1 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: slti a0, a0, 0 +; RV32ZBA-NEXT: sw a3, 0(a2) +; RV32ZBA-NEXT: sw a4, 4(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo2.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a2, a0, 4 +; RV64ZBA-NEXT: slt a0, a2, a0 +; RV64ZBA-NEXT: sd a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4) %val = extractvalue {i64, i1} %t, 0 @@ -197,6 +319,27 @@ define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) { ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: sd a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo3.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a3, a0, -4 +; RV32ZBA-NEXT: sltu a0, a3, a0 +; RV32ZBA-NEXT: add a0, a1, a0 +; RV32ZBA-NEXT: addi a4, a0, -1 +; RV32ZBA-NEXT: xor a0, a1, a4 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: slti a0, a0, 0 +; RV32ZBA-NEXT: sw a3, 0(a2) +; RV32ZBA-NEXT: sw a4, 4(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo3.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a2, a0, -4 +; RV64ZBA-NEXT: slt a0, a2, a0 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: sd a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4) %val = extractvalue {i64, i1} %t, 0 @@ -222,6 +365,23 @@ define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) { ; RV64-NEXT: sw a0, 0(a2) ; RV64-NEXT: mv a0, a3 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a1, a0, a1 +; RV32ZBA-NEXT: sltu a0, a1, a0 +; RV32ZBA-NEXT: sw a1, 
0(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addw a3, a0, a1 +; RV64ZBA-NEXT: sext.w a4, a0 +; RV64ZBA-NEXT: sltu a3, a3, a4 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sw a0, 0(a2) +; RV64ZBA-NEXT: mv a0, a3 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -251,6 +411,27 @@ define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: sd a1, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a3, a1, a3 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a3, a3, a0 +; RV32ZBA-NEXT: beq a3, a1, .LBB8_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a3, a1 +; RV32ZBA-NEXT: .LBB8_2: # %entry +; RV32ZBA-NEXT: sw a2, 0(a4) +; RV32ZBA-NEXT: sw a3, 4(a4) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a1, a0, a1 +; RV64ZBA-NEXT: sltu a0, a1, a0 +; RV64ZBA-NEXT: sd a1, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -279,6 +460,26 @@ define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a3, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo1.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sgtz a3, a1 +; RV32ZBA-NEXT: sub a1, a0, a1 +; RV32ZBA-NEXT: slt a0, a1, a0 +; RV32ZBA-NEXT: xor a0, a3, a0 +; RV32ZBA-NEXT: sw a1, 0(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo1.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: sub a3, a0, a1 +; RV64ZBA-NEXT: subw a0, a0, a1 +; RV64ZBA-NEXT: xor a0, a0, a3 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a3, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -304,6 +505,23 @@ define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo2.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a2, a0, 4 +; RV32ZBA-NEXT: slt a0, a2, a0 +; RV32ZBA-NEXT: sw a2, 0(a1) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo2.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: addi a2, a0, 4 +; RV64ZBA-NEXT: addiw a0, a0, 4 +; RV64ZBA-NEXT: xor a0, a0, a2 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4) %val = extractvalue {i32, i1} %t, 0 @@ -336,6 +554,30 @@ define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV64-NEXT: xor a0, a3, a0 ; RV64-NEXT: sd a1, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a6, a0, a2 +; RV32ZBA-NEXT: sub a5, a1, a3 +; RV32ZBA-NEXT: sub a5, a5, a6 +; RV32ZBA-NEXT: xor a6, a1, a5 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: and a1, a1, a6 +; RV32ZBA-NEXT: slti a1, a1, 0 +; RV32ZBA-NEXT: sub a0, a0, a2 +; RV32ZBA-NEXT: sw a0, 0(a4) +; RV32ZBA-NEXT: sw a5, 4(a4) +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sgtz a3, a1 +; RV64ZBA-NEXT: sub a1, a0, a1 +; RV64ZBA-NEXT: slt a0, a1, a0 +; RV64ZBA-NEXT: xor a0, a3, a0 +; RV64ZBA-NEXT: sd a1, 0(a2) 
+; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -361,6 +603,23 @@ define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) { ; RV64-NEXT: sw a0, 0(a2) ; RV64-NEXT: mv a0, a3 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sub a1, a0, a1 +; RV32ZBA-NEXT: sltu a0, a0, a1 +; RV32ZBA-NEXT: sw a1, 0(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: subw a3, a0, a1 +; RV64ZBA-NEXT: sext.w a4, a0 +; RV64ZBA-NEXT: sltu a3, a4, a3 +; RV64ZBA-NEXT: sub a0, a0, a1 +; RV64ZBA-NEXT: sw a0, 0(a2) +; RV64ZBA-NEXT: mv a0, a3 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -393,6 +652,30 @@ define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: sd a1, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a5, a0, a2 +; RV32ZBA-NEXT: sub a3, a1, a3 +; RV32ZBA-NEXT: sub a3, a3, a5 +; RV32ZBA-NEXT: sub a2, a0, a2 +; RV32ZBA-NEXT: beq a3, a1, .LBB13_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a1, a3 +; RV32ZBA-NEXT: j .LBB13_3 +; RV32ZBA-NEXT: .LBB13_2: +; RV32ZBA-NEXT: sltu a0, a0, a2 +; RV32ZBA-NEXT: .LBB13_3: # %entry +; RV32ZBA-NEXT: sw a2, 0(a4) +; RV32ZBA-NEXT: sw a3, 4(a4) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sub a1, a0, a1 +; RV64ZBA-NEXT: sltu a0, a0, a1 +; RV64ZBA-NEXT: sd a1, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -422,6 +705,27 @@ define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a3, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulh a3, a0, a1 +; RV32ZBA-NEXT: mul a1, a0, a1 +; RV32ZBA-NEXT: srai a0, a1, 31 +; RV32ZBA-NEXT: xor a0, a3, a0 +; RV32ZBA-NEXT: snez a0, a0 +; RV32ZBA-NEXT: sw a1, 0(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: mul a3, a0, a1 +; RV64ZBA-NEXT: mulw a0, a0, a1 +; RV64ZBA-NEXT: xor a0, a0, a3 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a3, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -452,6 +756,28 @@ define zeroext i1 @smulo2.i32(i32 %v1, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a3, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo2.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a2, zero, 13 +; RV32ZBA-NEXT: mulh a3, a0, a2 +; RV32ZBA-NEXT: mul a2, a0, a2 +; RV32ZBA-NEXT: srai a0, a2, 31 +; RV32ZBA-NEXT: xor a0, a3, a0 +; RV32ZBA-NEXT: snez a0, a0 +; RV32ZBA-NEXT: sw a2, 0(a1) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo2.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: addi a2, zero, 13 +; RV64ZBA-NEXT: mul a3, a0, a2 +; RV64ZBA-NEXT: mulw a0, a0, a2 +; RV64ZBA-NEXT: xor a0, a0, a3 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a3, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 13) %val = extractvalue {i32, i1} %t, 0 @@ -492,6 +818,38 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) { 
; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sd a1, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi sp, sp, -16 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: .cfi_offset s0, -8 +; RV32ZBA-NEXT: mv s0, a4 +; RV32ZBA-NEXT: sw zero, 4(sp) +; RV32ZBA-NEXT: addi a4, sp, 4 +; RV32ZBA-NEXT: call __mulodi4@plt +; RV32ZBA-NEXT: lw a2, 4(sp) +; RV32ZBA-NEXT: snez a2, a2 +; RV32ZBA-NEXT: sw a1, 4(s0) +; RV32ZBA-NEXT: sw a0, 0(s0) +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulh a3, a0, a1 +; RV64ZBA-NEXT: mul a1, a0, a1 +; RV64ZBA-NEXT: srai a0, a1, 63 +; RV64ZBA-NEXT: xor a0, a3, a0 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sd a1, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -535,6 +893,41 @@ define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sd a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo2.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi sp, sp, -16 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: .cfi_offset s0, -8 +; RV32ZBA-NEXT: mv s0, a2 +; RV32ZBA-NEXT: sw zero, 4(sp) +; RV32ZBA-NEXT: addi a2, zero, 13 +; RV32ZBA-NEXT: addi a4, sp, 4 +; RV32ZBA-NEXT: mv a3, zero +; RV32ZBA-NEXT: call __mulodi4@plt +; RV32ZBA-NEXT: lw a2, 4(sp) +; RV32ZBA-NEXT: snez a2, a2 +; RV32ZBA-NEXT: sw a1, 4(s0) +; RV32ZBA-NEXT: sw a0, 0(s0) +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo2.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a2, zero, 13 +; RV64ZBA-NEXT: mulh a3, a0, a2 +; RV64ZBA-NEXT: mul a2, a0, a2 +; RV64ZBA-NEXT: srai a0, a2, 63 +; RV64ZBA-NEXT: xor a0, a3, a0 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sd a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 13) %val = extractvalue {i64, i1} %t, 0 @@ -562,6 +955,25 @@ define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a1, 0(a2) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulhu a3, a0, a1 +; RV32ZBA-NEXT: snez a3, a3 +; RV32ZBA-NEXT: mul a0, a0, a1 +; RV32ZBA-NEXT: sw a0, 0(a2) +; RV32ZBA-NEXT: mv a0, a3 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: slli a1, a1, 32 +; RV64ZBA-NEXT: slli a0, a0, 32 +; RV64ZBA-NEXT: mulhu a1, a0, a1 +; RV64ZBA-NEXT: srli a0, a1, 32 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a1, 0(a2) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -591,6 +1003,26 @@ define zeroext i1 @umulo2.i32(i32 %v1, i32* %res) { ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo2.i32: +; RV32ZBA: # %bb.0: # %entry +; 
RV32ZBA-NEXT: addi a3, zero, 13 +; RV32ZBA-NEXT: mulhu a2, a0, a3 +; RV32ZBA-NEXT: snez a2, a2 +; RV32ZBA-NEXT: mul a0, a0, a3 +; RV32ZBA-NEXT: sw a0, 0(a1) +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo2.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: addi a2, zero, 13 +; RV64ZBA-NEXT: mul a2, a0, a2 +; RV64ZBA-NEXT: srli a0, a2, 32 +; RV64ZBA-NEXT: snez a0, a0 +; RV64ZBA-NEXT: sw a2, 0(a1) +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 13) %val = extractvalue {i32, i1} %t, 0 @@ -632,6 +1064,39 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV64-NEXT: sd a0, 0(a2) ; RV64-NEXT: mv a0, a3 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mul a6, a3, a0 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: mulhu a5, a0, a2 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: sltu a7, a6, a5 +; RV32ZBA-NEXT: snez t0, a3 +; RV32ZBA-NEXT: snez a5, a1 +; RV32ZBA-NEXT: and a5, a5, t0 +; RV32ZBA-NEXT: mulhu a1, a1, a2 +; RV32ZBA-NEXT: snez a1, a1 +; RV32ZBA-NEXT: or a1, a5, a1 +; RV32ZBA-NEXT: mulhu a3, a3, a0 +; RV32ZBA-NEXT: snez a3, a3 +; RV32ZBA-NEXT: or a1, a1, a3 +; RV32ZBA-NEXT: or a1, a1, a7 +; RV32ZBA-NEXT: mul a0, a0, a2 +; RV32ZBA-NEXT: sw a0, 0(a4) +; RV32ZBA-NEXT: sw a6, 4(a4) +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulhu a3, a0, a1 +; RV64ZBA-NEXT: snez a3, a3 +; RV64ZBA-NEXT: mul a0, a0, a1 +; RV64ZBA-NEXT: sd a0, 0(a2) +; RV64ZBA-NEXT: mv a0, a3 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -666,6 +1131,32 @@ define zeroext i1 @umulo2.i64(i64 %v1, i64* %res) { ; RV64-NEXT: sd a0, 0(a1) ; RV64-NEXT: mv a0, a2 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo2.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a3, zero, 13 +; RV32ZBA-NEXT: mul a4, a1, a3 +; RV32ZBA-NEXT: mulhu a5, a0, a3 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a1, a1, a3 +; RV32ZBA-NEXT: snez a1, a1 +; RV32ZBA-NEXT: or a1, a1, a5 +; RV32ZBA-NEXT: mul a0, a0, a3 +; RV32ZBA-NEXT: sw a0, 0(a2) +; RV32ZBA-NEXT: sw a4, 4(a2) +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo2.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a3, zero, 13 +; RV64ZBA-NEXT: mulhu a2, a0, a3 +; RV64ZBA-NEXT: snez a2, a2 +; RV64ZBA-NEXT: mul a0, a0, a3 +; RV64ZBA-NEXT: sd a0, 0(a1) +; RV64ZBA-NEXT: mv a0, a2 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 13) %val = extractvalue {i64, i1} %t, 0 @@ -701,6 +1192,29 @@ define i32 @saddo.select.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB22_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo.select.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a2, a0, a1 +; RV32ZBA-NEXT: slt a2, a2, a0 +; RV32ZBA-NEXT: slti a3, a1, 0 +; RV32ZBA-NEXT: bne a3, a2, .LBB22_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: .LBB22_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo.select.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a2, a1 +; RV64ZBA-NEXT: sext.w a3, a0 +; RV64ZBA-NEXT: add a4, a3, a2 +; RV64ZBA-NEXT: addw a2, a3, a2 +; RV64ZBA-NEXT: bne a2, a4, .LBB22_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: 
.LBB22_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -727,6 +1241,25 @@ define i1 @saddo.not.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: xor a0, a0, a2 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo.not.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a2, a0, a1 +; RV32ZBA-NEXT: slt a0, a2, a0 +; RV32ZBA-NEXT: slti a1, a1, 0 +; RV32ZBA-NEXT: xor a0, a1, a0 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo.not.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: add a2, a0, a1 +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: xor a0, a0, a2 +; RV64ZBA-NEXT: seqz a0, a0 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -762,6 +1295,34 @@ define i64 @saddo.select.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB24_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo.select.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a4, a1, a3 +; RV32ZBA-NEXT: add a5, a0, a2 +; RV32ZBA-NEXT: sltu a5, a5, a0 +; RV32ZBA-NEXT: add a4, a4, a5 +; RV32ZBA-NEXT: xor a4, a1, a4 +; RV32ZBA-NEXT: xor a5, a1, a3 +; RV32ZBA-NEXT: not a5, a5 +; RV32ZBA-NEXT: and a4, a5, a4 +; RV32ZBA-NEXT: bltz a4, .LBB24_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: mv a1, a3 +; RV32ZBA-NEXT: .LBB24_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo.select.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a2, a0, a1 +; RV64ZBA-NEXT: slt a2, a2, a0 +; RV64ZBA-NEXT: slti a3, a1, 0 +; RV64ZBA-NEXT: bne a3, a2, .LBB24_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB24_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -792,6 +1353,29 @@ define i1 @saddo.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: xor a0, a1, a0 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo.not.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a4, a1, a3 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a0, a4, a0 +; RV32ZBA-NEXT: xor a0, a1, a0 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: not a1, a1 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: addi a1, zero, -1 +; RV32ZBA-NEXT: slt a0, a1, a0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo.not.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a2, a0, a1 +; RV64ZBA-NEXT: slt a0, a2, a0 +; RV64ZBA-NEXT: slti a1, a1, 0 +; RV64ZBA-NEXT: xor a0, a1, a0 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -818,6 +1402,25 @@ define i32 @uaddo.select.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB26_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.select.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a2, a0, a1 +; RV32ZBA-NEXT: bltu a2, a0, .LBB26_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: .LBB26_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.select.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addw a2, a0, a1 +; RV64ZBA-NEXT: sext.w a3, a0 +; RV64ZBA-NEXT: bltu a2, a3, .LBB26_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; 
RV64ZBA-NEXT: .LBB26_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -840,6 +1443,21 @@ define i1 @uaddo.not.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.not.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a1, a0, a1 +; RV32ZBA-NEXT: sltu a0, a1, a0 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.not.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addw a1, a0, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: sltu a0, a1, a0 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -875,6 +1493,34 @@ define i64 @uaddo.select.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB28_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.select.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a5, a1, a3 +; RV32ZBA-NEXT: add a4, a0, a2 +; RV32ZBA-NEXT: sltu a4, a4, a0 +; RV32ZBA-NEXT: add a5, a5, a4 +; RV32ZBA-NEXT: bne a5, a1, .LBB28_3 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: beqz a4, .LBB28_4 +; RV32ZBA-NEXT: .LBB28_2: # %entry +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB28_3: # %entry +; RV32ZBA-NEXT: sltu a4, a5, a1 +; RV32ZBA-NEXT: bnez a4, .LBB28_2 +; RV32ZBA-NEXT: .LBB28_4: # %entry +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: mv a1, a3 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.select.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a2, a0, a1 +; RV64ZBA-NEXT: bltu a2, a0, .LBB28_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB28_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -902,6 +1548,26 @@ define i1 @uaddo.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.not.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a3, a1, a3 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a2, a3, a0 +; RV32ZBA-NEXT: beq a2, a1, .LBB29_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a2, a1 +; RV32ZBA-NEXT: .LBB29_2: # %entry +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.not.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a1, a0, a1 +; RV64ZBA-NEXT: sltu a0, a1, a0 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -932,6 +1598,29 @@ define i32 @ssubo.select.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB30_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo.select.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sgtz a2, a1 +; RV32ZBA-NEXT: sub a3, a0, a1 +; RV32ZBA-NEXT: slt a3, a3, a0 +; RV32ZBA-NEXT: bne a2, a3, .LBB30_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: .LBB30_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo.select.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a2, a1 +; RV64ZBA-NEXT: sext.w a3, a0 +; RV64ZBA-NEXT: sub a4, a3, a2 +; RV64ZBA-NEXT: subw a2, a3, a2 +; RV64ZBA-NEXT: bne a2, a4, .LBB30_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB30_2: # %entry +; RV64ZBA-NEXT: ret 
entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -958,6 +1647,25 @@ define i1 @ssubo.not.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: xor a0, a0, a2 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo.not.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sgtz a2, a1 +; RV32ZBA-NEXT: sub a1, a0, a1 +; RV32ZBA-NEXT: slt a0, a1, a0 +; RV32ZBA-NEXT: xor a0, a2, a0 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo.not.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: sub a2, a0, a1 +; RV64ZBA-NEXT: subw a0, a0, a1 +; RV64ZBA-NEXT: xor a0, a0, a2 +; RV64ZBA-NEXT: seqz a0, a0 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -991,6 +1699,32 @@ define i64 @ssubo.select.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB32_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo.select.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a4, a0, a2 +; RV32ZBA-NEXT: sub a5, a1, a3 +; RV32ZBA-NEXT: sub a4, a5, a4 +; RV32ZBA-NEXT: xor a4, a1, a4 +; RV32ZBA-NEXT: xor a5, a1, a3 +; RV32ZBA-NEXT: and a4, a5, a4 +; RV32ZBA-NEXT: bltz a4, .LBB32_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: mv a1, a3 +; RV32ZBA-NEXT: .LBB32_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo.select.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sgtz a2, a1 +; RV64ZBA-NEXT: sub a3, a0, a1 +; RV64ZBA-NEXT: slt a3, a3, a0 +; RV64ZBA-NEXT: bne a2, a3, .LBB32_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB32_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1019,6 +1753,27 @@ define i1 @ssub.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: xor a0, a2, a0 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssub.not.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a0, a0, a2 +; RV32ZBA-NEXT: sub a2, a1, a3 +; RV32ZBA-NEXT: sub a0, a2, a0 +; RV32ZBA-NEXT: xor a0, a1, a0 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: addi a1, zero, -1 +; RV32ZBA-NEXT: slt a0, a1, a0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssub.not.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sgtz a2, a1 +; RV64ZBA-NEXT: sub a1, a0, a1 +; RV64ZBA-NEXT: slt a0, a1, a0 +; RV64ZBA-NEXT: xor a0, a2, a0 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1045,6 +1800,25 @@ define i32 @usubo.select.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB34_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.select.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sub a2, a0, a1 +; RV32ZBA-NEXT: bltu a0, a2, .LBB34_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: .LBB34_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.select.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: subw a2, a0, a1 +; RV64ZBA-NEXT: sext.w a3, a0 +; RV64ZBA-NEXT: bltu a3, a2, .LBB34_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB34_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -1067,6 
+1841,21 @@ define i1 @usubo.not.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.not.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sub a1, a0, a1 +; RV32ZBA-NEXT: sltu a0, a0, a1 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.not.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: subw a1, a0, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: sltu a0, a0, a1 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -1103,6 +1892,35 @@ define i64 @usubo.select.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB36_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.select.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a4, a0, a2 +; RV32ZBA-NEXT: sub a5, a1, a3 +; RV32ZBA-NEXT: sub a4, a5, a4 +; RV32ZBA-NEXT: beq a4, a1, .LBB36_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a4, a1, a4 +; RV32ZBA-NEXT: beqz a4, .LBB36_3 +; RV32ZBA-NEXT: j .LBB36_4 +; RV32ZBA-NEXT: .LBB36_2: +; RV32ZBA-NEXT: sub a4, a0, a2 +; RV32ZBA-NEXT: sltu a4, a0, a4 +; RV32ZBA-NEXT: bnez a4, .LBB36_4 +; RV32ZBA-NEXT: .LBB36_3: # %entry +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: mv a1, a3 +; RV32ZBA-NEXT: .LBB36_4: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.select.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sub a2, a0, a1 +; RV64ZBA-NEXT: bltu a0, a2, .LBB36_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB36_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1133,6 +1951,29 @@ define i1 @usubo.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.not.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a4, a0, a2 +; RV32ZBA-NEXT: sub a3, a1, a3 +; RV32ZBA-NEXT: sub a3, a3, a4 +; RV32ZBA-NEXT: beq a3, a1, .LBB37_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a1, a3 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB37_2: +; RV32ZBA-NEXT: sub a1, a0, a2 +; RV32ZBA-NEXT: sltu a0, a0, a1 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.not.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sub a1, a0, a1 +; RV64ZBA-NEXT: sltu a0, a0, a1 +; RV64ZBA-NEXT: xori a0, a0, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1163,6 +2004,29 @@ define i32 @smulo.select.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB38_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.select.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulh a2, a0, a1 +; RV32ZBA-NEXT: mul a3, a0, a1 +; RV32ZBA-NEXT: srai a3, a3, 31 +; RV32ZBA-NEXT: bne a2, a3, .LBB38_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: .LBB38_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.select.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a2, a1 +; RV64ZBA-NEXT: sext.w a3, a0 +; RV64ZBA-NEXT: mul a4, a3, a2 +; RV64ZBA-NEXT: mulw a2, a3, a2 +; RV64ZBA-NEXT: bne a2, a4, .LBB38_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB38_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, 
i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -1189,6 +2053,25 @@ define i1 @smulo.not.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: xor a0, a0, a2 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.not.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulh a2, a0, a1 +; RV32ZBA-NEXT: mul a0, a0, a1 +; RV32ZBA-NEXT: srai a0, a0, 31 +; RV32ZBA-NEXT: xor a0, a2, a0 +; RV32ZBA-NEXT: seqz a0, a0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.not.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: mul a2, a0, a1 +; RV64ZBA-NEXT: mulw a0, a0, a1 +; RV64ZBA-NEXT: xor a0, a0, a2 +; RV64ZBA-NEXT: seqz a0, a0 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -1244,6 +2127,54 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB40_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.select.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi sp, sp, -32 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 32 +; RV32ZBA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: .cfi_offset s0, -8 +; RV32ZBA-NEXT: .cfi_offset s1, -12 +; RV32ZBA-NEXT: .cfi_offset s2, -16 +; RV32ZBA-NEXT: .cfi_offset s3, -20 +; RV32ZBA-NEXT: mv s2, a3 +; RV32ZBA-NEXT: mv s3, a2 +; RV32ZBA-NEXT: mv s0, a1 +; RV32ZBA-NEXT: mv s1, a0 +; RV32ZBA-NEXT: sw zero, 8(sp) +; RV32ZBA-NEXT: addi a4, sp, 8 +; RV32ZBA-NEXT: call __mulodi4@plt +; RV32ZBA-NEXT: lw a0, 8(sp) +; RV32ZBA-NEXT: bnez a0, .LBB40_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv s1, s3 +; RV32ZBA-NEXT: mv s0, s2 +; RV32ZBA-NEXT: .LBB40_2: # %entry +; RV32ZBA-NEXT: mv a0, s1 +; RV32ZBA-NEXT: mv a1, s0 +; RV32ZBA-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 32 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.select.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulh a2, a0, a1 +; RV64ZBA-NEXT: mul a3, a0, a1 +; RV64ZBA-NEXT: srai a3, a3, 63 +; RV64ZBA-NEXT: bne a2, a3, .LBB40_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB40_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1275,6 +2206,30 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: xor a0, a2, a0 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.not.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi sp, sp, -16 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: sw zero, 8(sp) +; RV32ZBA-NEXT: addi a4, sp, 8 +; RV32ZBA-NEXT: call __mulodi4@plt +; RV32ZBA-NEXT: lw a0, 8(sp) +; RV32ZBA-NEXT: seqz a0, a0 +; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.not.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulh a2, a0, a1 +; RV64ZBA-NEXT: mul a0, a0, a1 +; RV64ZBA-NEXT: srai a0, 
a0, 63 +; RV64ZBA-NEXT: xor a0, a2, a0 +; RV64ZBA-NEXT: seqz a0, a0 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1303,6 +2258,27 @@ define i32 @umulo.select.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB42_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.select.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulhu a2, a0, a1 +; RV32ZBA-NEXT: bnez a2, .LBB42_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: .LBB42_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.select.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: slli a2, a1, 32 +; RV64ZBA-NEXT: slli a3, a0, 32 +; RV64ZBA-NEXT: mulhu a2, a3, a2 +; RV64ZBA-NEXT: srli a2, a2, 32 +; RV64ZBA-NEXT: bnez a2, .LBB42_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB42_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -1325,6 +2301,21 @@ define i1 @umulo.not.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.not.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulhu a0, a0, a1 +; RV32ZBA-NEXT: seqz a0, a0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.not.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: slli a1, a1, 32 +; RV64ZBA-NEXT: slli a0, a0, 32 +; RV64ZBA-NEXT: mulhu a0, a0, a1 +; RV64ZBA-NEXT: srli a0, a0, 32 +; RV64ZBA-NEXT: seqz a0, a0 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %obit = extractvalue {i32, i1} %t, 1 @@ -1366,6 +2357,40 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB44_2: # %entry ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.select.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mul a4, a3, a0 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: mulhu a5, a0, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: snez a5, a3 +; RV32ZBA-NEXT: snez a4, a1 +; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: snez a5, a5 +; RV32ZBA-NEXT: or a4, a4, a5 +; RV32ZBA-NEXT: mulhu a5, a3, a0 +; RV32ZBA-NEXT: snez a5, a5 +; RV32ZBA-NEXT: or a4, a4, a5 +; RV32ZBA-NEXT: or a4, a4, a6 +; RV32ZBA-NEXT: bnez a4, .LBB44_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: mv a1, a3 +; RV32ZBA-NEXT: .LBB44_2: # %entry +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.select.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulhu a2, a0, a1 +; RV64ZBA-NEXT: bnez a2, .LBB44_2 +; RV64ZBA-NEXT: # %bb.1: # %entry +; RV64ZBA-NEXT: mv a0, a1 +; RV64ZBA-NEXT: .LBB44_2: # %entry +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1400,6 +2425,33 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: mulhu a0, a0, a1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.not.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mul a4, a3, a0 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: mulhu a5, a0, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: snez a5, a3 +; RV32ZBA-NEXT: snez a4, a1 +; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: mulhu a1, a1, a2 +; RV32ZBA-NEXT: snez a1, a1 +; RV32ZBA-NEXT: 
or a1, a4, a1 +; RV32ZBA-NEXT: mulhu a0, a3, a0 +; RV32ZBA-NEXT: snez a0, a0 +; RV32ZBA-NEXT: or a0, a1, a0 +; RV32ZBA-NEXT: or a0, a0, a6 +; RV32ZBA-NEXT: xori a0, a0, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.not.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulhu a0, a0, a1 +; RV64ZBA-NEXT: seqz a0, a0 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1438,6 +2490,33 @@ define zeroext i1 @saddo.br.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: .LBB46_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo.br.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a2, a0, a1 +; RV32ZBA-NEXT: slt a0, a2, a0 +; RV32ZBA-NEXT: slti a1, a1, 0 +; RV32ZBA-NEXT: beq a1, a0, .LBB46_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB46_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo.br.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: add a2, a0, a1 +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: beq a0, a2, .LBB46_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB46_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -1482,6 +2561,37 @@ define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: .LBB47_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: saddo.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a4, a1, a3 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a0, a4, a0 +; RV32ZBA-NEXT: xor a0, a1, a0 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: not a1, a1 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: bgez a0, .LBB47_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB47_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: saddo.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a2, a0, a1 +; RV64ZBA-NEXT: slt a0, a2, a0 +; RV64ZBA-NEXT: slti a1, a1, 0 +; RV64ZBA-NEXT: beq a1, a0, .LBB47_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB47_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -1518,6 +2628,29 @@ define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: .LBB48_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.br.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a1, a0, a1 +; RV32ZBA-NEXT: bgeu a1, a0, .LBB48_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB48_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.br.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addw a1, a0, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: bgeu a1, a0, .LBB48_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB48_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} 
@llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -1560,6 +2693,35 @@ define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: .LBB49_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a3, a1, a3 +; RV32ZBA-NEXT: add a2, a0, a2 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a2, a3, a0 +; RV32ZBA-NEXT: beq a2, a1, .LBB49_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a2, a1 +; RV32ZBA-NEXT: .LBB49_2: # %entry +; RV32ZBA-NEXT: beqz a0, .LBB49_4 +; RV32ZBA-NEXT: # %bb.3: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB49_4: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a1, a0, a1 +; RV64ZBA-NEXT: bgeu a1, a0, .LBB49_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB49_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -1600,6 +2762,33 @@ define zeroext i1 @ssubo.br.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: .LBB50_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo.br.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sgtz a2, a1 +; RV32ZBA-NEXT: sub a1, a0, a1 +; RV32ZBA-NEXT: slt a0, a1, a0 +; RV32ZBA-NEXT: beq a2, a0, .LBB50_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB50_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo.br.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: sub a2, a0, a1 +; RV64ZBA-NEXT: subw a0, a0, a1 +; RV64ZBA-NEXT: beq a0, a2, .LBB50_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB50_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -1642,6 +2831,35 @@ define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: .LBB51_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: ssubo.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a0, a0, a2 +; RV32ZBA-NEXT: sub a2, a1, a3 +; RV32ZBA-NEXT: sub a0, a2, a0 +; RV32ZBA-NEXT: xor a0, a1, a0 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: and a0, a1, a0 +; RV32ZBA-NEXT: bgez a0, .LBB51_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB51_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: ssubo.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sgtz a2, a1 +; RV64ZBA-NEXT: sub a1, a0, a1 +; RV64ZBA-NEXT: slt a0, a1, a0 +; RV64ZBA-NEXT: beq a2, a0, .LBB51_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB51_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -1678,6 +2896,29 @@ define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: .LBB52_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret 
+; +; RV32ZBA-LABEL: usubo.br.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sub a1, a0, a1 +; RV32ZBA-NEXT: bgeu a0, a1, .LBB52_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB52_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.br.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: subw a1, a0, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: bgeu a0, a1, .LBB52_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB52_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -1722,6 +2963,37 @@ define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: .LBB53_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: usubo.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sltu a4, a0, a2 +; RV32ZBA-NEXT: sub a3, a1, a3 +; RV32ZBA-NEXT: sub a3, a3, a4 +; RV32ZBA-NEXT: beq a3, a1, .LBB53_3 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a1, a3 +; RV32ZBA-NEXT: bnez a0, .LBB53_4 +; RV32ZBA-NEXT: .LBB53_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB53_3: +; RV32ZBA-NEXT: sub a1, a0, a2 +; RV32ZBA-NEXT: sltu a0, a0, a1 +; RV32ZBA-NEXT: beqz a0, .LBB53_2 +; RV32ZBA-NEXT: .LBB53_4: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: usubo.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sub a1, a0, a1 +; RV64ZBA-NEXT: bgeu a0, a1, .LBB53_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB53_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -1762,6 +3034,33 @@ define zeroext i1 @smulo.br.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: .LBB54_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.br.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulh a2, a0, a1 +; RV32ZBA-NEXT: mul a0, a0, a1 +; RV32ZBA-NEXT: srai a0, a0, 31 +; RV32ZBA-NEXT: beq a2, a0, .LBB54_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB54_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.br.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sext.w a1, a1 +; RV64ZBA-NEXT: sext.w a0, a0 +; RV64ZBA-NEXT: mul a2, a0, a1 +; RV64ZBA-NEXT: mulw a0, a0, a1 +; RV64ZBA-NEXT: beq a0, a2, .LBB54_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB54_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -1809,6 +3108,40 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: .LBB55_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi sp, sp, -16 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: sw zero, 8(sp) +; RV32ZBA-NEXT: addi a4, sp, 8 +; RV32ZBA-NEXT: call __mulodi4@plt +; RV32ZBA-NEXT: lw a0, 8(sp) 
+; RV32ZBA-NEXT: beqz a0, .LBB55_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: j .LBB55_3 +; RV32ZBA-NEXT: .LBB55_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: .LBB55_3: # %overflow +; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulh a2, a0, a1 +; RV64ZBA-NEXT: mul a0, a0, a1 +; RV64ZBA-NEXT: srai a0, a0, 63 +; RV64ZBA-NEXT: beq a2, a0, .LBB55_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB55_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -1859,6 +3192,43 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV64-NEXT: .LBB56_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: smulo2.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi sp, sp, -16 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: sw zero, 8(sp) +; RV32ZBA-NEXT: addi a2, zero, -13 +; RV32ZBA-NEXT: addi a3, zero, -1 +; RV32ZBA-NEXT: addi a4, sp, 8 +; RV32ZBA-NEXT: call __mulodi4@plt +; RV32ZBA-NEXT: lw a0, 8(sp) +; RV32ZBA-NEXT: beqz a0, .LBB56_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: j .LBB56_3 +; RV32ZBA-NEXT: .LBB56_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: .LBB56_3: # %overflow +; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: smulo2.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a1, zero, -13 +; RV64ZBA-NEXT: mulh a2, a0, a1 +; RV64ZBA-NEXT: mul a0, a0, a1 +; RV64ZBA-NEXT: srai a0, a0, 63 +; RV64ZBA-NEXT: beq a2, a0, .LBB56_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB56_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 -13) %val = extractvalue {i64, i1} %t, 0 @@ -1897,6 +3267,31 @@ define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) { ; RV64-NEXT: .LBB57_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.br.i32: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mulhu a0, a0, a1 +; RV32ZBA-NEXT: beqz a0, .LBB57_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB57_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.br.i32: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: slli a1, a1, 32 +; RV64ZBA-NEXT: slli a0, a0, 32 +; RV64ZBA-NEXT: mulhu a0, a0, a1 +; RV64ZBA-NEXT: srli a0, a0, 32 +; RV64ZBA-NEXT: beqz a0, .LBB57_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB57_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %val = extractvalue {i32, i1} %t, 0 @@ -1947,6 +3342,43 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) { ; RV64-NEXT: .LBB58_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mul a4, a3, a0 
+; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: mulhu a5, a0, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: snez a5, a3 +; RV32ZBA-NEXT: snez a4, a1 +; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: mulhu a1, a1, a2 +; RV32ZBA-NEXT: snez a1, a1 +; RV32ZBA-NEXT: or a1, a4, a1 +; RV32ZBA-NEXT: mulhu a0, a3, a0 +; RV32ZBA-NEXT: snez a0, a0 +; RV32ZBA-NEXT: or a0, a1, a0 +; RV32ZBA-NEXT: or a0, a0, a6 +; RV32ZBA-NEXT: beqz a0, .LBB58_2 +; RV32ZBA-NEXT: # %bb.1: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB58_2: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: mulhu a0, a0, a1 +; RV64ZBA-NEXT: beqz a0, .LBB58_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB58_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) %val = extractvalue {i64, i1} %t, 0 @@ -1989,6 +3421,35 @@ define zeroext i1 @umulo2.br.i64(i64 %v1) { ; RV64-NEXT: .LBB59_2: # %continue ; RV64-NEXT: addi a0, zero, 1 ; RV64-NEXT: ret +; +; RV32ZBA-LABEL: umulo2.br.i64: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a2, a0, a0 +; RV32ZBA-NEXT: sltu a0, a2, a0 +; RV32ZBA-NEXT: add a2, a1, a1 +; RV32ZBA-NEXT: add a2, a2, a0 +; RV32ZBA-NEXT: beq a2, a1, .LBB59_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a2, a1 +; RV32ZBA-NEXT: .LBB59_2: # %entry +; RV32ZBA-NEXT: beqz a0, .LBB59_4 +; RV32ZBA-NEXT: # %bb.3: # %overflow +; RV32ZBA-NEXT: mv a0, zero +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB59_4: # %continue +; RV32ZBA-NEXT: addi a0, zero, 1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: umulo2.br.i64: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add a1, a0, a0 +; RV64ZBA-NEXT: bgeu a1, a0, .LBB59_2 +; RV64ZBA-NEXT: # %bb.1: # %overflow +; RV64ZBA-NEXT: mv a0, zero +; RV64ZBA-NEXT: ret +; RV64ZBA-NEXT: .LBB59_2: # %continue +; RV64ZBA-NEXT: addi a0, zero, 1 +; RV64ZBA-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2) %val = extractvalue {i64, i1} %t, 0 -- 2.7.4
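
A note on the zext.w mention in the commit message: on RV64, a 32-to-64-bit zero extend is normally lowered as a pair of shifts, while the Zba extension provides zext.w (an assembler alias for add.uw rd, rs, zero) that does the same thing in one instruction. The umulo2.i32 RV64ZBA output above already shows this form. A minimal sketch of the two lowerings, assuming the value to be extended lives in a0:

# Base RV64 (no Zba): zero-extend the low 32 bits of a0 with a shift pair.
slli a0, a0, 32    # move the low 32 bits to the top, discarding the old upper half
srli a0, a0, 32    # shift back down; the upper 32 bits are now zero

# With Zba: a single instruction.
zext.w a0, a0      # alias for add.uw a0, a0, zero; zero-extends a0[31:0] into a0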