From 75b18ba14d07d38f7031f282f0d216fa6153fd81 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 4 Oct 2022 10:08:49 +0100 Subject: [PATCH] Revert "[AArch64][GlobalISel] Fold away lowered vector sign-extend of vector compares." This reverts commit dcd02a524befcec71f46ab76d725b2a8f8623d69. We should instead use the generic combine. --- llvm/lib/Target/AArch64/AArch64Combine.td | 9 +- .../AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 36 -- .../CodeGen/AArch64/GlobalISel/combine-udiv.ll | 100 ++-- .../postlegalizer-combiner-sext-of-vcmp.mir | 146 ----- llvm/test/CodeGen/AArch64/arm64-vabs.ll | 22 +- llvm/test/CodeGen/AArch64/min-max.ll | 88 ++- .../CodeGen/AArch64/neon-compare-instructions.ll | 630 +++++++++++++++++++++ 7 files changed, 774 insertions(+), 257 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 1518d30..fff6921 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -203,13 +203,6 @@ def split_store_zero_128 : GICombineRule< (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }]) >; -def sext_via_shifts_of_vcmp : GICombineRule< - (defs root:$d, register_matchinfo:$info), - (match (wip_match_opcode G_ASHR):$d, - [{ return matchSextViaShiftsOfVCmp(*${d}, MRI, ${info}); }]), - (apply [{ Helper.replaceSingleDefInstWithReg(*${d}, ${info}); }]) ->; - // Post-legalization combines which should happen at all optimization levels. // (E.g. ones that facilitate matching for the selector) For example, matching // pseudos. 
@@ -236,6 +229,6 @@ def AArch64PostLegalizerCombinerHelper constant_fold, identity_combines, ptr_add_immed_chain, overlapping_and, split_store_zero_128, undef_combines, - select_to_minmax, sext_via_shifts_of_vcmp]> { + select_to_minmax]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 750bd51..fbeff13 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -32,9 +32,7 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" @@ -331,40 +329,6 @@ static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI, Store.eraseFromParent(); } -// Match a legalized vector sext of a vector compare. Vector compares always -// sign-extend the low bit anyway. Unfortunately we have to match the G_SEXT -// after it's been legalized to shifts since this is after legalization. -static bool matchSextViaShiftsOfVCmp(MachineInstr &MI, MachineRegisterInfo &MRI, - Register &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR && "Expected G_ASHR"); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - if (!DstTy.isVector()) - return false; - - Register ShlLHS; - int64_t ShlRHS; - if (!mi_match(MI.getOperand(1).getReg(), MRI, - m_GShl(m_Reg(ShlLHS), m_ICstOrSplat(ShlRHS)))) - return false; - - // Check the shift amount is correct for a sext. 
- if (ShlRHS != DstTy.getScalarSizeInBits() - 1) - return false; - if (!mi_match(MI.getOperand(2).getReg(), MRI, m_SpecificICstSplat(ShlRHS))) - return false; - - // Check we're trying to extend a vector compare. - if (auto *Cmp = getOpcodeDef<GICmp>(ShlLHS, MRI)) { - MatchInfo = Cmp->getReg(0); - return true; - } - if (auto *Cmp = getOpcodeDef<GFCmp>(ShlLHS, MRI)) { - MatchInfo = Cmp->getReg(0); - return true; - } - return false; -} - #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS #include "AArch64GenPostLegalizeGICombiner.inc" #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll index 9ca9f1b..da06d82 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -57,9 +57,10 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; GISEL-LABEL: combine_vec_udiv_nonuniform: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI1_4 -; GISEL-NEXT: adrp x9, .LCPI1_5 +; GISEL-NEXT: adrp x9, .LCPI1_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4] ; GISEL-NEXT: adrp x8, .LCPI1_3 +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; GISEL-NEXT: adrp x8, .LCPI1_2 @@ -67,20 +68,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] -; GISEL-NEXT: adrp x8, .LCPI1_1 +; GISEL-NEXT: adrp x8, .LCPI1_5 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h ; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h ; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1] -; GISEL-NEXT: adrp x8, .LCPI1_0 -; GISEL-NEXT: neg v3.8h, v3.8h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5] +; GISEL-NEXT: adrp x8, .LCPI1_1 +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; 
GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_5] -; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0] +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] +; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h -; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -106,23 +108,25 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) { ; GISEL-LABEL: combine_vec_udiv_nonuniform2: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI2_3 -; GISEL-NEXT: adrp x9, .LCPI2_1 +; GISEL-NEXT: adrp x9, .LCPI2_4 +; GISEL-NEXT: adrp x10, .LCPI2_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3] ; GISEL-NEXT: adrp x8, .LCPI2_2 -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_1] +; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] -; GISEL-NEXT: adrp x8, .LCPI2_4 +; GISEL-NEXT: adrp x8, .LCPI2_1 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h -; GISEL-NEXT: neg v4.8h, v4.8h ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1] ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_4] -; GISEL-NEXT: adrp x8, .LCPI2_0 +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4] +; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0] -; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h +; GISEL-NEXT: neg v3.8h, v5.8h +; GISEL-NEXT: shl v2.8h, v2.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: sshr v2.8h, v2.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -147,21 +151,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) { ; GISEL-LABEL: combine_vec_udiv_nonuniform3: ; GISEL: // 
%bb.0: ; GISEL-NEXT: adrp x8, .LCPI3_2 -; GISEL-NEXT: adrp x9, .LCPI3_3 +; GISEL-NEXT: adrp x9, .LCPI3_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2] -; GISEL-NEXT: adrp x8, .LCPI3_1 -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_3] +; GISEL-NEXT: adrp x8, .LCPI3_3 +; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] -; GISEL-NEXT: adrp x8, .LCPI3_0 -; GISEL-NEXT: neg v2.8h, v2.8h -; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h -; GISEL-NEXT: usra v1.8h, v3.8h, #1 -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0] -; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h -; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] +; GISEL-NEXT: adrp x8, .LCPI3_1 +; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h +; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] +; GISEL-NEXT: shl v2.8h, v2.8h, #15 +; GISEL-NEXT: usra v1.8h, v4.8h, #1 +; GISEL-NEXT: neg v3.8h, v3.8h +; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -191,19 +197,21 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; GISEL-LABEL: combine_vec_udiv_nonuniform4: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI4_2 -; GISEL-NEXT: adrp x9, .LCPI4_1 +; GISEL-NEXT: adrp x9, .LCPI4_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2] ; GISEL-NEXT: adrp x8, .LCPI4_3 -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1] +; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0] ; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3] ; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b -; GISEL-NEXT: adrp x8, .LCPI4_0 -; GISEL-NEXT: neg v4.16b, v4.16b +; GISEL-NEXT: adrp x8, .LCPI4_1 +; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b ; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] -; GISEL-NEXT: cmeq v2.16b, v3.16b, 
v2.16b -; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] +; GISEL-NEXT: shl v3.16b, v3.16b, #7 +; GISEL-NEXT: neg v2.16b, v2.16b +; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b +; GISEL-NEXT: sshr v2.16b, v3.16b, #7 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, @@ -240,26 +248,28 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; GISEL-LABEL: pr38477: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI5_3 -; GISEL-NEXT: adrp x9, .LCPI5_4 +; GISEL-NEXT: adrp x9, .LCPI5_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; GISEL-NEXT: adrp x8, .LCPI5_2 +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] -; GISEL-NEXT: adrp x8, .LCPI5_1 +; GISEL-NEXT: adrp x8, .LCPI5_4 ; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h ; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1] -; GISEL-NEXT: adrp x8, .LCPI5_0 -; GISEL-NEXT: neg v3.8h, v3.8h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4] +; GISEL-NEXT: adrp x8, .LCPI5_1 +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_4] -; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0] +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1] +; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h -; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %a0, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir deleted file mode 100644 index 0c58d2c..0000000 --- 
a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir +++ /dev/null @@ -1,146 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s ---- -name: sext_of_fcmp_v4s32 -legalized: true -liveins: - - { reg: '$q0' } - - { reg: '$x0' } - - { reg: '$x1' } - - { reg: '$x2' } -body: | - bb.1: - liveins: $q0, $x0, $x1, $x2 - - ; CHECK-LABEL: name: sext_of_fcmp_v4s32 - ; CHECK: liveins: $q0, $x0, $x1, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]] - ; CHECK-NEXT: $q0 = COPY [[FCMP]](<4 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:_(<4 x s32>) = COPY $q0 - %6:_(s32) = G_FCONSTANT float 6.000000e+00 - %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32) - %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5 - %19:_(s32) = G_CONSTANT i32 31 - %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32) - %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>) - %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>) - $q0 = COPY %11(<4 x s32>) - RET_ReallyLR implicit $q0 - -... 
---- -name: sext_of_icmp_v4s32 -legalized: true -liveins: - - { reg: '$q0' } - - { reg: '$x0' } - - { reg: '$x1' } - - { reg: '$x2' } -body: | - bb.1: - liveins: $q0, $x0, $x1, $x2 - - ; CHECK-LABEL: name: sext_of_icmp_v4s32 - ; CHECK: liveins: $q0, $x0, $x1, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[BUILD_VECTOR]] - ; CHECK-NEXT: $q0 = COPY [[ICMP]](<4 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:_(<4 x s32>) = COPY $q0 - %6:_(s32) = G_CONSTANT i32 42 - %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32) - %17:_(<4 x s32>) = G_ICMP intpred(eq), %0(<4 x s32>), %5 - %19:_(s32) = G_CONSTANT i32 31 - %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32) - %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>) - %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>) - $q0 = COPY %11(<4 x s32>) - RET_ReallyLR implicit $q0 - -... 
---- -name: sext_of_fcmp_wrong_shift -legalized: true -liveins: - - { reg: '$q0' } - - { reg: '$x0' } - - { reg: '$x1' } - - { reg: '$x2' } -body: | - bb.1: - liveins: $q0, $x0, $x1, $x2 - - ; CHECK-LABEL: name: sext_of_fcmp_wrong_shift - ; CHECK: liveins: $q0, $x0, $x1, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[FCMP]], [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: $q0 = COPY [[ASHR]](<4 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:_(<4 x s32>) = COPY $q0 - %6:_(s32) = G_FCONSTANT float 6.000000e+00 - %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32) - %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5 - %19:_(s32) = G_CONSTANT i32 29 - %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32) - %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>) - %11:_(<4 x s32>) = G_ASHR %18, %20(<4 x s32>) - $q0 = COPY %11(<4 x s32>) - RET_ReallyLR implicit $q0 - -... 
---- -name: sext_of_fcmp_mismatch_shift -legalized: true -liveins: - - { reg: '$q0' } - - { reg: '$x0' } - - { reg: '$x1' } - - { reg: '$x2' } -body: | - bb.1: - liveins: $q0, $x0, $x1, $x2 - - ; CHECK-LABEL: name: sext_of_fcmp_mismatch_shift - ; CHECK: liveins: $q0, $x0, $x1, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = G_FCMP floatpred(ogt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: %cstv2:_(<4 x s32>) = G_BUILD_VECTOR %cst2(s32), %cst2(s32), %cst2(s32), %cst2(s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[FCMP]], [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = G_ASHR [[SHL]], %cstv2(<4 x s32>) - ; CHECK-NEXT: $q0 = COPY [[ASHR]](<4 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:_(<4 x s32>) = COPY $q0 - %6:_(s32) = G_FCONSTANT float 6.000000e+00 - %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32) - %17:_(<4 x s32>) = G_FCMP floatpred(ogt), %0(<4 x s32>), %5 - %19:_(s32) = G_CONSTANT i32 29 - %20:_(<4 x s32>) = G_BUILD_VECTOR %19(s32), %19(s32), %19(s32), %19(s32) - %cst2:_(s32) = G_CONSTANT i32 31 - %cstv2:_(<4 x s32>) = G_BUILD_VECTOR %cst2(s32), %cst2(s32), %cst2(s32), %cst2(s32) - %18:_(<4 x s32>) = G_SHL %17, %20(<4 x s32>) - %11:_(<4 x s32>) = G_ASHR %18, %cstv2(<4 x s32>) - $q0 = COPY %11(<4 x s32>) - RET_ReallyLR implicit $q0 - -... 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index f7e8d2e..cbe9dea 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -305,6 +305,8 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) { ; GISEL-NEXT: usubl.4s v0, v0, v1 ; GISEL-NEXT: cmgt.4s v1, v2, v0 ; GISEL-NEXT: neg.4s v2, v0 +; GISEL-NEXT: shl.4s v1, v1, #31 +; GISEL-NEXT: sshr.4s v1, v1, #31 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: addv.4s s0, v0 ; GISEL-NEXT: fmov w0, s0 @@ -376,6 +378,8 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) { ; GISEL-NEXT: usubl.2d v0, v0, v1 ; GISEL-NEXT: cmgt.2d v1, v2, v0 ; GISEL-NEXT: neg.2d v2, v0 +; GISEL-NEXT: shl.2d v1, v1, #63 +; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: addp.2d d0, v0 ; GISEL-NEXT: fmov x0, d0 @@ -1571,6 +1575,8 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.2s v2, v0 ; GISEL-NEXT: cmge.2s v1, v0, v1 +; GISEL-NEXT: shl.2s v1, v1, #31 +; GISEL-NEXT: sshr.2s v1, v1, #31 ; GISEL-NEXT: bif.8b v0, v2, v1 ; GISEL-NEXT: ret @@ -1591,6 +1597,8 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.4h v2, v0 ; GISEL-NEXT: cmgt.4h v1, v0, v1 +; GISEL-NEXT: shl.4h v1, v1, #15 +; GISEL-NEXT: sshr.4h v1, v1, #15 ; GISEL-NEXT: bif.8b v0, v2, v1 ; GISEL-NEXT: ret ; For GlobalISel, this generates terrible code until we can pattern match this to abs. 
@@ -1612,6 +1620,8 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.8b v2, v0 ; GISEL-NEXT: cmgt.8b v1, v1, v0 +; GISEL-NEXT: shl.8b v1, v1, #7 +; GISEL-NEXT: sshr.8b v1, v1, #7 ; GISEL-NEXT: bit.8b v0, v2, v1 ; GISEL-NEXT: ret @@ -1632,6 +1642,8 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: cmge.4s v1, v0, v1 +; GISEL-NEXT: shl.4s v1, v1, #31 +; GISEL-NEXT: sshr.4s v1, v1, #31 ; GISEL-NEXT: bif.16b v0, v2, v1 ; GISEL-NEXT: ret @@ -1652,6 +1664,8 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: cmgt.8h v1, v0, v1 +; GISEL-NEXT: shl.8h v1, v1, #15 +; GISEL-NEXT: sshr.8h v1, v1, #15 ; GISEL-NEXT: bif.16b v0, v2, v1 ; GISEL-NEXT: ret @@ -1672,6 +1686,8 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: cmgt.16b v1, v1, v0 +; GISEL-NEXT: shl.16b v1, v1, #7 +; GISEL-NEXT: sshr.16b v1, v1, #7 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: ret @@ -1692,6 +1708,8 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind { ; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: cmge.2d v1, v1, v0 +; GISEL-NEXT: shl.2d v1, v1, #63 +; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: ret @@ -1713,6 +1731,8 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) { ; GISEL-NEXT: ssubl.2d v0, v0, v1 ; GISEL-NEXT: cmgt.2d v1, v2, v0 ; GISEL-NEXT: neg.2d v2, v0 +; GISEL-NEXT: shl.2d v1, v1, #63 +; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: ret %aext = sext <2 x i32> %a to <2 x i64> @@ -1762,5 +1782,3 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) { %absel = select <2 x i1> %abcmp, <2 x i128> %ababs, <2 x i128> %abdiff ret <2 x 
i128> %absel } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; FALLBACK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll index 2cec030..28311b4 100644 --- a/llvm/test/CodeGen/AArch64/min-max.ll +++ b/llvm/test/CodeGen/AArch64/min-max.ll @@ -224,11 +224,19 @@ define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: smax2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-ISEL-LABEL: smax2i64: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-ISEL-NEXT: ret +; +; CHECK-GLOBAL-LABEL: smax2i64: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-GLOBAL-NEXT: ret %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %c } @@ -249,6 +257,10 @@ define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-GLOBAL: // %bb.0: ; CHECK-GLOBAL-NEXT: cmgt v4.2d, v0.2d, v2.2d ; CHECK-GLOBAL-NEXT: cmgt v5.2d, v1.2d, v3.2d +; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63 ; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-GLOBAL-NEXT: stp q0, q1, [x0] @@ -476,11 +488,19 @@ define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: umax2i64: -; CHECK: // %bb.0: 
-; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-ISEL-LABEL: umax2i64: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-ISEL-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umax2i64: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmhi v2.2d, v0.2d, v1.2d +; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-GLOBAL-NEXT: ret %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %c } @@ -501,6 +521,10 @@ define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-GLOBAL: // %bb.0: ; CHECK-GLOBAL-NEXT: cmhi v4.2d, v0.2d, v2.2d ; CHECK-GLOBAL-NEXT: cmhi v5.2d, v1.2d, v3.2d +; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63 ; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-GLOBAL-NEXT: stp q0, q1, [x0] @@ -728,11 +752,19 @@ define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: smin2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-ISEL-LABEL: smin2i64: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-ISEL-NEXT: ret +; +; CHECK-GLOBAL-LABEL: smin2i64: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmgt v2.2d, v1.2d, v0.2d +; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-GLOBAL-NEXT: ret %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> 
%a, <2 x i64> %b) ret <2 x i64> %c } @@ -753,6 +785,10 @@ define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-GLOBAL: // %bb.0: ; CHECK-GLOBAL-NEXT: cmgt v4.2d, v2.2d, v0.2d ; CHECK-GLOBAL-NEXT: cmgt v5.2d, v3.2d, v1.2d +; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63 ; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-GLOBAL-NEXT: stp q0, q1, [x0] @@ -980,11 +1016,19 @@ define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) { declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: umin2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-ISEL-LABEL: umin2i64: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d +; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-ISEL-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umin2i64: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmhi v2.2d, v1.2d, v0.2d +; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63 +; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-GLOBAL-NEXT: ret %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %c } @@ -1005,6 +1049,10 @@ define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) { ; CHECK-GLOBAL: // %bb.0: ; CHECK-GLOBAL-NEXT: cmhi v4.2d, v2.2d, v0.2d ; CHECK-GLOBAL-NEXT: cmhi v5.2d, v3.2d, v1.2d +; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: shl v5.2d, v5.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v4.2d, v4.2d, #63 +; CHECK-GLOBAL-NEXT: sshr v5.2d, v5.2d, #63 ; CHECK-GLOBAL-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-GLOBAL-NEXT: bif v1.16b, v3.16b, v5.16b ; CHECK-GLOBAL-NEXT: stp q0, q1, [x0] diff --git 
a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index 9b2d8be..ec210b4 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -11,6 +11,8 @@ define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmeq8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp eq <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -26,6 +28,8 @@ define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmeq16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp eq <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -41,6 +45,8 @@ define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmeq4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp eq <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -56,6 +62,8 @@ define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmeq8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp eq <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -71,6 +79,8 @@ define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmeq2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp eq <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -86,6 +96,8 @@ define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: 
cmeq4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp eq <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -101,6 +113,8 @@ define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmeq2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp eq <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -118,6 +132,8 @@ define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ne <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -135,6 +151,8 @@ define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ne <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -152,6 +170,8 @@ define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.4h, v0.4h, v1.4h ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ne <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -169,6 +189,8 @@ define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.8h, v0.8h, v1.8h ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ne <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -186,6 +208,8 @@ define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL: // 
%bb.0: ; GISEL-NEXT: cmeq v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ne <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -203,6 +227,8 @@ define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ne <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -220,6 +246,8 @@ define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: cmeq v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ne <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -235,6 +263,8 @@ define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmgt8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sgt <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -250,6 +280,8 @@ define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmgt16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sgt <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -265,6 +297,8 @@ define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmgt4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sgt <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -280,6 +314,8 @@ define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) { ; 
GISEL-LABEL: cmgt8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sgt <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -295,6 +331,8 @@ define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmgt2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sgt <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -310,6 +348,8 @@ define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmgt4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sgt <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -325,6 +365,8 @@ define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmgt2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp sgt <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -341,6 +383,8 @@ define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmlt8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp slt <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -357,6 +401,8 @@ define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmlt16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp slt <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -373,6 +419,8 @@ define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: 
cmlt4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp slt <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -389,6 +437,8 @@ define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmlt8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp slt <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -405,6 +455,8 @@ define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmlt2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp slt <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -421,6 +473,8 @@ define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmlt4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp slt <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -437,6 +491,8 @@ define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmlt2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmgt v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp slt <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -452,6 +508,8 @@ define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmge8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sge <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -467,6 +525,8 @@ define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmge16xi8: ; GISEL: // 
%bb.0: ; GISEL-NEXT: cmge v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sge <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -482,6 +542,8 @@ define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmge4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sge <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -497,6 +559,8 @@ define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmge8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sge <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -512,6 +576,8 @@ define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmge2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sge <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -527,6 +593,8 @@ define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmge4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sge <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -542,6 +610,8 @@ define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmge2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp sge <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -558,6 +628,8 @@ define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmle8xi8: ; GISEL: // %bb.0: ; 
GISEL-NEXT: cmge v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sle <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -574,6 +646,8 @@ define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmle16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sle <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -590,6 +664,8 @@ define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmle4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sle <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -606,6 +682,8 @@ define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmle8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sle <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -622,6 +700,8 @@ define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmle2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sle <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -638,6 +718,8 @@ define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmle4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmge v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sle <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -654,6 +736,8 @@ define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmle2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: 
cmge v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp sle <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -669,6 +753,8 @@ define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmhi8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ugt <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -684,6 +770,8 @@ define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmhi16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ugt <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -699,6 +787,8 @@ define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmhi4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ugt <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -714,6 +804,8 @@ define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmhi8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ugt <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -729,6 +821,8 @@ define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmhi2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ugt <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -744,6 +838,8 @@ define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmhi4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.4s, 
v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ugt <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -759,6 +855,8 @@ define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmhi2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ugt <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -775,6 +873,8 @@ define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmlo8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ult <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -791,6 +891,8 @@ define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmlo16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ult <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -807,6 +909,8 @@ define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmlo4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ult <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -823,6 +927,8 @@ define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmlo8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ult <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -839,6 +945,8 @@ define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmlo2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.2s, v1.2s, v0.2s +; 
GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ult <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -855,6 +963,8 @@ define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmlo4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ult <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -871,6 +981,8 @@ define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmlo2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhi v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ult <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -886,6 +998,8 @@ define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmhs8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp uge <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -901,6 +1015,8 @@ define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmhs16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp uge <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -916,6 +1032,8 @@ define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmhs4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp uge <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -931,6 +1049,8 @@ define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmhs8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.8h, v0.8h, v1.8h +; GISEL-NEXT: 
shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp uge <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -946,6 +1066,8 @@ define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmhs2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp uge <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -961,6 +1083,8 @@ define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmhs4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp uge <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -976,6 +1100,8 @@ define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmhs2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp uge <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -992,6 +1118,8 @@ define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-LABEL: cmls8xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ule <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1008,6 +1136,8 @@ define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-LABEL: cmls16xi8: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ule <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1024,6 +1154,8 @@ define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-LABEL: cmls4xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl 
v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ule <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -1040,6 +1172,8 @@ define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-LABEL: cmls8xi16: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ule <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1056,6 +1190,8 @@ define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-LABEL: cmls2xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ule <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -1072,6 +1208,8 @@ define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-LABEL: cmls4xi32: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ule <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -1088,6 +1226,8 @@ define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-LABEL: cmls2xi64: ; GISEL: // %bb.0: ; GISEL-NEXT: cmhs v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ule <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -1106,6 +1246,8 @@ define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) { ; GISEL-NEXT: and v0.8b, v0.8b, v1.8b ; GISEL-NEXT: cmeq v0.8b, v0.8b, v2.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = and <8 x i8> %A, %B %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer @@ -1125,6 +1267,8 @@ define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) { ; GISEL-NEXT: and v0.16b, v0.16b, v1.16b ; GISEL-NEXT: cmeq v0.16b, v0.16b, v2.16b ; 
GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = and <16 x i8> %A, %B %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer @@ -1144,6 +1288,8 @@ define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) { ; GISEL-NEXT: and v0.8b, v0.8b, v1.8b ; GISEL-NEXT: cmeq v0.4h, v0.4h, v2.4h ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = and <4 x i16> %A, %B %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer @@ -1163,6 +1309,8 @@ define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) { ; GISEL-NEXT: and v0.16b, v0.16b, v1.16b ; GISEL-NEXT: cmeq v0.8h, v0.8h, v2.8h ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = and <8 x i16> %A, %B %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer @@ -1182,6 +1330,8 @@ define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) { ; GISEL-NEXT: and v0.8b, v0.8b, v1.8b ; GISEL-NEXT: cmeq v0.2s, v0.2s, v2.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = and <2 x i32> %A, %B %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer @@ -1201,6 +1351,8 @@ define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) { ; GISEL-NEXT: and v0.16b, v0.16b, v1.16b ; GISEL-NEXT: cmeq v0.4s, v0.4s, v2.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = and <4 x i32> %A, %B %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer @@ -1220,6 +1372,8 @@ define <2 x i64> @cmtst2xi64(<2 x i64> %A, <2 x i64> %B) { ; GISEL-NEXT: and v0.16b, v0.16b, v1.16b ; GISEL-NEXT: cmeq v0.2d, v0.2d, v2.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = and <2 x i64> %A, %B %tmp4 = icmp ne <2 x i64> 
%tmp3, zeroinitializer @@ -1239,6 +1393,8 @@ define <8 x i8> @cmeqz8xi8(<8 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp eq <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1255,6 +1411,8 @@ define <16 x i8> @cmeqz16xi8(<16 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp eq <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1271,6 +1429,8 @@ define <4 x i16> @cmeqz4xi16(<4 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp eq <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -1287,6 +1447,8 @@ define <8 x i16> @cmeqz8xi16(<8 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp eq <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1303,6 +1465,8 @@ define <2 x i32> @cmeqz2xi32(<2 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp eq <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -1319,6 +1483,8 @@ define <4 x i32> @cmeqz4xi32(<4 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr 
v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp eq <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -1335,6 +1501,8 @@ define <2 x i64> @cmeqz2xi64(<2 x i64> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp eq <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -1352,6 +1520,8 @@ define <8 x i8> @cmgez8xi8(<8 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sge <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1368,6 +1538,8 @@ define <16 x i8> @cmgez16xi8(<16 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sge <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1384,6 +1556,8 @@ define <4 x i16> @cmgez4xi16(<4 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sge <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -1400,6 +1574,8 @@ define <8 x i16> @cmgez8xi16(<8 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sge <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1416,6 +1592,8 @@ define <2 x i32> @cmgez2xi32(<2 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, 
#0000000000000000 ; GISEL-NEXT: cmge v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sge <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -1432,6 +1610,8 @@ define <4 x i32> @cmgez4xi32(<4 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sge <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -1448,6 +1628,8 @@ define <2 x i64> @cmgez2xi64(<2 x i64> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp sge <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -1578,6 +1760,8 @@ define <8 x i8> @cmgtz8xi8(<8 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sgt <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1594,6 +1778,8 @@ define <16 x i8> @cmgtz16xi8(<16 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sgt <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1610,6 +1796,8 @@ define <4 x i16> @cmgtz4xi16(<4 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sgt <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ 
-1626,6 +1814,8 @@ define <8 x i16> @cmgtz8xi16(<8 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sgt <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1642,6 +1832,8 @@ define <2 x i32> @cmgtz2xi32(<2 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sgt <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -1658,6 +1850,8 @@ define <4 x i32> @cmgtz4xi32(<4 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sgt <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -1674,6 +1868,8 @@ define <2 x i64> @cmgtz2xi64(<2 x i64> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp sgt <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -1690,6 +1886,8 @@ define <8 x i8> @cmlez8xi8(<8 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp sle <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1706,6 +1904,8 @@ define <16 x i8> @cmlez16xi8(<16 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; 
GISEL-NEXT: ret %tmp3 = icmp sle <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1722,6 +1922,8 @@ define <4 x i16> @cmlez4xi16(<4 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sle <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -1738,6 +1940,8 @@ define <8 x i16> @cmlez8xi16(<8 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp sle <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1754,6 +1958,8 @@ define <2 x i32> @cmlez2xi32(<2 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sle <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -1770,6 +1976,8 @@ define <4 x i32> @cmlez4xi32(<4 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp sle <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -1786,6 +1994,8 @@ define <2 x i64> @cmlez2xi64(<2 x i64> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmge v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp sle <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -1802,6 +2012,8 @@ define <8 x i8> @cmltz8xi8(<8 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; 
GISEL-NEXT: cmgt v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp slt <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1818,6 +2030,8 @@ define <16 x i8> @cmltz16xi8(<16 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp slt <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1834,6 +2048,8 @@ define <4 x i16> @cmltz4xi16(<4 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp slt <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -1850,6 +2066,8 @@ define <8 x i16> @cmltz8xi16(<8 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp slt <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1866,6 +2084,8 @@ define <2 x i32> @cmltz2xi32(<2 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp slt <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -1882,6 +2102,8 @@ define <4 x i32> @cmltz4xi32(<4 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp slt <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -1898,6 +2120,8 @@ 
define <2 x i64> @cmltz2xi64(<2 x i64> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmgt v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp slt <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -1915,6 +2139,8 @@ define <8 x i8> @cmneqz8xi8(<8 x i8> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ne <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -1932,6 +2158,8 @@ define <16 x i8> @cmneqz16xi8(<16 x i8> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ne <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -1949,6 +2177,8 @@ define <4 x i16> @cmneqz4xi16(<4 x i16> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.4h, v0.4h, v1.4h ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ne <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -1966,6 +2196,8 @@ define <8 x i16> @cmneqz8xi16(<8 x i16> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.8h, v0.8h, v1.8h ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ne <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -1983,6 +2215,8 @@ define <2 x i32> @cmneqz2xi32(<2 x i32> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, 
#31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ne <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2000,6 +2234,8 @@ define <4 x i32> @cmneqz4xi32(<4 x i32> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ne <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2017,6 +2253,8 @@ define <2 x i64> @cmneqz2xi64(<2 x i64> %A) { ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmeq v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ne <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2035,6 +2273,8 @@ define <8 x i8> @cmhsz8xi8(<8 x i8> %A) { ; GISEL-NEXT: adrp x8, .LCPI126_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI126_0] ; GISEL-NEXT: cmhs v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp uge <8 x i8> %A, %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -2053,6 +2293,8 @@ define <16 x i8> @cmhsz16xi8(<16 x i8> %A) { ; GISEL-NEXT: adrp x8, .LCPI127_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI127_0] ; GISEL-NEXT: cmhs v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp uge <16 x i8> %A, %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -2071,6 +2313,8 @@ define <4 x i16> @cmhsz4xi16(<4 x i16> %A) { ; GISEL-NEXT: adrp x8, .LCPI128_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI128_0] ; GISEL-NEXT: cmhs v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp uge <4 x i16> %A, %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -2089,6 +2333,8 @@ define <8 x i16> @cmhsz8xi16(<8 x i16> %A) { ; 
GISEL-NEXT: adrp x8, .LCPI129_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI129_0] ; GISEL-NEXT: cmhs v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp uge <8 x i16> %A, %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -2107,6 +2353,8 @@ define <2 x i32> @cmhsz2xi32(<2 x i32> %A) { ; GISEL-NEXT: adrp x8, .LCPI130_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI130_0] ; GISEL-NEXT: cmhs v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp uge <2 x i32> %A, %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2125,6 +2373,8 @@ define <4 x i32> @cmhsz4xi32(<4 x i32> %A) { ; GISEL-NEXT: adrp x8, .LCPI131_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI131_0] ; GISEL-NEXT: cmhs v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp uge <4 x i32> %A, %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2144,6 +2394,8 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) { ; GISEL-NEXT: adrp x8, .LCPI132_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI132_0] ; GISEL-NEXT: cmhs v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp uge <2 x i64> %A, %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2163,6 +2415,8 @@ define <8 x i8> @cmhiz8xi8(<8 x i8> %A) { ; GISEL-NEXT: adrp x8, .LCPI133_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI133_0] ; GISEL-NEXT: cmhi v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ugt <8 x i8> %A, %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -2181,6 +2435,8 @@ define <16 x i8> @cmhiz16xi8(<16 x i8> %A) { ; GISEL-NEXT: adrp x8, .LCPI134_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI134_0] ; GISEL-NEXT: cmhi v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ugt <16 x i8> %A, %tmp4 = 
sext <16 x i1> %tmp3 to <16 x i8> @@ -2199,6 +2455,8 @@ define <4 x i16> @cmhiz4xi16(<4 x i16> %A) { ; GISEL-NEXT: adrp x8, .LCPI135_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI135_0] ; GISEL-NEXT: cmhi v0.4h, v0.4h, v1.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ugt <4 x i16> %A, %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -2217,6 +2475,8 @@ define <8 x i16> @cmhiz8xi16(<8 x i16> %A) { ; GISEL-NEXT: adrp x8, .LCPI136_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI136_0] ; GISEL-NEXT: cmhi v0.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ugt <8 x i16> %A, %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -2235,6 +2495,8 @@ define <2 x i32> @cmhiz2xi32(<2 x i32> %A) { ; GISEL-NEXT: adrp x8, .LCPI137_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI137_0] ; GISEL-NEXT: cmhi v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ugt <2 x i32> %A, %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2253,6 +2515,8 @@ define <4 x i32> @cmhiz4xi32(<4 x i32> %A) { ; GISEL-NEXT: adrp x8, .LCPI138_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI138_0] ; GISEL-NEXT: cmhi v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ugt <4 x i32> %A, %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2272,6 +2536,8 @@ define <2 x i64> @cmhiz2xi64(<2 x i64> %A) { ; GISEL-NEXT: adrp x8, .LCPI139_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI139_0] ; GISEL-NEXT: cmhi v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ugt <2 x i64> %A, %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2290,6 +2556,8 @@ define <8 x i8> @cmlsz8xi8(<8 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; 
GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ule <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -2308,6 +2576,8 @@ define <16 x i8> @cmlsz16xi8(<16 x i8> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ule <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -2326,6 +2596,8 @@ define <4 x i16> @cmlsz4xi16(<4 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ule <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -2344,6 +2616,8 @@ define <8 x i16> @cmlsz8xi16(<8 x i16> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ule <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -2362,6 +2636,8 @@ define <2 x i32> @cmlsz2xi32(<2 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ule <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2380,6 +2656,8 @@ define <4 x i32> @cmlsz4xi32(<4 x i32> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ule <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2398,6 +2676,8 @@ define <2 x i64> @cmlsz2xi64(<2 x i64> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: 
movi v1.2d, #0000000000000000 ; GISEL-NEXT: cmhs v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ule <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2417,6 +2697,8 @@ define <8 x i8> @cmloz8xi8(<8 x i8> %A) { ; GISEL-NEXT: adrp x8, .LCPI147_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI147_0] ; GISEL-NEXT: cmhi v0.8b, v1.8b, v0.8b +; GISEL-NEXT: shl v0.8b, v0.8b, #7 +; GISEL-NEXT: sshr v0.8b, v0.8b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ult <8 x i8> %A, %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> @@ -2436,6 +2718,8 @@ define <16 x i8> @cmloz16xi8(<16 x i8> %A) { ; GISEL-NEXT: adrp x8, .LCPI148_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI148_0] ; GISEL-NEXT: cmhi v0.16b, v1.16b, v0.16b +; GISEL-NEXT: shl v0.16b, v0.16b, #7 +; GISEL-NEXT: sshr v0.16b, v0.16b, #7 ; GISEL-NEXT: ret %tmp3 = icmp ult <16 x i8> %A, %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> @@ -2455,6 +2739,8 @@ define <4 x i16> @cmloz4xi16(<4 x i16> %A) { ; GISEL-NEXT: adrp x8, .LCPI149_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI149_0] ; GISEL-NEXT: cmhi v0.4h, v1.4h, v0.4h +; GISEL-NEXT: shl v0.4h, v0.4h, #15 +; GISEL-NEXT: sshr v0.4h, v0.4h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ult <4 x i16> %A, %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> @@ -2474,6 +2760,8 @@ define <8 x i16> @cmloz8xi16(<8 x i16> %A) { ; GISEL-NEXT: adrp x8, .LCPI150_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI150_0] ; GISEL-NEXT: cmhi v0.8h, v1.8h, v0.8h +; GISEL-NEXT: shl v0.8h, v0.8h, #15 +; GISEL-NEXT: sshr v0.8h, v0.8h, #15 ; GISEL-NEXT: ret %tmp3 = icmp ult <8 x i16> %A, %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> @@ -2493,6 +2781,8 @@ define <2 x i32> @cmloz2xi32(<2 x i32> %A) { ; GISEL-NEXT: adrp x8, .LCPI151_0 ; GISEL-NEXT: ldr d1, [x8, :lo12:.LCPI151_0] ; GISEL-NEXT: cmhi v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ult <2 x i32> %A, %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> 
@@ -2512,6 +2802,8 @@ define <4 x i32> @cmloz4xi32(<4 x i32> %A) { ; GISEL-NEXT: adrp x8, .LCPI152_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI152_0] ; GISEL-NEXT: cmhi v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = icmp ult <4 x i32> %A, %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2532,6 +2824,8 @@ define <2 x i64> @cmloz2xi64(<2 x i64> %A) { ; GISEL-NEXT: adrp x8, .LCPI153_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI153_0] ; GISEL-NEXT: cmhi v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = icmp ult <2 x i64> %A, %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2547,6 +2841,8 @@ define <2 x i32> @fcmoeq2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmoeq2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oeq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2562,6 +2858,8 @@ define <4 x i32> @fcmoeq4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmoeq4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oeq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2576,6 +2874,8 @@ define <2 x i64> @fcmoeq2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmoeq2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp oeq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2591,6 +2891,8 @@ define <2 x i32> @fcmoge2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmoge2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; 
GISEL-NEXT: ret %tmp3 = fcmp oge <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2606,6 +2908,8 @@ define <4 x i32> @fcmoge4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmoge4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oge <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2620,6 +2924,8 @@ define <2 x i64> @fcmoge2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmoge2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp oge <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2635,6 +2941,8 @@ define <2 x i32> @fcmogt2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmogt2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ogt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2650,6 +2958,8 @@ define <4 x i32> @fcmogt4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmogt4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ogt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2664,6 +2974,8 @@ define <2 x i64> @fcmogt2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmogt2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ogt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2680,6 +2992,8 @@ define <2 x i32> @fcmole2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmole2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge 
v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ole <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2696,6 +3010,8 @@ define <4 x i32> @fcmole4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmole4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ole <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2712,6 +3028,8 @@ define <2 x i64> @fcmole2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmole2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ole <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2728,6 +3046,8 @@ define <2 x i32> @fcmolt2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmolt2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp olt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2744,6 +3064,8 @@ define <4 x i32> @fcmolt4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmolt4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp olt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2760,6 +3082,8 @@ define <2 x i64> @fcmolt2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmolt2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp olt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2780,6 +3104,8 @@ define <2 x i32> @fcmone2xfloat(<2 x 
float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmgt v2.2s, v0.2s, v1.2s ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp one <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2800,6 +3126,8 @@ define <4 x i32> @fcmone4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmgt v2.4s, v0.4s, v1.4s ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp one <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2821,6 +3149,8 @@ define <2 x i64> @fcmone2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmgt v2.2d, v0.2d, v1.2d ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp one <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2841,6 +3171,8 @@ define <2 x i32> @fcmord2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmge v2.2s, v0.2s, v1.2s ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ord <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2861,6 +3193,8 @@ define <4 x i32> @fcmord4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmge v2.4s, v0.4s, v1.4s ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ord <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2881,6 +3215,8 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmge v2.2d, v0.2d, v1.2d ; GISEL-NEXT: fcmgt v0.2d, v1.2d, 
v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ord <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2904,6 +3240,8 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2926,6 +3264,8 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -2948,6 +3288,8 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -2970,6 +3312,8 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ueq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -2992,6 +3336,8 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret 
%tmp3 = fcmp ueq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3014,6 +3360,8 @@ define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ueq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3032,6 +3380,8 @@ define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uge <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3050,6 +3400,8 @@ define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uge <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3068,6 +3420,8 @@ define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp uge <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3086,6 +3440,8 @@ define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v1.2s, v0.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ugt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3104,6 +3460,8 @@ define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v1.4s, v0.4s 
; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ugt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3121,6 +3479,8 @@ define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v1.2d, v0.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ugt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3139,6 +3499,8 @@ define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ule <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3157,6 +3519,8 @@ define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ule <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3175,6 +3539,8 @@ define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ule <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3193,6 +3559,8 @@ define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ult <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3211,6 +3579,8 @@ define <4 x i32> 
@fcmult4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ult <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3229,6 +3599,8 @@ define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ult <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3247,6 +3619,8 @@ define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp une <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3265,6 +3639,8 @@ define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp une <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3283,6 +3659,8 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp une <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3298,6 +3676,8 @@ define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) { ; GISEL-LABEL: fcmoeqz2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oeq <2 x 
float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3313,6 +3693,8 @@ define <4 x i32> @fcmoeqz4xfloat(<4 x float> %A) { ; GISEL-LABEL: fcmoeqz4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oeq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3327,6 +3709,8 @@ define <2 x i64> @fcmoeqz2xdouble(<2 x double> %A) { ; GISEL-LABEL: fcmoeqz2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp oeq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3343,6 +3727,8 @@ define <2 x i32> @fcmogez2xfloat(<2 x float> %A) { ; GISEL-LABEL: fcmogez2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oge <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3358,6 +3744,8 @@ define <4 x i32> @fcmogez4xfloat(<4 x float> %A) { ; GISEL-LABEL: fcmogez4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp oge <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3372,6 +3760,8 @@ define <2 x i64> @fcmogez2xdouble(<2 x double> %A) { ; GISEL-LABEL: fcmogez2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp oge <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3387,6 +3777,8 @@ define <2 x i32> @fcmogtz2xfloat(<2 x float> %A) { ; GISEL-LABEL: fcmogtz2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 
+; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ogt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3402,6 +3794,8 @@ define <4 x i32> @fcmogtz4xfloat(<4 x float> %A) { ; GISEL-LABEL: fcmogtz4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ogt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3416,6 +3810,8 @@ define <2 x i64> @fcmogtz2xdouble(<2 x double> %A) { ; GISEL-LABEL: fcmogtz2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ogt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3431,6 +3827,8 @@ define <2 x i32> @fcmoltz2xfloat(<2 x float> %A) { ; GISEL-LABEL: fcmoltz2xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp olt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3446,6 +3844,8 @@ define <4 x i32> @fcmoltz4xfloat(<4 x float> %A) { ; GISEL-LABEL: fcmoltz4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp olt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3461,6 +3861,8 @@ define <2 x i64> @fcmoltz2xdouble(<2 x double> %A) { ; GISEL-LABEL: fcmoltz2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp olt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3476,6 +3878,8 @@ define <2 x i32> @fcmolez2xfloat(<2 x float> %A) { ; GISEL-LABEL: fcmolez2xfloat: ; GISEL: // 
%bb.0: ; GISEL-NEXT: fcmle v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ole <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3491,6 +3895,8 @@ define <4 x i32> @fcmolez4xfloat(<4 x float> %A) { ; GISEL-LABEL: fcmolez4xfloat: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ole <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3506,6 +3912,8 @@ define <2 x i64> @fcmolez2xdouble(<2 x double> %A) { ; GISEL-LABEL: fcmolez2xdouble: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ole <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3526,6 +3934,8 @@ define <2 x i32> @fcmonez2xfloat(<2 x float> %A) { ; GISEL-NEXT: fcmgt v1.2s, v0.2s, #0.0 ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp one <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3546,6 +3956,8 @@ define <4 x i32> @fcmonez4xfloat(<4 x float> %A) { ; GISEL-NEXT: fcmgt v1.4s, v0.4s, #0.0 ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp one <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3566,6 +3978,8 @@ define <2 x i64> @fcmonez2xdouble(<2 x double> %A) { ; GISEL-NEXT: fcmgt v1.2d, v0.2d, #0.0 ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp one <2 x double> %A, 
zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3586,6 +4000,8 @@ define <2 x i32> @fcmordz2xfloat(<2 x float> %A) { ; GISEL-NEXT: fcmge v1.2s, v0.2s, #0.0 ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ord <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3606,6 +4022,8 @@ define <4 x i32> @fcmordz4xfloat(<4 x float> %A) { ; GISEL-NEXT: fcmge v1.4s, v0.4s, #0.0 ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ord <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3626,6 +4044,8 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) { ; GISEL-NEXT: fcmge v1.2d, v0.2d, #0.0 ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ord <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3648,6 +4068,8 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3670,6 +4092,8 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3692,6 +4116,8 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { ; 
GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3710,6 +4136,8 @@ define <2 x i32> @fcmugez2xfloat(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uge <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3728,6 +4156,8 @@ define <4 x i32> @fcmugez4xfloat(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uge <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3746,6 +4176,8 @@ define <2 x i64> @fcmugez2xdouble(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp uge <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3764,6 +4196,8 @@ define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ugt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3782,6 +4216,8 @@ define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ugt <4 x float> %A, zeroinitializer %tmp4 = 
sext <4 x i1> %tmp3 to <4 x i32> @@ -3800,6 +4236,8 @@ define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ugt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3818,6 +4256,8 @@ define <2 x i32> @fcmultz2xfloat(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ult <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3835,6 +4275,8 @@ define <4 x i32> @fcmultz4xfloat(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ult <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3852,6 +4294,8 @@ define <2 x i64> @fcmultz2xdouble(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ult <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3870,6 +4314,8 @@ define <2 x i32> @fcmulez2xfloat(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ule <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3888,6 +4334,8 @@ define <4 x i32> @fcmulez4xfloat(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, 
v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp ule <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3906,6 +4354,8 @@ define <2 x i64> @fcmulez2xdouble(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp ule <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3924,6 +4374,8 @@ define <2 x i32> @fcmunez2xfloat(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp une <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -3942,6 +4394,8 @@ define <4 x i32> @fcmunez4xfloat(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp une <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -3960,6 +4414,8 @@ define <2 x i64> @fcmunez2xdouble(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp une <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -3982,6 +4438,8 @@ define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) { ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uno <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4004,6 +4462,8 @@ define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) { ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; 
GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp uno <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4026,6 +4486,8 @@ define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) { ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp uno <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4042,6 +4504,8 @@ define <2 x i32> @fcmoeq2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmoeq2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oeq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4057,6 +4521,8 @@ define <4 x i32> @fcmoeq4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmoeq4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oeq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4071,6 +4537,8 @@ define <2 x i64> @fcmoeq2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmoeq2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast oeq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4086,6 +4554,8 @@ define <2 x i32> @fcmoge2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmoge2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret 
%tmp3 = fcmp fast oge <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4101,6 +4571,8 @@ define <4 x i32> @fcmoge4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmoge4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oge <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4115,6 +4587,8 @@ define <2 x i64> @fcmoge2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmoge2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast oge <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4130,6 +4604,8 @@ define <2 x i32> @fcmogt2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmogt2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, v1.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ogt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4145,6 +4621,8 @@ define <4 x i32> @fcmogt4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmogt4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ogt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4159,6 +4637,8 @@ define <2 x i64> @fcmogt2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmogt2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, v1.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ogt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4174,6 +4654,8 @@ define <2 x i32> @fcmole2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; 
GISEL-LABEL: fcmole2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ole <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4189,6 +4671,8 @@ define <4 x i32> @fcmole4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmole4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ole <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4204,6 +4688,8 @@ define <2 x i64> @fcmole2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmole2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ole <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4219,6 +4705,8 @@ define <2 x i32> @fcmolt2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-LABEL: fcmolt2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast olt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4234,6 +4722,8 @@ define <4 x i32> @fcmolt4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-LABEL: fcmolt4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast olt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4249,6 +4739,8 @@ define <2 x i64> @fcmolt2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-LABEL: fcmolt2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: 
ret %tmp3 = fcmp fast olt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4267,6 +4759,8 @@ define <2 x i32> @fcmone2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmgt v2.2s, v0.2s, v1.2s ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast one <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4285,6 +4779,8 @@ define <4 x i32> @fcmone4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmgt v2.4s, v0.4s, v1.4s ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast one <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4303,6 +4799,8 @@ define <2 x i64> @fcmone2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmgt v2.2d, v0.2d, v1.2d ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast one <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4322,6 +4820,8 @@ define <2 x i32> @fcmord2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmge v2.2s, v0.2s, v1.2s ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ord <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4341,6 +4841,8 @@ define <4 x i32> @fcmord4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmge v2.4s, v0.4s, v1.4s ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ord <4 x float> %A, %B %tmp4 = 
sext <4 x i1> %tmp3 to <4 x i32> @@ -4360,6 +4862,8 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmge v2.2d, v0.2d, v1.2d ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ord <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4382,6 +4886,8 @@ define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4403,6 +4909,8 @@ define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4424,6 +4932,8 @@ define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4442,6 +4952,8 @@ define <2 x i32> @fcmueq2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: orr v0.8b, v0.8b, v2.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ueq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4460,6 +4972,8 @@ define <4 x i32> 
@fcmueq4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ueq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4478,6 +4992,8 @@ define <2 x i64> @fcmueq2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: orr v0.16b, v0.16b, v2.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ueq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4494,6 +5010,8 @@ define <2 x i32> @fcmuge2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v1.2s, v0.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uge <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4510,6 +5028,8 @@ define <4 x i32> @fcmuge4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v1.4s, v0.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uge <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4526,6 +5046,8 @@ define <2 x i64> @fcmuge2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v1.2d, v0.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast uge <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4542,6 +5064,8 @@ define <2 x i32> @fcmugt2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v1.2s, v0.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; 
GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ugt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4558,6 +5082,8 @@ define <4 x i32> @fcmugt4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v1.4s, v0.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ugt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4574,6 +5100,8 @@ define <2 x i64> @fcmugt2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v1.2d, v0.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ugt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4590,6 +5118,8 @@ define <2 x i32> @fcmule2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ule <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4606,6 +5136,8 @@ define <4 x i32> @fcmule4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ule <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4622,6 +5154,8 @@ define <2 x i64> @fcmule2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ule <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4638,6 +5172,8 @@ 
define <2 x i32> @fcmult2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ult <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4654,6 +5190,8 @@ define <4 x i32> @fcmult4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ult <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4670,6 +5208,8 @@ define <2 x i64> @fcmult2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ult <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4687,6 +5227,8 @@ define <2 x i32> @fcmune2xfloat_fast(<2 x float> %A, <2 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast une <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4704,6 +5246,8 @@ define <4 x i32> @fcmune4xfloat_fast(<4 x float> %A, <4 x float> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, v1.4s ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast une <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4721,6 +5265,8 @@ define <2 x i64> @fcmune2xdouble_fast(<2 x double> %A, <2 x double> %B) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, v1.2d ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, 
#63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast une <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4736,6 +5282,8 @@ define <2 x i32> @fcmoeqz2xfloat_fast(<2 x float> %A) { ; GISEL-LABEL: fcmoeqz2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oeq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4751,6 +5299,8 @@ define <4 x i32> @fcmoeqz4xfloat_fast(<4 x float> %A) { ; GISEL-LABEL: fcmoeqz4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oeq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4765,6 +5315,8 @@ define <2 x i64> @fcmoeqz2xdouble_fast(<2 x double> %A) { ; GISEL-LABEL: fcmoeqz2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast oeq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4781,6 +5333,8 @@ define <2 x i32> @fcmogez2xfloat_fast(<2 x float> %A) { ; GISEL-LABEL: fcmogez2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oge <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4796,6 +5350,8 @@ define <4 x i32> @fcmogez4xfloat_fast(<4 x float> %A) { ; GISEL-LABEL: fcmogez4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast oge <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4810,6 +5366,8 @@ define <2 x i64> 
@fcmogez2xdouble_fast(<2 x double> %A) { ; GISEL-LABEL: fcmogez2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast oge <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4825,6 +5383,8 @@ define <2 x i32> @fcmogtz2xfloat_fast(<2 x float> %A) { ; GISEL-LABEL: fcmogtz2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ogt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4840,6 +5400,8 @@ define <4 x i32> @fcmogtz4xfloat_fast(<4 x float> %A) { ; GISEL-LABEL: fcmogtz4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ogt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4854,6 +5416,8 @@ define <2 x i64> @fcmogtz2xdouble_fast(<2 x double> %A) { ; GISEL-LABEL: fcmogtz2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ogt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4869,6 +5433,8 @@ define <2 x i32> @fcmoltz2xfloat_fast(<2 x float> %A) { ; GISEL-LABEL: fcmoltz2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast olt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4884,6 +5450,8 @@ define <4 x i32> @fcmoltz4xfloat_fast(<4 x float> %A) { ; GISEL-LABEL: fcmoltz4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr 
v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast olt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4899,6 +5467,8 @@ define <2 x i64> @fcmoltz2xdouble_fast(<2 x double> %A) { ; GISEL-LABEL: fcmoltz2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast olt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4914,6 +5484,8 @@ define <2 x i32> @fcmolez2xfloat_fast(<2 x float> %A) { ; GISEL-LABEL: fcmolez2xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2s, v0.2s, #0.0 +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ole <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4929,6 +5501,8 @@ define <4 x i32> @fcmolez4xfloat_fast(<4 x float> %A) { ; GISEL-LABEL: fcmolez4xfloat_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.4s, v0.4s, #0.0 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ole <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4944,6 +5518,8 @@ define <2 x i64> @fcmolez2xdouble_fast(<2 x double> %A) { ; GISEL-LABEL: fcmolez2xdouble_fast: ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2d, v0.2d, #0.0 +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ole <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -4962,6 +5538,8 @@ define <2 x i32> @fcmonez2xfloat_fast(<2 x float> %A) { ; GISEL-NEXT: fcmgt v1.2s, v0.2s, #0.0 ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast one <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -4980,6 +5558,8 @@ define <4 x i32> 
@fcmonez4xfloat_fast(<4 x float> %A) { ; GISEL-NEXT: fcmgt v1.4s, v0.4s, #0.0 ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast one <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -4998,6 +5578,8 @@ define <2 x i64> @fcmonez2xdouble_fast(<2 x double> %A) { ; GISEL-NEXT: fcmgt v1.2d, v0.2d, #0.0 ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast one <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5017,6 +5599,8 @@ define <2 x i32> @fcmordz2xfloat_fast(<2 x float> %A) { ; GISEL-NEXT: fcmge v1.2s, v0.2s, #0.0 ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ord <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5036,6 +5620,8 @@ define <4 x i32> @fcmordz4xfloat_fast(<4 x float> %A) { ; GISEL-NEXT: fcmge v1.4s, v0.4s, #0.0 ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ord <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5055,6 +5641,8 @@ define <2 x i64> @fcmordz2xdouble_fast(<2 x double> %A) { ; GISEL-NEXT: fcmge v1.2d, v0.2d, #0.0 ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ord <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5073,6 +5661,8 @@ define <2 x i32> @fcmueqz2xfloat_fast(<2 x float> %A) { ; GISEL-NEXT: fcmlt v0.2s, 
v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ueq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5091,6 +5681,8 @@ define <4 x i32> @fcmueqz4xfloat_fast(<4 x float> %A) { ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ueq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5109,6 +5701,8 @@ define <2 x i64> @fcmueqz2xdouble_fast(<2 x double> %A) { ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ueq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5125,6 +5719,8 @@ define <2 x i32> @fcmugez2xfloat_fast(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uge <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5141,6 +5737,8 @@ define <4 x i32> @fcmugez4xfloat_fast(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uge <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5157,6 +5755,8 @@ define <2 x i64> @fcmugez2xdouble_fast(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; 
GISEL-NEXT: ret %tmp3 = fcmp fast uge <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5173,6 +5773,8 @@ define <2 x i32> @fcmugtz2xfloat_fast(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ugt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5189,6 +5791,8 @@ define <4 x i32> @fcmugtz4xfloat_fast(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ugt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5205,6 +5809,8 @@ define <2 x i64> @fcmugtz2xdouble_fast(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmle v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ugt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5221,6 +5827,8 @@ define <2 x i32> @fcmultz2xfloat_fast(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ult <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5237,6 +5845,8 @@ define <4 x i32> @fcmultz4xfloat_fast(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmge v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ult <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5253,6 +5863,8 @@ define <2 x i64> @fcmultz2xdouble_fast(<2 x double> %A) { ; GISEL: // %bb.0: ; 
GISEL-NEXT: fcmge v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ult <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5270,6 +5882,8 @@ define <2 x i32> @fcmulez2xfloat_fast(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ule <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5286,6 +5900,8 @@ define <4 x i32> @fcmulez4xfloat_fast(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast ule <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5302,6 +5918,8 @@ define <2 x i64> @fcmulez2xdouble_fast(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmgt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast ule <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5319,6 +5937,8 @@ define <2 x i32> @fcmunez2xfloat_fast(<2 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2s, v0.2s, #0.0 ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast une <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5336,6 +5956,8 @@ define <4 x i32> @fcmunez4xfloat_fast(<4 x float> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.4s, v0.4s, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast une <4 x float> %A, 
zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5353,6 +5975,8 @@ define <2 x i64> @fcmunez2xdouble_fast(<2 x double> %A) { ; GISEL: // %bb.0: ; GISEL-NEXT: fcmeq v0.2d, v0.2d, #0.0 ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast une <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> @@ -5374,6 +5998,8 @@ define <2 x i32> @fcmunoz2xfloat_fast(<2 x float> %A) { ; GISEL-NEXT: fcmlt v0.2s, v0.2s, #0.0 ; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b +; GISEL-NEXT: shl v0.2s, v0.2s, #31 +; GISEL-NEXT: sshr v0.2s, v0.2s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uno <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> @@ -5395,6 +6021,8 @@ define <4 x i32> @fcmunoz4xfloat_fast(<4 x float> %A) { ; GISEL-NEXT: fcmlt v0.4s, v0.4s, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 ; GISEL-NEXT: ret %tmp3 = fcmp fast uno <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> @@ -5416,6 +6044,8 @@ define <2 x i64> @fcmunoz2xdouble_fast(<2 x double> %A) { ; GISEL-NEXT: fcmlt v0.2d, v0.2d, #0.0 ; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b ; GISEL-NEXT: mvn v0.16b, v0.16b +; GISEL-NEXT: shl v0.2d, v0.2d, #63 +; GISEL-NEXT: sshr v0.2d, v0.2d, #63 ; GISEL-NEXT: ret %tmp3 = fcmp fast uno <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> -- 2.7.4